Enable IOMMU (AMD-Vi / Intel VT-d) in BIOS
Get IOMMU groups
# Print every IOMMU group together with the PCI devices it contains.
# Arguments: $1 - sysfs directory holding the groups
#                 (default: /sys/kernel/iommu_groups)
# Outputs:   "IOMMU Group <n>:" followed by one tab-indented `lspci -nns`
#            line per device, groups in natural numeric order.
# Returns:   1 (with a message on stderr) when no group exists.
list_iommu_groups() {
  local root=${1:-/sys/kernel/iommu_groups}
  local group dev
  local -a groups

  shopt -s nullglob
  groups=("$root"/*/)

  # With nullglob an empty directory yields no paths at all; handing that to
  # `find` would make it scan the current directory instead, so stop here.
  if (( ${#groups[@]} == 0 )); then
    echo "No IOMMU groups under ${root} - is the IOMMU enabled?" >&2
    return 1
  fi

  while IFS= read -r group; do
    echo "IOMMU Group ${group##*/}:"
    for dev in "$group"/devices/*; do
      printf '\t%s\n' "$(lspci -nns "${dev##*/}")"
    done
  done < <(printf '%s\n' "${groups[@]%/}" | sort -V)
}

list_iommu_groups
Configure compute node
# Bind every NVIDIA PCI function to vfio-pci and switch the IOMMU on.
# Run as root on the compute node; the node reboots at the end.

# Kernel parameter that enables the IOMMU. Defaults to the AMD variant;
# export IOMMU_PARAM=intel_iommu=on beforehand on Intel hosts.
IOMMU_PARAM=${IOMMU_PARAM:-amd_iommu=on}

# Collect the vendor:device IDs of all NVIDIA functions (GPU and the audio
# device from the same IOMMU group) as ids=10de:xxxx,10de:xxxx.
# Matching the [xxxx:xxxx] pattern keeps bracketed marketing names such as
# "[GeForce RTX 3090]" from being picked up instead of the ID.
NVIDIA_PCI_ID=$(lspci -nn \
  | grep NVIDIA \
  | grep -oE '\[[0-9a-f]{4}:[0-9a-f]{4}\]' \
  | tr -d '[]' \
  | sort -u \
  | paste -sd, -)

if [[ -z "${NVIDIA_PCI_ID}" ]]; then
  echo "No NVIDIA PCI device found - nothing configured." >&2
else
  # These files are dedicated to this setup, so they are overwritten rather
  # than appended to; re-running the snippet does not pile up duplicates.
  printf 'blacklist %s\n' nouveau nvidiafb > /etc/modprobe.d/blacklist-nvidia.conf
  echo vfio-pci > /etc/modules-load.d/vfio-pci.conf
  echo "options vfio-pci ids=${NVIDIA_PCI_ID}" > /etc/modprobe.d/gpu-vfio.conf

  # NOTE: replaces the whole default kernel command line, as before.
  sed -i "s/^GRUB_CMDLINE_LINUX_DEFAULT=.*/GRUB_CMDLINE_LINUX_DEFAULT=\"${IOMMU_PARAM}\"/" \
    /etc/default/grub

  # Only reboot when the new GRUB configuration was actually generated.
  update-grub && reboot
fi
# Alternative settings, all disabled and kept for reference only.
#/etc/modprobe.d/kvm.conf
#options kvm ignore_msrs=Y
#echo "options vfio-pci ids=${NVIDIA_PCI_ID}" >> /etc/modprobe.d/vfio.conf
#echo "softdep drm pre: vfio-pci" >> /etc/modprobe.d/vfio.conf
#echo "options kvm ignore_msrs=1" >> /etc/modprobe.d/kvm.conf
#echo "vfio vfio_iommu_type1 vfio_virqfd vfio_pci ids=${NVIDIA_PCI_ID}" >> /etc/initramfs-tools/modules
#echo GRUB_CMDLINE_LINUX_DEFAULT=\"intel_iommu=on vfio-pci.ids=${NVIDIA_PCI_ID} vfio_iommu_type1.allow_unsafe_interrupts=1 modprobe.blacklist=nvidiafb,nouveau\" >> /etc/default/grub
#echo GRUB_CMDLINE_LINUX_DEFAULT=\"amd_iommu=on vfio-pci.ids=${NVIDIA_PCI_ID} vfio_iommu_type1.allow_unsafe_interrupts=1 modprobe.blacklist=nvidiafb,nouveau\" >> /etc/default/grub
#echo 'GRUB_CMDLINE_LINUX_DEFAULT="amd_iommu=on kvm.ignore_msrs=1 vfio-pci.ids=10de:27b8"' >> /etc/default/grub
#echo 'GRUB_CMDLINE_LINUX_DEFAULT="amd_iommu=on kvm.ignore_msrs=1"' >> /etc/default/grub
# iommu=pt
Test whether the vfio driver is loaded
# Show which kernel driver is bound to each NVIDIA PCI function.
for PCI_ID in $(lspci | grep NVIDIA | cut -d" " -f1); do
  sudo lspci -s "${PCI_ID}" -k
done

# Example output - "Kernel driver in use" should read vfio-pci:
#   02:00.0 3D controller: NVIDIA Corporation Device 27b8 (rev a1)
#           Subsystem: NVIDIA Corporation Device 16ca
#           Kernel driver in use: vfio-pci
#           Kernel modules: nvidiafb, nouveau
Configure OpenStack
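Set `device_type` in the alias to `type-PF` or `type-PCI`, depending on the GPU (`type-PF` for devices that expose SR-IOV, `type-PCI` for devices that do not).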
# /etc/kolla/config/ew/nova/nova-compute.conf
# device_spec selects the host devices nova may pass through; alias gives
# them the name ("gpu") that flavors refer to.
...
[pci]
device_spec = { "vendor_id": "10de", "product_id": "27b8" }
alias = { "vendor_id":"10de", "product_id":"27b8", "device_type":"type-PF", "name":"gpu" }

# /etc/kolla/config/ew/nova/nova-api.conf
# Same alias definition as in nova-compute.conf.
...
[pci]
alias = { "vendor_id":"10de", "product_id":"27b8", "device_type":"type-PF", "name":"gpu" }

# /etc/kolla/config/ew/nova/nova-scheduler.conf
# Append PciPassthroughFilter to the filters that are already enabled.
[filter_scheduler]
...
available_filters = nova.scheduler.filters.all_filters
enabled_filters = ...,PciPassthroughFilter
Create flavor
# 8 vCPUs, 64 GiB RAM, 100 GB disk and one device from the "gpu" PCI alias.
openstack flavor create gpu-8c64g \
  --vcpus 8 \
  --ram 65536 \
  --disk 100 \
  --property "pci_passthrough:alias"="gpu:1"
Create VM
# Boot a test instance on the GPU flavor created in the previous step
# (gpu-8c64g); the random suffix keeps repeated test VMs apart.
openstack server create \
  --flavor gpu-8c64g \
  --image "Ubuntu 22.04" \
  --network test-network \
  --security-group test-secgroup \
  --key-name test-keypair \
  "gpu-test-${RANDOM}"
OPTIONAL: Auto-install GPU driver
# Write a cloud-config that installs the GPU driver on first boot and shows
# nvidia-smi at login. The quoted delimiter keeps the content literal.
cat <<'EOF' > gpu.cloud-config
#cloud-config
package_upgrade: true
packages:
  - ubuntu-drivers-common
  #- nvidia-driver-550-server
runcmd:
  - ubuntu-drivers install
  # The motd snippet is executed directly, so it needs a shebang line.
  - |
    printf '#!/bin/sh\nnvidia-smi\n' > /etc/update-motd.d/99-gpu
  - chmod 755 /etc/update-motd.d/99-gpu
EOF

# create VM
# NOTE(review): the flavors below must already exist - adjust as needed.
openstack server create \
  --flavor gpu-16c64g \
  --image "Ubuntu 24.04 GPU" \
  --network test-network \
  --security-group test-secgroup \
  --key-name test-keypair \
  "gpu-test-${RANDOM}"

openstack server create \
  --flavor gpu-32c96g \
  --image "Ubuntu 24.04 GPU" \
  --network test-network \
  --security-group test-secgroup \
  --key-name test-keypair \
  --user-data gpu.cloud-config \
  "gpu-test-${RANDOM}"
Install GPU driver inside of the VM
# Install the recommended NVIDIA driver inside the guest.
sudo apt update && sudo apt install -y ubuntu-drivers-common
#sudo apt install -y nvidia-driver-550-server
# apt install ubuntu-drivers-common
# ubuntu-drivers devices
sudo ubuntu-drivers install

# show GPU usage on login
printf '#!/bin/bash\n\nnvidia-smi\n' | sudo tee /etc/update-motd.d/99-gpu
sudo chmod 755 /etc/update-motd.d/99-gpu

# Reboot so the freshly installed driver gets loaded.
sudo reboot
GPU Benchmark
https://docs.fuga.cloud/cloud/benchmark/how-to-do-a-gpu-benchmark-test/
# Download Geekbench 5 and run its CUDA compute benchmark.
wget https://cdn.geekbench.com/Geekbench-5.4.1-Linux.tar.gz
tar -xzvf Geekbench-5.4.1-Linux.tar.gz
# Only start the benchmark when the extracted directory could be entered.
cd Geekbench-5.4.1-Linux && ./geekbench5 --compute CUDA
CUDA
https://developer.nvidia.com/cuda-gpus
# Register NVIDIA's CUDA apt repository (the URL below is the Ubuntu 22.04
# x86_64 one) and install the CUDA 12.5 toolkit plus drivers.
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
sudo dpkg -i cuda-keyring_1.1-1_all.deb
sudo apt-get update
sudo apt-get -y install cuda-toolkit-12-5

# NOTE(review): the three driver packages below look like alternatives
# rather than a set to install together - confirm which one is wanted.
sudo apt-get install -y cuda-drivers
sudo apt-get install -y nvidia-driver-555-open
sudo apt-get install -y cuda-drivers-555

# NOTE(review): nvtop is not installed by any step here - presumably it has
# to be installed separately; confirm.
nvtop
# hashcat -b
gpu-burn
https://github.com/wilicc/gpu-burn
# Install the stress tool first, then run it as a separate command.
sudo apt install -y gpu-burn libcublas12
gpu-burn 3600 # burns all GPUs for an hour
Links
https://docs.openstack.org/nova/latest/admin/pci-passthrough.html
https://wiki.archlinux.org/title/PCI_passthrough_via_OVMF
https://superuser.openinfra.dev/articles/a-comprehensive-guide-to-configuring-gpu-passthrough-in-openstack-for-high-performance-computing/
https://satishdotpatel.github.io/gpu-passthrough-for-openstack/
https://gitlab.com/polloloco/vgpu-proxmox
https://www.reddit.com/r/openstack/comments/1aoi3sg/how_to_create_instances_with_gpu/
https://128b.xyz/ops/initial_setup
https://doc.opensuse.org/documentation/leap/virtualization/html/book-virtualization/app-gpu-passthru.html
https://www.jimmdenton.com/gpu-offloading-openstack/