From 7d7b6643b23cef0204bdc58a9ef519baa3a535f2 Mon Sep 17 00:00:00 2001 From: vishesh92 Date: Wed, 8 Apr 2026 14:15:01 +0530 Subject: [PATCH 1/6] Fix domain parsing for GPU --- .../kvm/resource/LibvirtGpuDef.java | 16 +- .../kvm/resource/LibvirtGpuDefTest.java | 104 ++++++++++ scripts/vm/hypervisor/kvm/gpudiscovery.sh | 192 ++++++++++++++---- 3 files changed, 264 insertions(+), 48 deletions(-) diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtGpuDef.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtGpuDef.java index 08086859fb70..77ee9b0af31f 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtGpuDef.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtGpuDef.java @@ -79,7 +79,9 @@ private void generatePciXml(StringBuilder gpuBuilder) { gpuBuilder.append(" \n"); gpuBuilder.append(" \n"); - // Parse the bus address (e.g., 00:02.0) into domain, bus, slot, function + // Parse the bus address into domain, bus, slot, function. Two input formats are accepted: + // - "dddd:bb:ss.f" full PCI address with domain (e.g. 0000:00:02.0) + // - "bb:ss.f" legacy short BDF; domain defaults to 0000 String domain = "0x0000"; String bus = "0x00"; String slot = "0x00"; @@ -87,9 +89,17 @@ private void generatePciXml(StringBuilder gpuBuilder) { if (busAddress != null && !busAddress.isEmpty()) { String[] parts = busAddress.split(":"); - if (parts.length > 1) { + String slotFunction = null; + if (parts.length == 3) { + domain = "0x" + parts[0]; + bus = "0x" + parts[1]; + slotFunction = parts[2]; + } else if (parts.length == 2) { bus = "0x" + parts[0]; - String[] slotFunctionParts = parts[1].split("\\."); + slotFunction = parts[1]; + } + if (slotFunction != null) { + String[] slotFunctionParts = slotFunction.split("\\."); if (slotFunctionParts.length > 0) { slot = "0x" + slotFunctionParts[0]; if (slotFunctionParts.length > 1) { diff --git a/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/LibvirtGpuDefTest.java b/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/LibvirtGpuDefTest.java index 0060e1d7ed4d..6b8eac8e2231 100644 --- a/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/LibvirtGpuDefTest.java +++ b/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/LibvirtGpuDefTest.java @@ -115,6 +115,110 @@ public void testGpuDef_withComplexPciAddress() { assertTrue(gpuXml.contains("")); } + @Test + public void testGpuDef_withFullPciAddressDomainZero() { + LibvirtGpuDef gpuDef = new LibvirtGpuDef(); + VgpuTypesInfo pciGpuInfo = new VgpuTypesInfo( + GpuDevice.DeviceType.PCI, + "passthrough", + "passthrough", + "0000:00:02.0", + "10de", + "NVIDIA Corporation", + "1b38", + "Tesla T4" + ); + gpuDef.defGpu(pciGpuInfo); + + String gpuXml = gpuDef.toString(); + + assertTrue(gpuXml.contains("
")); + } + + @Test + public void testGpuDef_withFullPciAddressNonZeroDomain() { + LibvirtGpuDef gpuDef = new LibvirtGpuDef(); + VgpuTypesInfo pciGpuInfo = new VgpuTypesInfo( + GpuDevice.DeviceType.PCI, + "passthrough", + "passthrough", + "0001:65:00.0", + "10de", + "NVIDIA Corporation", + "1b38", + "Tesla T4" + ); + gpuDef.defGpu(pciGpuInfo); + + String gpuXml = gpuDef.toString(); + + assertTrue(gpuXml.contains("
")); + } + + @Test + public void testGpuDef_withFullPciAddressWideDomain() { + LibvirtGpuDef gpuDef = new LibvirtGpuDef(); + VgpuTypesInfo pciGpuInfo = new VgpuTypesInfo( + GpuDevice.DeviceType.PCI, + "passthrough", + "passthrough", + "10000:af:00.1", + "10de", + "NVIDIA Corporation", + "1b38", + "Tesla T4" + ); + gpuDef.defGpu(pciGpuInfo); + + String gpuXml = gpuDef.toString(); + + assertTrue(gpuXml.contains("
")); + } + + @Test + public void testGpuDef_withFullPciAddressVfNonZeroDomain() { + LibvirtGpuDef gpuDef = new LibvirtGpuDef(); + VgpuTypesInfo vfGpuInfo = new VgpuTypesInfo( + GpuDevice.DeviceType.PCI, + "VF-Profile", + "VF-Profile", + "0002:81:00.3", + "10de", + "NVIDIA Corporation", + "1eb8", + "Tesla T4" + ); + gpuDef.defGpu(vfGpuInfo); + + String gpuXml = gpuDef.toString(); + + // Non-passthrough NVIDIA VFs should be unmanaged + assertTrue(gpuXml.contains("")); + assertTrue(gpuXml.contains("
")); + } + + @Test + public void testGpuDef_withLegacyShortBdfDefaultsDomainToZero() { + // Backward compatibility: short BDF with no domain segment must still + // produce a valid libvirt address with domain 0x0000. + LibvirtGpuDef gpuDef = new LibvirtGpuDef(); + VgpuTypesInfo pciGpuInfo = new VgpuTypesInfo( + GpuDevice.DeviceType.PCI, + "passthrough", + "passthrough", + "af:00.0", + "10de", + "NVIDIA Corporation", + "1b38", + "Tesla T4" + ); + gpuDef.defGpu(pciGpuInfo); + + String gpuXml = gpuDef.toString(); + + assertTrue(gpuXml.contains("
")); + } + @Test public void testGpuDef_withNullDeviceType() { LibvirtGpuDef gpuDef = new LibvirtGpuDef(); diff --git a/scripts/vm/hypervisor/kvm/gpudiscovery.sh b/scripts/vm/hypervisor/kvm/gpudiscovery.sh index bf6892c898db..e7c40c8810ca 100755 --- a/scripts/vm/hypervisor/kvm/gpudiscovery.sh +++ b/scripts/vm/hypervisor/kvm/gpudiscovery.sh @@ -357,7 +357,33 @@ # "vgpu_instances":[], # "vf_instances":[] -# } +# }, +# { +# "pci_address": "0001:65:00.0", +# "vendor_id": "10de", +# "device_id": "20B0", +# "vendor": "NVIDIA Corporation", +# "device": "A100-SXM4-40GB", +# "driver": "nvidia", +# "pci_class": "3D controller", +# "iommu_group": "20", +# "sriov_totalvfs": 0, +# "sriov_numvfs": 0, +# +# "full_passthrough": { +# "enabled": true, +# "libvirt_address": { +# "domain": "0x0001", +# "bus": "0x65", +# "slot": "0x00", +# "function": "0x0" +# }, +# "used_by_vm": "ml-train" +# }, +# +# "vgpu_instances": [], +# "vf_instances": [] +# } # ] # } # @@ -416,9 +442,22 @@ parse_nvidia_vgpu_profiles() { store_profile_data gpu_address="${BASH_REMATCH[1]}" - # Convert from format like 00000000:AF:00.0 to AF:00.0 and normalize to lowercase - if [[ $gpu_address =~ [0-9A-Fa-f]+:([0-9A-Fa-f]+:[0-9A-Fa-f]+\.[0-9A-Fa-f]+) ]]; then - gpu_address="${BASH_REMATCH[1],,}" + # nvidia-smi reports addresses in the form "00000000:AF:00.0" + # (8-digit domain). Reformat to the canonical 4-digit + # "dddd:bb:ss.f" lowercase form so cache keys line up with the + # normalize_pci_address output used at lookup time. Short + # addresses ("AF:00.0") are widened by prepending domain 0. + if [[ $gpu_address =~ ^([0-9A-Fa-f]+):([0-9A-Fa-f]+):([0-9A-Fa-f]+)\.([0-9A-Fa-f]+)$ ]]; then + gpu_address=$(printf "%04x:%s:%s.%s" \ + "0x${BASH_REMATCH[1]}" \ + "${BASH_REMATCH[2],,}" \ + "${BASH_REMATCH[3],,}" \ + "${BASH_REMATCH[4],,}") + elif [[ $gpu_address =~ ^([0-9A-Fa-f]+):([0-9A-Fa-f]+)\.([0-9A-Fa-f]+)$ ]]; then + gpu_address=$(printf "0000:%s:%s.%s" \ + "${BASH_REMATCH[1],,}" \ + "${BASH_REMATCH[2],,}" \ + "${BASH_REMATCH[3],,}") else gpu_address="${gpu_address,,}" fi @@ -506,10 +545,52 @@ get_nvidia_profile_info() { fi } -# Get nodedev name for a PCI address (e.g. "00:02.0" -> "pci_0000_00_02_0") -get_nodedev_name() { +# Parse a PCI address and assign the libvirt-formatted DOMAIN/BUS/SLOT/FUNC +# values into the caller's scope. Accepts both full ("0000:00:02.0") and short +# ("00:02.0") formats; short addresses are assumed to be in PCI domain 0. +# +# IMPORTANT: bash uses dynamic scoping, so callers that don't want these four +# values to leak into the global scope MUST declare them `local` before +# invoking this function, e.g.: +# local DOMAIN BUS SLOT FUNC +# parse_pci_address "$addr" +parse_pci_address() { + local addr="$1" + if [[ $addr =~ ^([0-9A-Fa-f]+):([0-9A-Fa-f]+):([0-9A-Fa-f]+)\.([0-9A-Fa-f]+)$ ]]; then + DOMAIN="0x${BASH_REMATCH[1]}" + BUS="0x${BASH_REMATCH[2]}" + SLOT="0x${BASH_REMATCH[3]}" + FUNC="0x${BASH_REMATCH[4]}" + elif [[ $addr =~ ^([0-9A-Fa-f]+):([0-9A-Fa-f]+)\.([0-9A-Fa-f]+)$ ]]; then + DOMAIN="0x0000" + BUS="0x${BASH_REMATCH[1]}" + SLOT="0x${BASH_REMATCH[2]}" + FUNC="0x${BASH_REMATCH[3]}" + else + DOMAIN="0x0000" + BUS="0x00" + SLOT="0x00" + FUNC="0x0" + fi +} + +# Normalize a PCI address to its full domain-qualified form ("dddd:bb:ss.f"). +# Short addresses are widened by prepending domain "0000". +normalize_pci_address() { local addr="$1" - echo "pci_$(echo "$addr" | sed 's/[:.]/\_/g' | sed 's/^/0000_/')" + if [[ $addr =~ ^[0-9A-Fa-f]+:[0-9A-Fa-f]+:[0-9A-Fa-f]+\.[0-9A-Fa-f]+$ ]]; then + echo "$addr" + else + echo "0000:$addr" + fi +} + +# Get nodedev name for a PCI address (e.g. "0000:00:02.0" -> "pci_0000_00_02_0"). +# Short addresses are widened to domain 0 first. +get_nodedev_name() { + local addr + addr=$(normalize_pci_address "$1") + echo "pci_$(echo "$addr" | sed 's/[:.]/\_/g')" } # Get cached nodedev XML for a PCI address @@ -572,9 +653,12 @@ get_numa_node() { echo "${node:--1}" } -# Given a PCI address, return its PCI root (the top‐level bridge ID, e.g. "0000:00:03") +# Given a PCI address, return its PCI root (the top‐level bridge ID, e.g. +# "0000:00:03.0"). Both full ("0000:00:02.0") and short ("00:02.0") inputs are +# accepted; output is always domain-qualified. get_pci_root() { - local addr="$1" + local addr + addr=$(normalize_pci_address "$1") local xml xml=$(get_nodedev_xml "$addr") @@ -583,21 +667,23 @@ get_pci_root() { local parent parent=$(echo "$xml" | xmlstarlet sel -t -v "/device/parent" 2>/dev/null || true) if [[ -n "$parent" ]]; then - # If parent is a PCI device, recursively find its root - if [[ $parent =~ ^pci_0000_([0-9A-Fa-f]{2})_([0-9A-Fa-f]{2})_([0-9A-Fa-f])$ ]]; then - local parent_addr="${BASH_REMATCH[1]}:${BASH_REMATCH[2]}.${BASH_REMATCH[3]}" + # If parent is a PCI device, recursively find its root. + # libvirt nodedev names look like pci____ + # where is one or more hex digits (typically 4). + if [[ $parent =~ ^pci_([0-9A-Fa-f]+)_([0-9A-Fa-f]{2})_([0-9A-Fa-f]{2})_([0-9A-Fa-f])$ ]]; then + local parent_addr="${BASH_REMATCH[1]}:${BASH_REMATCH[2]}:${BASH_REMATCH[3]}.${BASH_REMATCH[4]}" get_pci_root "$parent_addr" return else - # Parent is not PCI device, so current device is the root - echo "0000:$addr" + # Parent is not a PCI device, so current device is the root + echo "$addr" return fi fi fi # fallback - echo "0000:$addr" + echo "$addr" } # Build VM → hostdev maps: @@ -613,16 +699,21 @@ for VM in "${VMS[@]}"; do continue fi - # -- PCI hostdevs: use xmlstarlet to extract BDF for all PCI host devices -- - while read -r bus slot func; do + # -- PCI hostdevs: use xmlstarlet to extract the full domain:bus:slot.func + # for all PCI host devices. libvirt's
element may omit the domain + # attribute, in which case we default to 0. + while read -r dom bus slot func; do [[ -n "$bus" && -n "$slot" && -n "$func" ]] || continue - # Format to match lspci output (e.g., 01:00.0) by padding with zeros + [[ -n "$dom" ]] || dom="0" + # Format to match lspci -D output (e.g., 0000:01:00.0) by padding with zeros + dom_fmt=$(printf "%04x" "0x$dom") bus_fmt=$(printf "%02x" "0x$bus") slot_fmt=$(printf "%02x" "0x$slot") func_fmt=$(printf "%x" "0x$func") - BDF="$bus_fmt:$slot_fmt.$func_fmt" + BDF="${dom_fmt}:${bus_fmt}:${slot_fmt}.${func_fmt}" pci_to_vm["$BDF"]="$VM" done < <(echo "$xml" | xmlstarlet sel -T -t -m "//hostdev[@type='pci']/source/address" \ + -v "substring-after(@domain, '0x')" -o " " \ -v "substring-after(@bus, '0x')" -o " " \ -v "substring-after(@slot, '0x')" -o " " \ -v "substring-after(@function, '0x')" -n 2>/dev/null || true) @@ -677,7 +768,8 @@ parse_and_add_gpu_properties() { # Appends JSON strings for each found mdev instance to the global 'vlist' array. # Arguments: # $1: mdev_base_path (e.g., /sys/bus/pci/devices/.../mdev_supported_types) -# $2: bdf (e.g., 01:00.0) +# $2: bdf — short "bb:ss.f" (e.g. 01:00.0) or full +# "dddd:bb:ss.f" (e.g. 0001:65:00.0) for non-zero PCI domains process_mdev_instances() { local mdev_base_path="$1" local bdf="$2" @@ -705,10 +797,8 @@ process_mdev_instances() { local MDEV_UUID MDEV_UUID=$(basename "$UDIR") - local DOMAIN="0x0000" - local BUS="0x${bdf:0:2}" - local SLOT="0x${bdf:3:2}" - local FUNC="0x${bdf:6:1}" + local DOMAIN BUS SLOT FUNC + parse_pci_address "$bdf" local raw raw="${mdev_to_vm[$MDEV_UUID]:-}" @@ -727,6 +817,10 @@ process_mdev_instances() { # Parse nvidia-smi vgpu profiles once at the beginning parse_nvidia_vgpu_profiles +# `lspci -nnm` keeps the existing output format for devices in PCI domain 0 +# (short "bb:ss.f" form) and includes the domain prefix only for devices on +# non-zero PCI segments (multi-IIO servers, some ARM systems). The helpers +# below (parse_pci_address, normalize_pci_address) accept both formats. mapfile -t LINES < <(lspci -nnm) echo '{ "gpus": [' @@ -743,8 +837,13 @@ for LINE in "${LINES[@]}"; do continue fi + # sysfs paths always require the full domain-qualified form. PCI_ADDR may + # be short ("00:02.0") for domain 0 or full ("0001:65:00.0") otherwise, so + # we derive a separate SYSFS_ADDR just for filesystem lookups. + SYSFS_ADDR=$(normalize_pci_address "$PCI_ADDR") + # If this is a VF, skip it. It will be processed under its PF. - if [[ -e "/sys/bus/pci/devices/0000:$PCI_ADDR/physfn" ]]; then + if [[ -e "/sys/bus/pci/devices/$SYSFS_ADDR/physfn" ]]; then continue fi @@ -761,7 +860,7 @@ for LINE in "${LINES[@]}"; do DEVICE_ID=$(sed -E 's/.*\[([0-9A-Fa-f]{4})\]$/\1/' <<<"$DEVICE_FIELD") # Kernel driver - DRV_PATH="/sys/bus/pci/devices/0000:$PCI_ADDR/driver" + DRV_PATH="/sys/bus/pci/devices/$SYSFS_ADDR/driver" if [[ -L $DRV_PATH ]]; then DRIVER=$(basename "$(readlink "$DRV_PATH")") else @@ -781,7 +880,7 @@ for LINE in "${LINES[@]}"; do read -r TOTALVFS NUMVFS < <(get_sriov_counts "$PCI_ADDR") # Get Physical GPU properties from its own description file, if available - PF_DESC_PATH="/sys/bus/pci/devices/0000:$PCI_ADDR/description" + PF_DESC_PATH="/sys/bus/pci/devices/$SYSFS_ADDR/description" parse_and_add_gpu_properties "$PF_DESC_PATH" # Save physical function's properties before they are overwritten by vGPU/VF processing PF_MAX_INSTANCES=$MAX_INSTANCES @@ -791,25 +890,27 @@ for LINE in "${LINES[@]}"; do PF_MAX_RESOLUTION_Y=$MAX_RESOLUTION_Y # === full_passthrough usage === - raw="${pci_to_vm[$PCI_ADDR]:-}" + # pci_to_vm is keyed by the full domain-qualified form, so normalize the + # lspci-style PCI_ADDR (which may be short for domain 0) before lookup. + raw="${pci_to_vm[$(normalize_pci_address "$PCI_ADDR")]:-}" FULL_USED_JSON=$(to_json_vm "$raw") # === vGPU (MDEV) instances === VGPU_ARRAY="[]" declare -a vlist=() # Process mdev on the Physical Function - MDEV_BASE="/sys/bus/pci/devices/0000:$PCI_ADDR/mdev_supported_types" + MDEV_BASE="/sys/bus/pci/devices/$SYSFS_ADDR/mdev_supported_types" process_mdev_instances "$MDEV_BASE" "$PCI_ADDR" # === VF instances (SR-IOV / MIG) === VF_ARRAY="[]" declare -a flist=() if ((TOTALVFS > 0)); then - for VF_LINK in /sys/bus/pci/devices/0000:"$PCI_ADDR"/virtfn*; do + for VF_LINK in /sys/bus/pci/devices/"$SYSFS_ADDR"/virtfn*; do [[ -L $VF_LINK ]] || continue VF_PATH=$(readlink -f "$VF_LINK") - VF_ADDR=${VF_PATH##*/} # e.g. "0000:65:00.2" - VF_BDF="${VF_ADDR:5}" # "65:00.2" + # Keep the full domain-qualified address (e.g. "0000:65:00.2") + VF_BDF=${VF_PATH##*/} # For NVIDIA SR-IOV, check for vGPU (mdev) on the VF itself if [[ "$VENDOR_ID" == "10de" ]]; then @@ -817,10 +918,7 @@ for LINE in "${LINES[@]}"; do process_mdev_instances "$VF_MDEV_BASE" "$VF_BDF" fi - DOMAIN="0x0000" - BUS="0x${VF_BDF:0:2}" - SLOT="0x${VF_BDF:3:2}" - FUNC="0x${VF_BDF:6:1}" + parse_pci_address "$VF_BDF" # Determine vf_profile using nvidia-smi information VF_PROFILE="" @@ -835,8 +933,10 @@ for LINE in "${LINES[@]}"; do # For NVIDIA GPUs, check current vGPU type current_vgpu_type=$(get_current_vgpu_type "$VF_PATH") if [[ "$current_vgpu_type" != "0" ]]; then - # Get profile info from nvidia-smi cache - profile_info=$(get_nvidia_profile_info "$PCI_ADDR" "$current_vgpu_type") + # nvidia_vgpu_profiles is keyed by the canonical full + # "dddd:bb:ss.f" form, so widen PCI_ADDR (which may be + # short for domain 0) before the cache lookup. + profile_info=$(get_nvidia_profile_info "$(normalize_pci_address "$PCI_ADDR")" "$current_vgpu_type") IFS='|' read -r VF_PROFILE_NAME VF_MAX_INSTANCES VF_VIDEO_RAM VF_MAX_HEADS VF_MAX_RESOLUTION_X VF_MAX_RESOLUTION_Y <<< "$profile_info" VF_PROFILE="$VF_PROFILE_NAME" fi @@ -853,12 +953,17 @@ for LINE in "${LINES[@]}"; do fi VF_PROFILE_JSON=$(json_escape "$VF_PROFILE") - # Determine which VM uses this VF_BDF - raw="${pci_to_vm[$VF_BDF]:-}" + # Determine which VM uses this VF_BDF (normalize for map lookup) + raw="${pci_to_vm[$(normalize_pci_address "$VF_BDF")]:-}" USED_JSON=$(to_json_vm "$raw") + # For backward-compat JSON output, strip the default "0000:" domain + # prefix so domain-0 VFs still print as "65:00.2" rather than the + # full "0000:65:00.2". Non-zero domains are preserved unchanged. + VF_DISPLAY_ADDR="${VF_BDF#0000:}" + flist+=( - "{\"vf_pci_address\":\"$VF_BDF\",\"vf_profile\":$VF_PROFILE_JSON,\"max_instances\":$VF_MAX_INSTANCES,\"video_ram\":$VF_VIDEO_RAM,\"max_heads\":$VF_MAX_HEADS,\"max_resolution_x\":$VF_MAX_RESOLUTION_X,\"max_resolution_y\":$VF_MAX_RESOLUTION_Y,\"libvirt_address\":{\"domain\":\"$DOMAIN\",\"bus\":\"$BUS\",\"slot\":\"$SLOT\",\"function\":\"$FUNC\"},\"used_by_vm\":$USED_JSON}") + "{\"vf_pci_address\":\"$VF_DISPLAY_ADDR\",\"vf_profile\":$VF_PROFILE_JSON,\"max_instances\":$VF_MAX_INSTANCES,\"video_ram\":$VF_VIDEO_RAM,\"max_heads\":$VF_MAX_HEADS,\"max_resolution_x\":$VF_MAX_RESOLUTION_X,\"max_resolution_y\":$VF_MAX_RESOLUTION_Y,\"libvirt_address\":{\"domain\":\"$DOMAIN\",\"bus\":\"$BUS\",\"slot\":\"$SLOT\",\"function\":\"$FUNC\"},\"used_by_vm\":$USED_JSON}") done if [ ${#flist[@]} -gt 0 ]; then VF_ARRAY="[$( @@ -882,10 +987,7 @@ for LINE in "${LINES[@]}"; do if [[ ${#vlist[@]} -eq 0 && ${#flist[@]} -eq 0 ]]; then FP_ENABLED=1 fi - DOMAIN="0x0000" - BUS="0x${PCI_ADDR:0:2}" - SLOT="0x${PCI_ADDR:3:2}" - FUNC="0x${PCI_ADDR:6:1}" + parse_pci_address "$PCI_ADDR" # Emit JSON if $first_gpu; then From 78a511453e769f81cce2fcf605f0c810ebac8dbe Mon Sep 17 00:00:00 2001 From: vishesh92 Date: Wed, 8 Apr 2026 14:32:37 +0530 Subject: [PATCH 2/6] Address comments --- .../kvm/resource/LibvirtGpuDef.java | 45 +++++++++++-------- .../kvm/resource/LibvirtGpuDefTest.java | 8 ++-- 2 files changed, 32 insertions(+), 21 deletions(-) diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtGpuDef.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtGpuDef.java index 77ee9b0af31f..b33f0179fc33 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtGpuDef.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtGpuDef.java @@ -82,33 +82,42 @@ private void generatePciXml(StringBuilder gpuBuilder) { // Parse the bus address into domain, bus, slot, function. Two input formats are accepted: // - "dddd:bb:ss.f" full PCI address with domain (e.g. 0000:00:02.0) // - "bb:ss.f" legacy short BDF; domain defaults to 0000 - String domain = "0x0000"; - String bus = "0x00"; - String slot = "0x00"; - String function = "0x0"; + // Each segment is parsed as a hex integer and formatted with fixed widths + // (domain: 4 hex digits, bus/slot: 2 hex digits, function: 1 hex digit) to + // produce canonical libvirt XML values regardless of input casing or padding. + int domainVal = 0, busVal = 0, slotVal = 0, funcVal = 0; if (busAddress != null && !busAddress.isEmpty()) { String[] parts = busAddress.split(":"); String slotFunction = null; - if (parts.length == 3) { - domain = "0x" + parts[0]; - bus = "0x" + parts[1]; - slotFunction = parts[2]; - } else if (parts.length == 2) { - bus = "0x" + parts[0]; - slotFunction = parts[1]; - } - if (slotFunction != null) { - String[] slotFunctionParts = slotFunction.split("\\."); - if (slotFunctionParts.length > 0) { - slot = "0x" + slotFunctionParts[0]; - if (slotFunctionParts.length > 1) { - function = "0x" + slotFunctionParts[1].trim(); + try { + if (parts.length == 3) { + domainVal = Integer.parseInt(parts[0], 16); + busVal = Integer.parseInt(parts[1], 16); + slotFunction = parts[2]; + } else if (parts.length == 2) { + busVal = Integer.parseInt(parts[0], 16); + slotFunction = parts[1]; + } + if (slotFunction != null) { + String[] slotFunctionParts = slotFunction.split("\\."); + if (slotFunctionParts.length > 0) { + slotVal = Integer.parseInt(slotFunctionParts[0], 16); + if (slotFunctionParts.length > 1) { + funcVal = Integer.parseInt(slotFunctionParts[1].trim(), 16); + } } } + } catch (NumberFormatException e) { + // leave all values at 0 (safe defaults) } } + String domain = String.format("0x%04x", domainVal); + String bus = String.format("0x%02x", busVal); + String slot = String.format("0x%02x", slotVal); + String function = String.format("0x%x", funcVal); + gpuBuilder.append("
\n"); gpuBuilder.append(" \n"); diff --git a/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/LibvirtGpuDefTest.java b/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/LibvirtGpuDefTest.java index 6b8eac8e2231..efdc740fa67b 100644 --- a/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/LibvirtGpuDefTest.java +++ b/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/LibvirtGpuDefTest.java @@ -156,13 +156,15 @@ public void testGpuDef_withFullPciAddressNonZeroDomain() { } @Test - public void testGpuDef_withFullPciAddressWideDomain() { + public void testGpuDef_withNvidiaStyleEightDigitDomain() { + // nvidia-smi reports PCI addresses with an 8-digit domain (e.g. "00000001:af:00.1"). + // generatePciXml must normalize it to the canonical 4-digit form "0x0001". LibvirtGpuDef gpuDef = new LibvirtGpuDef(); VgpuTypesInfo pciGpuInfo = new VgpuTypesInfo( GpuDevice.DeviceType.PCI, "passthrough", "passthrough", - "10000:af:00.1", + "00000001:af:00.1", "10de", "NVIDIA Corporation", "1b38", @@ -172,7 +174,7 @@ public void testGpuDef_withFullPciAddressWideDomain() { String gpuXml = gpuDef.toString(); - assertTrue(gpuXml.contains("
")); + assertTrue(gpuXml.contains("
")); } @Test From 8ead015c296f0dd7f094c0b7a615a411a6b65152 Mon Sep 17 00:00:00 2001 From: Piet Braat Date: Fri, 3 Apr 2026 09:06:56 +0200 Subject: [PATCH 3/6] Add Display controller to GPU class check this adds support for the amd instinct mi2xx accelorator crards in the discovery script. --- scripts/vm/hypervisor/kvm/gpudiscovery.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/vm/hypervisor/kvm/gpudiscovery.sh b/scripts/vm/hypervisor/kvm/gpudiscovery.sh index e7c40c8810ca..7aec0369da7a 100755 --- a/scripts/vm/hypervisor/kvm/gpudiscovery.sh +++ b/scripts/vm/hypervisor/kvm/gpudiscovery.sh @@ -848,7 +848,7 @@ for LINE in "${LINES[@]}"; do fi # Only process GPU classes (3D controller) - if [[ ! "$PCI_CLASS" =~ (3D\ controller|Processing\ accelerators) ]]; then + if [[ ! "$PCI_CLASS" =~ (3D\ controller|Processing\ accelerators|Display\ controller) ]]; then continue fi From 6fa2a88cc64ce385449e53ed24e1da4e6b2acf92 Mon Sep 17 00:00:00 2001 From: vishesh92 Date: Wed, 8 Apr 2026 14:37:03 +0530 Subject: [PATCH 4/6] fixup --- scripts/vm/hypervisor/kvm/gpudiscovery.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/vm/hypervisor/kvm/gpudiscovery.sh b/scripts/vm/hypervisor/kvm/gpudiscovery.sh index 7aec0369da7a..3ee902503347 100755 --- a/scripts/vm/hypervisor/kvm/gpudiscovery.sh +++ b/scripts/vm/hypervisor/kvm/gpudiscovery.sh @@ -847,7 +847,7 @@ for LINE in "${LINES[@]}"; do continue fi - # Only process GPU classes (3D controller) + # Only process PCI classes - 3D controller, Processing accelerators, Display controller if [[ ! "$PCI_CLASS" =~ (3D\ controller|Processing\ accelerators|Display\ controller) ]]; then continue fi From 8d0979003cda743c66e106e9ab2b92ebf5f78303 Mon Sep 17 00:00:00 2001 From: vishesh92 Date: Wed, 8 Apr 2026 15:40:39 +0530 Subject: [PATCH 5/6] Address comments --- .../kvm/resource/LibvirtGpuDef.java | 22 +++++----- .../kvm/resource/LibvirtGpuDefTest.java | 33 ++++++++++++++ scripts/vm/hypervisor/kvm/gpudiscovery.sh | 44 ++++++++++--------- 3 files changed, 67 insertions(+), 32 deletions(-) diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtGpuDef.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtGpuDef.java index b33f0179fc33..d3765c01ccbc 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtGpuDef.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtGpuDef.java @@ -89,8 +89,8 @@ private void generatePciXml(StringBuilder gpuBuilder) { if (busAddress != null && !busAddress.isEmpty()) { String[] parts = busAddress.split(":"); - String slotFunction = null; try { + String slotFunction; if (parts.length == 3) { domainVal = Integer.parseInt(parts[0], 16); busVal = Integer.parseInt(parts[1], 16); @@ -98,18 +98,18 @@ private void generatePciXml(StringBuilder gpuBuilder) { } else if (parts.length == 2) { busVal = Integer.parseInt(parts[0], 16); slotFunction = parts[1]; + } else { + throw new IllegalArgumentException("Invalid PCI bus address format: '" + busAddress + "'"); } - if (slotFunction != null) { - String[] slotFunctionParts = slotFunction.split("\\."); - if (slotFunctionParts.length > 0) { - slotVal = Integer.parseInt(slotFunctionParts[0], 16); - if (slotFunctionParts.length > 1) { - funcVal = Integer.parseInt(slotFunctionParts[1].trim(), 16); - } - } + String[] sf = slotFunction.split("\\."); + if (sf.length == 2) { + slotVal = Integer.parseInt(sf[0], 16); + funcVal = Integer.parseInt(sf[1].trim(), 16); + } else { + throw new IllegalArgumentException("Invalid PCI bus address format: '" + busAddress + "'"); } } catch (NumberFormatException e) { - // leave all values at 0 (safe defaults) + throw new IllegalArgumentException("Invalid PCI bus address format: '" + busAddress + "'", e); } } @@ -119,7 +119,7 @@ private void generatePciXml(StringBuilder gpuBuilder) { String function = String.format("0x%x", funcVal); gpuBuilder.append("
\n"); + .append(slot).append("' function='").append(function).append("'/>\n"); gpuBuilder.append(" \n"); gpuBuilder.append("\n"); } diff --git a/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/LibvirtGpuDefTest.java b/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/LibvirtGpuDefTest.java index efdc740fa67b..e6f63e852f85 100644 --- a/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/LibvirtGpuDefTest.java +++ b/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/LibvirtGpuDefTest.java @@ -221,6 +221,39 @@ public void testGpuDef_withLegacyShortBdfDefaultsDomainToZero() { assertTrue(gpuXml.contains("
")); } + @Test + public void testGpuDef_withInvalidBusAddressThrows() { + String[] invalidAddresses = { + "notahex:00.0", // non-hex bus + "gg:00:02.0", // non-hex domain + "00:02:03:04", // too many colon-separated parts + "00", // missing slot/function + "00:02", // missing function (no dot) + "00:02.0.1", // extra dot in ss.f + }; + for (String addr : invalidAddresses) { + LibvirtGpuDef gpuDef = new LibvirtGpuDef(); + VgpuTypesInfo info = new VgpuTypesInfo( + GpuDevice.DeviceType.PCI, + "passthrough", + "passthrough", + addr, + "10de", + "NVIDIA Corporation", + "1b38", + "Tesla T4" + ); + gpuDef.defGpu(info); + try { + String ignored = gpuDef.toString(); + fail("Expected IllegalArgumentException for address: " + addr + " but got: " + ignored); + } catch (IllegalArgumentException e) { + assertTrue("Exception message should contain the bad address", + e.getMessage().contains(addr)); + } + } + } + @Test public void testGpuDef_withNullDeviceType() { LibvirtGpuDef gpuDef = new LibvirtGpuDef(); diff --git a/scripts/vm/hypervisor/kvm/gpudiscovery.sh b/scripts/vm/hypervisor/kvm/gpudiscovery.sh index 3ee902503347..0c21669f03ba 100755 --- a/scripts/vm/hypervisor/kvm/gpudiscovery.sh +++ b/scripts/vm/hypervisor/kvm/gpudiscovery.sh @@ -448,16 +448,12 @@ parse_nvidia_vgpu_profiles() { # normalize_pci_address output used at lookup time. Short # addresses ("AF:00.0") are widened by prepending domain 0. if [[ $gpu_address =~ ^([0-9A-Fa-f]+):([0-9A-Fa-f]+):([0-9A-Fa-f]+)\.([0-9A-Fa-f]+)$ ]]; then - gpu_address=$(printf "%04x:%s:%s.%s" \ - "0x${BASH_REMATCH[1]}" \ - "${BASH_REMATCH[2],,}" \ - "${BASH_REMATCH[3],,}" \ - "${BASH_REMATCH[4],,}") + gpu_address=$(printf "%04x:%02x:%02x.%x" \ + "0x${BASH_REMATCH[1]}" "0x${BASH_REMATCH[2]}" \ + "0x${BASH_REMATCH[3]}" "0x${BASH_REMATCH[4]}") elif [[ $gpu_address =~ ^([0-9A-Fa-f]+):([0-9A-Fa-f]+)\.([0-9A-Fa-f]+)$ ]]; then - gpu_address=$(printf "0000:%s:%s.%s" \ - "${BASH_REMATCH[1],,}" \ - "${BASH_REMATCH[2],,}" \ - "${BASH_REMATCH[3],,}") + gpu_address=$(printf "0000:%02x:%02x.%x" \ + "0x${BASH_REMATCH[1]}" "0x${BASH_REMATCH[2]}" "0x${BASH_REMATCH[3]}") else gpu_address="${gpu_address,,}" fi @@ -557,15 +553,15 @@ get_nvidia_profile_info() { parse_pci_address() { local addr="$1" if [[ $addr =~ ^([0-9A-Fa-f]+):([0-9A-Fa-f]+):([0-9A-Fa-f]+)\.([0-9A-Fa-f]+)$ ]]; then - DOMAIN="0x${BASH_REMATCH[1]}" - BUS="0x${BASH_REMATCH[2]}" - SLOT="0x${BASH_REMATCH[3]}" - FUNC="0x${BASH_REMATCH[4]}" + DOMAIN=$(printf "0x%04x" "0x${BASH_REMATCH[1]}") + BUS=$(printf "0x%02x" "0x${BASH_REMATCH[2]}") + SLOT=$(printf "0x%02x" "0x${BASH_REMATCH[3]}") + FUNC=$(printf "0x%x" "0x${BASH_REMATCH[4]}") elif [[ $addr =~ ^([0-9A-Fa-f]+):([0-9A-Fa-f]+)\.([0-9A-Fa-f]+)$ ]]; then DOMAIN="0x0000" - BUS="0x${BASH_REMATCH[1]}" - SLOT="0x${BASH_REMATCH[2]}" - FUNC="0x${BASH_REMATCH[3]}" + BUS=$(printf "0x%02x" "0x${BASH_REMATCH[1]}") + SLOT=$(printf "0x%02x" "0x${BASH_REMATCH[2]}") + FUNC=$(printf "0x%x" "0x${BASH_REMATCH[3]}") else DOMAIN="0x0000" BUS="0x00" @@ -574,14 +570,20 @@ parse_pci_address() { fi } -# Normalize a PCI address to its full domain-qualified form ("dddd:bb:ss.f"). -# Short addresses are widened by prepending domain "0000". +# Normalize a PCI address to its canonical full domain-qualified form +# ("dddd:bb:ss.f", lowercase, zero-padded). Both "dddd:bb:ss.f" (full) and +# "bb:ss.f" (short, domain 0) inputs are accepted. normalize_pci_address() { local addr="$1" - if [[ $addr =~ ^[0-9A-Fa-f]+:[0-9A-Fa-f]+:[0-9A-Fa-f]+\.[0-9A-Fa-f]+$ ]]; then - echo "$addr" + if [[ $addr =~ ^([0-9A-Fa-f]+):([0-9A-Fa-f]+):([0-9A-Fa-f]+)\.([0-9A-Fa-f]+)$ ]]; then + printf "%04x:%02x:%02x.%x\n" \ + "0x${BASH_REMATCH[1]}" "0x${BASH_REMATCH[2]}" \ + "0x${BASH_REMATCH[3]}" "0x${BASH_REMATCH[4]}" + elif [[ $addr =~ ^([0-9A-Fa-f]+):([0-9A-Fa-f]+)\.([0-9A-Fa-f]+)$ ]]; then + printf "0000:%02x:%02x.%x\n" \ + "0x${BASH_REMATCH[1]}" "0x${BASH_REMATCH[2]}" "0x${BASH_REMATCH[3]}" else - echo "0000:$addr" + echo "$addr" fi } From 4e7a8346b7428889b5f286ede006882d5c2a9df0 Mon Sep 17 00:00:00 2001 From: vishesh92 Date: Wed, 8 Apr 2026 16:08:27 +0530 Subject: [PATCH 6/6] Address comments --- scripts/vm/hypervisor/kvm/gpudiscovery.sh | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/scripts/vm/hypervisor/kvm/gpudiscovery.sh b/scripts/vm/hypervisor/kvm/gpudiscovery.sh index 0c21669f03ba..524f86241860 100755 --- a/scripts/vm/hypervisor/kvm/gpudiscovery.sh +++ b/scripts/vm/hypervisor/kvm/gpudiscovery.sh @@ -704,7 +704,7 @@ for VM in "${VMS[@]}"; do # -- PCI hostdevs: use xmlstarlet to extract the full domain:bus:slot.func # for all PCI host devices. libvirt's
element may omit the domain # attribute, in which case we default to 0. - while read -r dom bus slot func; do + while IFS=: read -r dom bus slot func; do [[ -n "$bus" && -n "$slot" && -n "$func" ]] || continue [[ -n "$dom" ]] || dom="0" # Format to match lspci -D output (e.g., 0000:01:00.0) by padding with zeros @@ -715,9 +715,9 @@ for VM in "${VMS[@]}"; do BDF="${dom_fmt}:${bus_fmt}:${slot_fmt}.${func_fmt}" pci_to_vm["$BDF"]="$VM" done < <(echo "$xml" | xmlstarlet sel -T -t -m "//hostdev[@type='pci']/source/address" \ - -v "substring-after(@domain, '0x')" -o " " \ - -v "substring-after(@bus, '0x')" -o " " \ - -v "substring-after(@slot, '0x')" -o " " \ + -v "substring-after(@domain, '0x')" -o ":" \ + -v "substring-after(@bus, '0x')" -o ":" \ + -v "substring-after(@slot, '0x')" -o ":" \ -v "substring-after(@function, '0x')" -n 2>/dev/null || true) # -- MDEV hostdevs: use xmlstarlet to extract UUIDs -- @@ -989,6 +989,8 @@ for LINE in "${LINES[@]}"; do if [[ ${#vlist[@]} -eq 0 && ${#flist[@]} -eq 0 ]]; then FP_ENABLED=1 fi + + # Sets global DOMAIN/BUS/SLOT/FUNC for JSON output below parse_pci_address "$PCI_ADDR" # Emit JSON