diff --git a/e2e2/test/cases/nvidia/manifests/job-unit-test-single-node.yaml b/e2e2/test/cases/nvidia/manifests/job-unit-test-single-node.yaml index 2bfa61640..359c55a46 100644 --- a/e2e2/test/cases/nvidia/manifests/job-unit-test-single-node.yaml +++ b/e2e2/test/cases/nvidia/manifests/job-unit-test-single-node.yaml @@ -21,6 +21,7 @@ spec: limits: cpu: "4" memory: 4Gi + nvidia.com/gpu: {{.GpuPerNode}} requests: cpu: "1" memory: 1Gi diff --git a/e2e2/test/images/nvidia/Dockerfile b/e2e2/test/images/nvidia/Dockerfile index b947df5ed..db7ec5127 100644 --- a/e2e2/test/images/nvidia/Dockerfile +++ b/e2e2/test/images/nvidia/Dockerfile @@ -41,7 +41,8 @@ RUN apt install -y \ cmake \ apt-utils \ libhwloc-dev \ - cuda-demo-suite-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} + cuda-demo-suite-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \ + datacenter-gpu-manager RUN mkdir -p /var/run/sshd \ && sed -i 's/[ #]\(.*StrictHostKeyChecking \).*/ \1no/g' /etc/ssh/ssh_config \ diff --git a/e2e2/test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g5.8xlarge/gpu_count.txt b/e2e2/test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g5.8xlarge/gpu_count.txt new file mode 100644 index 000000000..1eb7323a4 --- /dev/null +++ b/e2e2/test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g5.8xlarge/gpu_count.txt @@ -0,0 +1,2 @@ +name, index, pci.bus_id +NVIDIA A10G, 0, 00000000:00:1E.0 diff --git a/e2e2/test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g5.8xlarge/numa_topo.txt b/e2e2/test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g5.8xlarge/numa_topo.txt new file mode 100644 index 000000000..bf254e2ba --- /dev/null +++ b/e2e2/test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g5.8xlarge/numa_topo.txt @@ -0,0 +1,2 @@ +/sys/devices/system/node/node0/cpulist:0-31 +/sys/devices/system/node/node0/distance:10 diff --git a/e2e2/test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g5.8xlarge/nvidia_persistence_status.txt b/e2e2/test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g5.8xlarge/nvidia_persistence_status.txt new file mode 100644 index 000000000..ce8d63903 --- /dev/null +++ b/e2e2/test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g5.8xlarge/nvidia_persistence_status.txt @@ -0,0 +1,2 @@ +name, pci.bus_id, persistence_mode +NVIDIA A10G, 00000000:00:1E.0, Enabled diff --git a/e2e2/test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g5.8xlarge/nvidia_smi_topo.txt b/e2e2/test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g5.8xlarge/nvidia_smi_topo.txt new file mode 100644 index 000000000..43547e732 --- /dev/null +++ b/e2e2/test/images/nvidia/gpu_unit_tests/tests/test_sysinfo.sh.data/g5.8xlarge/nvidia_smi_topo.txt @@ -0,0 +1,2 @@ + GPU0 CPU Affinity NUMA Affinity GPU NUMA ID +GPU0 X 0-31 0 N/A