为什么即使我的系统中安装了 nvcc,我仍无法运行基本的 CUDA 程序?
my_username@192:~$ nvidia-smi
Sun Dec 3 10:23:33 2023
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.223.02 Driver Version: 470.223.02 CUDA Version: 11.4 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|===============================+======================+======================|
| 0 NVIDIA GeForce ... Off | 00000000:03:00.0 N/A | N/A |
| 33% 28C P8 N/A / N/A | 14MiB / 3020MiB | N/A Default |
| | | N/A |
+-------------------------------+----------------------+----------------------+
| 1 NVIDIA GeForce ... Off | 00000000:84:00.0 N/A | N/A |
| 33% 29C P8 N/A / N/A | 6MiB / 3022MiB | N/A Default |
| | | N/A |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=============================================================================|
| No running processes found |
+-----------------------------------------------------------------------------+
my_username@192:~$ nvcc
nvcc fatal : No input files specified; use option --help for more information
my_username@192:~$ nvcc --version
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2021 NVIDIA Corporation
Built on Mon_Oct_11_21:27:02_PDT_2021
Cuda compilation tools, release 11.4, V11.4.152
Build cuda_11.4.r11.4/compiler.30521435_0
my_username@192:~$ nano test_cuda_installation.sh
my_username@192:~$ ./test_cuda_installation.sh
0 + 0 = 0
1 + 1 = 0
2 + 2 = 0
3 + 3 = 0
4 + 4 = 0
5 + 5 = 0
6 + 6 = 0
7 + 7 = 0
8 + 8 = 0
9 + 9 = 0
my_username@192:~$
my_username@192:~$ cat test_cuda_installation.sh
#!/bin/bash
# Create a CUDA source file
cat > test.cu << EOF
#include <stdio.h>
// CUDA Kernel function to add the elements of two arrays on the GPU
__global__ void add(int *a, int *b, int *c) {
int index = threadIdx.x;
c[index] = a[index] + b[index];
}
#define N 10 // Size of the vectors
int main(void) {
int a[N], b[N], c[N];
int *dev_a, *dev_b, *dev_c;
// allocate the memory on the GPU
cudaMalloc((void**)&dev_a, N * sizeof(int));
cudaMalloc((void**)&dev_b, N * sizeof(int));
cudaMalloc((void**)&dev_c, N * sizeof(int));
// fill the arrays 'a' and 'b' on the CPU
for (int i = 0; i < N; i++) {
a[i] = i;
b[i] = i;
}
// copy the arrays 'a' and 'b' to the GPU
cudaMemcpy(dev_a, a, N * sizeof(int), cudaMemcpyHostToDevice);
cudaMemcpy(dev_b, b, N * sizeof(int), cudaMemcpyHostToDevice);
add<<<1, N>>>(dev_a, dev_b, dev_c);
// copy the array 'c' back from the GPU to the CPU
cudaMemcpy(c, dev_c, N * sizeof(int), cudaMemcpyDeviceToHost);
// display the results
for (int i = 0; i < N; i++) {
printf("%d + %d = %d\n", a[i], b[i], c[i]);
}
// free the memory allocated on the GPU
cudaFree(dev_a);
cudaFree(dev_b);
cudaFree(dev_c);
return 0;
}
EOF
# Compile the CUDA source file
nvcc test.cu -o test
# Run the compiled executable
./test
my_username@192:~$