// file: /courses/temple/ece_4822/lectures/current/lecture_12/example.cu
//
// This file contains my first CUDA program. Note that the file extension
// is ".cu".
//
// To compile this program:
//
//  nvcc -o example.exe example.cu
//
// This program prints "hello world" to the terminal and then adds two
// integers, running both a hello-world kernel and an add kernel on a GPU.
//

// include files
//
#include "example.h"

// function: cuda_ok
//
// arguments:
//  cudaError_t status: the status code returned by a CUDA runtime call
//  const char* what: a short description of the operation that was attempted
//
// return: true if status is cudaSuccess, false otherwise
//
// This function reports a failed CUDA runtime call on stderr so that
// errors are never silently ignored.
//
static bool cuda_ok(cudaError_t status, const char* what) {
  if (status == cudaSuccess) {
    return true;
  }
  fprintf(stderr, "main program: %s failed: %s\n", what,
          cudaGetErrorString(status));
  return false;
}

// function: main
//
// arguments: none
//
// return: 0 on success, 1 if any CUDA operation failed
//
// This function launches a hello-world kernel and then adds two integers
// on the GPU, printing the result.
//
int main(void) {

  // case no. 1: hello world
  //
  mykernel<<<1,1>>>();

  // a kernel launch returns no status of its own: cudaGetLastError()
  // picks up launch errors and the synchronize call picks up errors
  // raised while the kernel was running
  //
  if (!cuda_ok(cudaGetLastError(), "mykernel launch") ||
      !cuda_ok(cudaDeviceSynchronize(), "mykernel execution")) {
    return 1;
  }
  fprintf(stdout, "main program: hello world from the CPU / main program!\n");

  // case no. 2: add
  //
  // host copies of the operands and the result; c is initialized so it
  // never holds an indeterminate value
  //
  int a = 1, b = 27, c = 0;

  // device copies; these start out NULL so that the cleanup code below
  // is safe even if an allocation fails part of the way through
  //
  int *d_a = NULL, *d_b = NULL, *d_c = NULL;
  const size_t size = sizeof(int);

  // allocate space for device copies of a, b, c and copy the inputs to
  // the device; the && chain stops at the first failure
  //
  bool ok =
    cuda_ok(cudaMalloc((void **)&d_a, size), "cudaMalloc(d_a)") &&
    cuda_ok(cudaMalloc((void **)&d_b, size), "cudaMalloc(d_b)") &&
    cuda_ok(cudaMalloc((void **)&d_c, size), "cudaMalloc(d_c)") &&
    cuda_ok(cudaMemcpy(d_a, &a, size, cudaMemcpyHostToDevice),
            "cudaMemcpy(a, host to device)") &&
    cuda_ok(cudaMemcpy(d_b, &b, size, cudaMemcpyHostToDevice),
            "cudaMemcpy(b, host to device)");

  if (ok) {

    // launch myadd() kernel on GPU
    //
    myadd<<<1,1>>>(d_c, d_a, d_b);

    // copy result back to host: this cudaMemcpy does not return until
    // the copy has completed, so no extra cudaDeviceSynchronize() is
    // needed before c is read
    //
    ok = cuda_ok(cudaGetLastError(), "myadd launch") &&
         cuda_ok(cudaMemcpy(&c, d_c, size, cudaMemcpyDeviceToHost),
                 "cudaMemcpy(c, device to host)");
  }

  // display the result (only when it is known to be valid)
  //
  if (ok) {
    fprintf(stdout, "myadd: %d + %d = %d\n", a, b, c);
  }

  // cleanup: cudaFree() accepts a NULL pointer, so this single path
  // serves both the success case and every failure case
  //
  cudaFree(d_a);
  cudaFree(d_b);
  cudaFree(d_c);

  // exit gracefully
  //
  return ok ? 0 : 1;
}