#include <stdio.h>

#include <cuda_runtime.h>

// Kernel: each launched thread prints a greeting tagged with its flat global
// index, computed as blockIdx.x * blockDim.x + threadIdx.x.
//
// Launch layout: only the x dimension of the grid/block is used for the index.
// The kernel takes no arguments and touches no device memory, so no bounds
// guard is required. Device-side printf is intended for debugging only.
__global__ void helloFromGPU() {
    int threadId = blockIdx.x * blockDim.x + threadIdx.x;

    printf("Hello World from GPU thread %d!\n", threadId);
}

// Entry point: prints a greeting from the host, launches helloFromGPU with
// 1 block of 10 threads, waits for it to finish, and reports any CUDA error.
//
// Returns 0 on success, -1 if the launch or the kernel execution failed.
int main() {
    printf("Hello World from CPU!\n");

    // Launch the kernel with 1 block of 10 threads.
    helloFromGPU<<<1, 10>>>();

    // A kernel launch returns no status itself; launch-configuration errors
    // are reported through cudaGetLastError() immediately after the launch.
    cudaError_t error = cudaGetLastError();

    // Only wait on the device if the launch was accepted. The synchronize call
    // returns errors raised while the kernel was running, and it is also the
    // point at which buffered device-side printf output is flushed.
    if (error == cudaSuccess) {
        error = cudaDeviceSynchronize();
    }

    if (error != cudaSuccess) {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(error));
        return -1;
    }

    printf("GPU kernel execution completed!\n");
    return 0;
}