array.cu 3.41 KB
Newer Older
Oleg Dzhimiev's avatar
Oleg Dzhimiev committed
1 2 3 4 5 6 7 8 9 10 11
#include <cuda.h>
#include <cuda_runtime.h>
#include <stdio.h>

// Device kernel: adds 1 to each of the first N bytes of numberArray.
//
// Launch layout: 1-D grid of 1-D blocks. Any grid/block size is valid:
// every thread starts at its flat global index
//     blockIdx.x*blockDim.x + threadIdx.x
// and then advances by the total number of launched threads
//     gridDim.x*blockDim.x
// (a grid-stride loop), so each element is handled by exactly one thread
// even when fewer than N threads are launched.
//
// Parameters:
//   numberArray - buffer that device code can dereference, holding at
//                 least N bytes; modified in place.
//   N           - number of elements to increment; N <= 0 is a no-op.
//
// Elements are uint8_t, so 255 + 1 is stored as 0.
// No shared memory is used.
__global__ void incrementArrayViaCUDAdevice(uint8_t *numberArray, int N)
{
    // nothing to do; also keeps the size_t conversion below from turning
    // a negative N into a huge element count
    if (N <= 0)
        return;

    // index math is done in size_t so blockIdx.x*blockDim.x cannot wrap
    // around in 32 bits on very large grids
    const size_t count  = (size_t)N;
    const size_t stride = (size_t)gridDim.x * blockDim.x;

    for (size_t idx = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
         idx < count;
         idx += stride)
    {
        numberArray[idx] = numberArray[idx] + 1;
    }
}

// CPU reference implementation: applies the same "+1 per element"
// operation as the CUDA kernel, but sequentially on the host.
//
//   numberArray - host-accessible buffer of at least N bytes, modified
//                 in place
//   N           - number of elements to increment; nothing is touched
//                 when N <= 0
void incrementArray(uint8_t *numberArray, int N){
    uint8_t *cursor = numberArray;
    int remaining = N;

    // walk the buffer front to back, bumping one element per step
    while (remaining > 0)
    {
        *cursor = *cursor + 1;
        ++cursor;
        --remaining;
    }
}

Oleg Dzhimiev's avatar
Oleg Dzhimiev committed
39
// Reports a failed CUDA runtime call on stderr.
// Returns true when status is cudaSuccess, false otherwise.
static bool myCudaCallSucceeded(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return true;

    fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Runs a small GPU-vs-CPU self test and copies the GPU result to tf_ptr.
//
// Two managed arrays of 100 bytes are filled with 0..99; one is
// incremented by the CUDA kernel, the other by the CPU reference. The
// element-by-element comparison is printed to stdout, then the
// GPU-processed array (values 1..100) is copied into tf_ptr.
//
// Parameters:
//   tf_ptr - destination of the final copy. It is NOT allocated here; the
//            copy uses cudaMemcpyDeviceToDevice, so this presumably must
//            point to at least 100 bytes of device-accessible memory
//            owned by the caller - TODO confirm against the caller.
//
// Returns 0 when every CUDA call succeeded (a CPU/GPU mismatch is only
// reported on stdout and still returns 0), 1 when a CUDA call failed.
int myCreateCUDAArray(uint8_t *tf_ptr){
    // some arbitrary array length
    const int numberOfNumbers = 100;
    const size_t numberOfBytes = sizeof(uint8_t)*numberOfNumbers;

    // launch configuration: fixed block size, enough blocks to cover the
    // array (ceil-div), so the array length is not limited by the maximum
    // block size
    const int threadsPerBlock = 128;
    const int numberOfBlocks = (numberOfNumbers + threadsPerBlock - 1) / threadsPerBlock;

    // arrays for storing numbers; start as NULL so the cleanup at the end
    // is safe on every path
    uint8_t *numbers1 = NULL;
    uint8_t *numbers2 = NULL;

    // pessimistic default, set to 0 only once everything succeeded
    int result = 1;

    // reserve (allocate) some working space for the numbers in managed
    // memory (accessed from both host and device code below)
    // do not malloc for tf_ptr
    if (myCudaCallSucceeded(cudaMallocManaged(&numbers1, numberOfBytes), "cudaMallocManaged(numbers1)") &&
        myCudaCallSucceeded(cudaMallocManaged(&numbers2, numberOfBytes), "cudaMallocManaged(numbers2)"))
    {
        // fill the input arrays with some numbers
        for(int i=0;i<numberOfNumbers;i++)
        {
            numbers1[i] = i;    // this will be manipulated by the CUDA device (GPU)
            numbers2[i] = i;    // this will be manipulated by the CPU (as any standard C program would do)
        }

        // tell the device (GPU) to do its magic
        incrementArrayViaCUDAdevice<<<numberOfBlocks, threadsPerBlock>>>(numbers1, numberOfNumbers);

        // a kernel launch returns nothing: a bad launch configuration shows
        // up in cudaGetLastError(), a fault during execution shows up at
        // the next synchronizing call
        if (myCudaCallSucceeded(cudaGetLastError(), "kernel launch") &&
            myCudaCallSucceeded(cudaDeviceSynchronize(), "cudaDeviceSynchronize after kernel"))
        {
            // compute the same function "normally" on the CPU
            incrementArray(numbers2, numberOfNumbers);

            // check if the GPU did the same as the CPU
            bool workedCorrectly = true;
            printf("CUDA kernel simple incrementing test:\n");
            for(int i=0;i<numberOfNumbers;i++)
            {
                if (numbers1[i] != numbers2[i])
                    workedCorrectly = false;

                printf(" %d vs %d |",numbers1[i],numbers2[i]);
            }
            printf("\n");

            if (workedCorrectly)
                printf("The device performed well!\n");
            else
                printf("Something went wrong. The output numbers are not what was to be expected...\n");

            // copy stuff: hand the GPU result over to the caller's buffer,
            // then wait so that any error from the copy is seen before the
            // source buffer is freed
            if (myCudaCallSucceeded(cudaMemcpy(tf_ptr,numbers1,numberOfBytes,cudaMemcpyDeviceToDevice), "cudaMemcpy to tf_ptr") &&
                myCudaCallSucceeded(cudaDeviceSynchronize(), "cudaDeviceSynchronize after copy"))
            {
                result = 0;
            }

            /*
            // debug helper: copy into a third managed array and dump it
            uint8_t *numbers3;
            cudaMallocManaged(&numbers3, sizeof(uint8_t)*numberOfNumbers);
            cudaMemcpy(numbers3,numbers1,numberOfNumbers,cudaMemcpyDeviceToDevice);
            cudaDeviceSynchronize();

            for(int i=0;i<numberOfNumbers;i++){
                printf("%d|",numbers3[i]);
            }
            printf("\n");
            */
        }
    }

    // free the space that has been used by our arrays so that
    // other programs might use it (cudaFree on a NULL pointer does nothing)
    cudaFree(numbers1);
    cudaFree(numbers2);

    return result;
}