Skip to content

CuFFT

FFT Example

/**
 * Runs a forward 1D complex-to-complex DFT on the GPU using cuFFT.
 *
 * @param src   Host array of @p size input samples (read via its real/imag members).
 * @param dest  Host array of @p size elements that receives the transform result.
 *              It is written only when every step succeeded.
 * @param size  Number of complex samples to transform.
 *
 * @return true when the transform completed and @p dest was filled (or when
 *         @p size is 0 and there is nothing to do); false when an argument is
 *         invalid or any CUDA runtime / cuFFT call reported an error.
 *
 * @remarks
 *  Job process: @n
 *   src(HOST) > temp(HOST) > input(DEVICE) > DFT > output(DEVICE) > temp(HOST) > dest(HOST)
 */
bool dft1dForward(gpuComplex const * src, gpuComplex * dest, int const & size)
{
    // Nothing to transform; avoid zero-sized allocations and an invalid plan.
    if (size == 0) {
        return true;
    }
    if (src == NULL || dest == NULL || size < 0) {
        return false;
    }

    // Initialised so that the cleanup section is safe no matter where a failure occurs.
    cufftComplex * input  = NULL;
    cufftComplex * output = NULL;
    cufftComplex * temp   = NULL;
    cufftHandle plan;
    bool planCreated = false;

    // Byte count is computed in size_t so that large sizes do not overflow an int.
    size_t const ALLOC_SIZE = sizeof(cufftComplex) * static_cast<size_t>(size);
    int const BATCH = 1;

    bool ok = (cudaMalloc((void**)&input, ALLOC_SIZE) == cudaSuccess);
    ok = ok && (cudaMalloc((void**)&output, ALLOC_SIZE) == cudaSuccess);
    ok = ok && (cudaMallocHost((void**)&temp, ALLOC_SIZE) == cudaSuccess);

    if (ok) {
        planCreated = (cufftPlan1d(&plan, size, CUFFT_C2C, BATCH) == CUFFT_SUCCESS);
        ok = planCreated;
    }

    if (ok) {
        // src(HOST) to temp(HOST).
        for (int i = 0; i < size; ++i) {
            temp[i].x = src[i].real;
            temp[i].y = src[i].imag;
        }

        // temp(HOST) to input(DEVICE).
        ok = (cudaMemcpy(input, temp, ALLOC_SIZE, cudaMemcpyHostToDevice) == cudaSuccess);
    }

    // run FFT: input(DEVICE) to output(DEVICE).
    ok = ok && (cufftExecC2C(plan, input, output, CUFFT_FORWARD) == CUFFT_SUCCESS);
    // The synchronize call also reports errors raised while the transform was executing.
    ok = ok && (cudaDeviceSynchronize() == cudaSuccess);

    // output(DEVICE) to temp(HOST).
    ok = ok && (cudaMemcpy(temp, output, ALLOC_SIZE, cudaMemcpyDeviceToHost) == cudaSuccess);

    if (ok) {
        // temp(HOST) to dest(HOST).
        for (int i = 0; i < size; ++i) {
            dest[i].real = temp[i].x;
            dest[i].imag = temp[i].y;
        }
    }

    // Release everything that was acquired, on success and on failure alike.
    if (planCreated) {
        cufftDestroy(plan);
    }
    if (temp != NULL) {
        cudaFreeHost(temp);
    }
    if (input != NULL) {
        cudaFree(input);
    }
    if (output != NULL) {
        cudaFree(output);
    }

    return ok;
}

See also

Favorite site