CuFFT
FFT Example
/**
 * Computes the forward 1-D complex-to-complex DFT of @p src on the GPU using cuFFT.
 *
 * @param src  Host array holding @p size complex samples (read only).
 * @param dest Host array with room for @p size complex values; receives the result.
 * @param size Number of complex samples to transform; must be greater than zero.
 *
 * @return true when every CUDA/cuFFT call succeeded and @p dest has been written; @n
 *         false when an argument is invalid or any CUDA/cuFFT call failed
 *         (in that case @p dest is not modified).
 *
 * @remarks
 *  Job process: @n
 *  src(HOST) > temp(HOST) > input(DEVICE) > DFT > output(DEVICE) > temp(HOST) > dest(HOST)
 */
bool dft1dForward(gpuComplex const * src, gpuComplex * dest, int const & size)
{
    // Reject arguments that cannot describe a valid transform before touching the GPU.
    if (src == NULL || dest == NULL || size <= 0) {
        return false;
    }

    // Compute the byte count in size_t so that large sizes do not overflow an int.
    size_t const ALLOC_SIZE = sizeof(cufftComplex) * static_cast<size_t>(size);
    int const BATCH = 1;

    // Start from NULL so the cleanup section can tell which resources were acquired.
    cufftComplex * input  = NULL;
    cufftComplex * output = NULL;
    cufftComplex * temp   = NULL;
    cufftHandle plan;

    // Acquire the buffers; each step only runs if all previous ones succeeded.
    bool ok = (cudaMalloc((void**)&input, ALLOC_SIZE) == cudaSuccess);
    ok = ok && (cudaMalloc((void**)&output, ALLOC_SIZE) == cudaSuccess);
    ok = ok && (cudaMallocHost((void**)&temp, ALLOC_SIZE) == cudaSuccess);

    // Remember separately whether the plan exists: it must only be destroyed if created.
    bool const PLAN_CREATED = ok && (cufftPlan1d(&plan, size, CUFFT_C2C, BATCH) == CUFFT_SUCCESS);
    ok = PLAN_CREATED;

    if (ok) {
        // src(HOST) to temp(HOST).
        for (int i = 0; i < size; ++i) {
            temp[i].x = src[i].real;
            temp[i].y = src[i].imag;
        }

        // temp(HOST) to input(DEVICE).
        ok = (cudaMemcpy(input, temp, ALLOC_SIZE, cudaMemcpyHostToDevice) == cudaSuccess);
    }

    // run FFT: input(DEVICE) to output(DEVICE).
    ok = ok && (cufftExecC2C(plan, input, output, CUFFT_FORWARD) == CUFFT_SUCCESS);

    // Wait for the transform so that execution errors are reported here.
    ok = ok && (cudaDeviceSynchronize() == cudaSuccess);

    // output(DEVICE) to temp(HOST).
    ok = ok && (cudaMemcpy(temp, output, ALLOC_SIZE, cudaMemcpyDeviceToHost) == cudaSuccess);

    if (ok) {
        // temp(HOST) to dest(HOST). Only reached when the result is known to be valid.
        for (int i = 0; i < size; ++i) {
            dest[i].real = temp[i].x;
            dest[i].imag = temp[i].y;
        }
    }

    // Release whatever was acquired, on the success and on the failure path alike.
    if (PLAN_CREATED) {
        cufftDestroy(plan);
    }
    if (temp != NULL) {
        cudaFreeHost(temp);
    }
    if (input != NULL) {
        cudaFree(input);
    }
    if (output != NULL) {
        cudaFree(output);
    }

    return ok;
}
See also
Favorite site