Actual source code: cufft.cu


  2: /*
  3:     Provides an interface to the CUFFT package.
  4:     Testing examples can be found in ~src/mat/tests
  5: */

  7: #include <petscdevice.h>
  8: #include <petsc/private/matimpl.h>

 10: typedef struct {
 11:   PetscInt     ndim;
 12:   PetscInt     *dim;
 13:   cufftHandle  p_forward, p_backward;
 14:   cufftComplex *devArray;
 15: } Mat_CUFFT;

 17: PetscErrorCode MatMult_SeqCUFFT(Mat A, Vec x, Vec y)
 18: {
 19:   Mat_CUFFT    *cufft    = (Mat_CUFFT*) A->data;
 20:   cufftComplex *devArray = cufft->devArray;
 21:   PetscInt      ndim     = cufft->ndim, *dim = cufft->dim;
 22:   PetscScalar  *x_array, *y_array;

 24:   VecGetArray(x, &x_array);
 25:   VecGetArray(y, &y_array);
 26:   if (!cufft->p_forward) {
 27:     /* create a plan, then execute it */
 28:     switch (ndim) {
 29:     case 1:
 30:       cufftPlan1d(&cufft->p_forward, dim[0], CUFFT_C2C, 1);
 31:       break;
 32:     case 2:
 33:       cufftPlan2d(&cufft->p_forward, dim[0], dim[1], CUFFT_C2C);
 34:       break;
 35:     case 3:
 36:       cufftPlan3d(&cufft->p_forward, dim[0], dim[1], dim[2], CUFFT_C2C);
 37:       break;
 38:     default:
 39:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %" PetscInt_FMT "-dimensional transform", ndim);
 40:     }
 41:   }
 42:   /* transfer to GPU memory */
 43:   cudaMemcpy(devArray, x_array, sizeof(cufftComplex)*dim[ndim], cudaMemcpyHostToDevice);
 44:   /* execute transform */
 45:   cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_FORWARD);
 46:   /* transfer from GPU memory */
 47:   cudaMemcpy(y_array, devArray, sizeof(cufftComplex)*dim[ndim], cudaMemcpyDeviceToHost);
 48:   VecRestoreArray(y, &y_array);
 49:   VecRestoreArray(x, &x_array);
 50:   return 0;
 51: }

 53: PetscErrorCode MatMultTranspose_SeqCUFFT(Mat A, Vec x, Vec y)
 54: {
 55:   Mat_CUFFT    *cufft    = (Mat_CUFFT*) A->data;
 56:   cufftComplex *devArray = cufft->devArray;
 57:   PetscInt      ndim     = cufft->ndim, *dim = cufft->dim;
 58:   PetscScalar  *x_array, *y_array;

 60:   VecGetArray(x, &x_array);
 61:   VecGetArray(y, &y_array);
 62:   if (!cufft->p_backward) {
 63:     /* create a plan, then execute it */
 64:     switch (ndim) {
 65:     case 1:
 66:       cufftPlan1d(&cufft->p_backward, dim[0], CUFFT_C2C, 1);
 67:       break;
 68:     case 2:
 69:       cufftPlan2d(&cufft->p_backward, dim[0], dim[1], CUFFT_C2C);
 70:       break;
 71:     case 3:
 72:       cufftPlan3d(&cufft->p_backward, dim[0], dim[1], dim[2], CUFFT_C2C);
 73:       break;
 74:     default:
 75:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %" PetscInt_FMT "-dimensional transform", ndim);
 76:     }
 77:   }
 78:   /* transfer to GPU memory */
 79:   cudaMemcpy(devArray, x_array, sizeof(cufftComplex)*dim[ndim], cudaMemcpyHostToDevice);
 80:   /* execute transform */
 81:   cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_INVERSE);
 82:   /* transfer from GPU memory */
 83:   cudaMemcpy(y_array, devArray, sizeof(cufftComplex)*dim[ndim], cudaMemcpyDeviceToHost);
 84:   VecRestoreArray(y, &y_array);
 85:   VecRestoreArray(x, &x_array);
 86:   return 0;
 87: }

 89: PetscErrorCode MatDestroy_SeqCUFFT(Mat A)
 90: {
 91:   Mat_CUFFT *cufft = (Mat_CUFFT*) A->data;

 93:   PetscFree(cufft->dim);
 94:   if (cufft->p_forward)  cufftDestroy(cufft->p_forward);
 95:   if (cufft->p_backward) cufftDestroy(cufft->p_backward);
 96:   cudaFree(cufft->devArray);
 97:   PetscFree(A->data);
 98:   PetscObjectChangeTypeName((PetscObject)A,0);
 99:   return 0;
100: }

102: /*@
103:   MatCreateSeqCUFFT - Creates a matrix object that provides sequential FFT via the external package CUFFT

105:   Collective

107:   Input Parameters:
108: + comm - MPI communicator, set to PETSC_COMM_SELF
109: . ndim - the ndim-dimensional transform
110: - dim  - array of size ndim, dim[i] contains the vector length in the i-dimension

112:   Output Parameter:
113: . A - the matrix

115:   Options Database Keys:
116: . -mat_cufft_plannerflags - set CUFFT planner flags

118:   Level: intermediate
119: @*/
120: PetscErrorCode  MatCreateSeqCUFFT(MPI_Comm comm, PetscInt ndim, const PetscInt dim[], Mat *A)
121: {
122:   Mat_CUFFT *cufft;
123:   PetscInt   m = 1;

128:   MatCreate(comm, A);
129:   for (PetscInt d = 0; d < ndim; ++d) {
131:     m *= dim[d];
132:   }
133:   MatSetSizes(*A, m, m, m, m);
134:   PetscObjectChangeTypeName((PetscObject)*A, MATSEQCUFFT);

136:   PetscNewLog(*A,&cufft);
137:   (*A)->data = (void*) cufft;
138:   PetscMalloc1(ndim+1, &cufft->dim);
139:   PetscArraycpy(cufft->dim, dim, ndim);

141:   cufft->ndim       = ndim;
142:   cufft->p_forward  = 0;
143:   cufft->p_backward = 0;
144:   cufft->dim[ndim]  = m;

146:   /* GPU memory allocation */
147:   cudaMalloc((void**) &cufft->devArray, sizeof(cufftComplex)*m);

149:   (*A)->ops->mult          = MatMult_SeqCUFFT;
150:   (*A)->ops->multtranspose = MatMultTranspose_SeqCUFFT;
151:   (*A)->assembled          = PETSC_TRUE;
152:   (*A)->ops->destroy       = MatDestroy_SeqCUFFT;

154:   /* get runtime options ...what options????? */
155:   {

158:     PetscOptionsBegin(comm, ((PetscObject)(*A))->prefix, "CUFFT Options", "Mat");
159:     PetscOptionsEnd();
160:   }
161:   return 0;
162: }