Actual source code: fftw.c

  1: /*
  2:     Provides an interface to the FFTW package.
  3:     Testing examples can be found in ~src/mat/tests
  4: */

  6: #include <../src/mat/impls/fft/fft.h>
  7: EXTERN_C_BEGIN
  8: #if !PetscDefined(HAVE_MPIUNI)
  9:   #include <fftw3-mpi.h>
 10: #else
 11:   #include <fftw3.h>
 12: #endif
 13: EXTERN_C_END

 15: typedef struct {
 16:   ptrdiff_t ndim_fftw, *dim_fftw;
 17: #if defined(PETSC_USE_64BIT_INDICES)
 18:   fftw_iodim64 *iodims;
 19: #else
 20:   fftw_iodim *iodims;
 21: #endif
 22:   PetscInt     partial_dim;
 23:   fftw_plan    p_forward, p_backward;
 24:   unsigned     p_flag;                                      /* planner flags, FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
 25:   PetscScalar *finarray, *foutarray, *binarray, *boutarray; /* keep track of arrays because fftw_execute() can only be executed for the arrays with which the plan was created */
 26: } Mat_FFTW;

 28: extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat, Vec *, Vec *, Vec *);

 30: static PetscErrorCode MatMult_SeqFFTW(Mat A, Vec x, Vec y)
 31: {
 32:   Mat_FFT           *fft  = (Mat_FFT *)A->data;
 33:   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
 34:   const PetscScalar *x_array;
 35:   PetscScalar       *y_array;
 36:   Vec                xx;
 37: #if defined(PETSC_USE_COMPLEX)
 38:   #if defined(PETSC_USE_64BIT_INDICES)
 39:   fftw_iodim64 *iodims;
 40:   #else
 41:   fftw_iodim *iodims;
 42:   #endif
 43:   PetscInt i;
 44: #endif
 45:   PetscInt ndim = fft->ndim, *dim = fft->dim;

 47:   PetscFunctionBegin;
 48:   if (!fftw->p_forward && fftw->p_flag != FFTW_ESTIMATE) {
 49:     /* The data in the in/out arrays is overwritten, so a dummy array is needed for the computation; see FFTW manual sec. 2.1 or sec. 4 */
 50:     PetscCall(VecDuplicate(x, &xx));
 51:     PetscCall(VecGetArrayRead(xx, &x_array));
 52:   } else {
 53:     PetscCall(VecGetArrayRead(x, &x_array));
 54:   }
 55:   PetscCall(VecGetArray(y, &y_array));
 56:   if (!fftw->p_forward) { /* create a plan, then execute it */
 57:     switch (ndim) {
 58:     case 1:
 59: #if defined(PETSC_USE_COMPLEX)
 60:       fftw->p_forward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
 61: #else
 62:       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
 63: #endif
 64:       break;
 65:     case 2:
 66: #if defined(PETSC_USE_COMPLEX)
 67:       fftw->p_forward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
 68: #else
 69:       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
 70: #endif
 71:       break;
 72:     case 3:
 73: #if defined(PETSC_USE_COMPLEX)
 74:       fftw->p_forward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
 75: #else
 76:       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
 77: #endif
 78:       break;
 79:     default:
 80: #if defined(PETSC_USE_COMPLEX)
 81:       iodims = fftw->iodims;
 82:   #if defined(PETSC_USE_64BIT_INDICES)
 83:       if (ndim) {
 84:         iodims[ndim - 1].n  = (ptrdiff_t)dim[ndim - 1];
 85:         iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
 86:         for (i = ndim - 2; i >= 0; --i) {
 87:           iodims[i].n  = (ptrdiff_t)dim[i];
 88:           iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
 89:         }
 90:       }
 91:       fftw->p_forward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
 92:   #else
 93:       if (ndim) {
 94:         iodims[ndim - 1].n  = (int)dim[ndim - 1];
 95:         iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
 96:         for (i = ndim - 2; i >= 0; --i) {
 97:           iodims[i].n  = (int)dim[i];
 98:           iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
 99:         }
100:       }
101:       fftw->p_forward = fftw_plan_guru_dft((int)ndim, (fftw_iodim *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
102:   #endif

104: #else
105:       fftw->p_forward = fftw_plan_dft_r2c(ndim, (int *)dim, (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
106: #endif
107:       break;
108:     }
109:     if (fftw->p_flag != FFTW_ESTIMATE) {
110:       /* The data in the in/out arrays is overwritten, so a dummy array is needed for the computation; see FFTW manual sec. 2.1 or sec. 4 */
111:       PetscCall(VecRestoreArrayRead(xx, &x_array));
112:       PetscCall(VecDestroy(&xx));
113:       PetscCall(VecGetArrayRead(x, &x_array));
114:     } else {
115:       fftw->finarray  = (PetscScalar *)x_array;
116:       fftw->foutarray = y_array;
117:     }
118:   }

120:   if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
121: #if defined(PETSC_USE_COMPLEX)
122:     fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
123: #else
124:     fftw_execute_dft_r2c(fftw->p_forward, (double *)x_array, (fftw_complex *)y_array);
125: #endif
126:   } else {
127:     fftw_execute(fftw->p_forward);
128:   }
129:   PetscCall(VecRestoreArray(y, &y_array));
130:   PetscCall(VecRestoreArrayRead(x, &x_array));
131:   PetscFunctionReturn(PETSC_SUCCESS);
132: }

134: static PetscErrorCode MatMultTranspose_SeqFFTW(Mat A, Vec x, Vec y)
135: {
136:   Mat_FFT           *fft  = (Mat_FFT *)A->data;
137:   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
138:   const PetscScalar *x_array;
139:   PetscScalar       *y_array;
140:   PetscInt           ndim = fft->ndim, *dim = fft->dim;
141:   Vec                xx;
142: #if defined(PETSC_USE_COMPLEX)
143:   #if defined(PETSC_USE_64BIT_INDICES)
144:   fftw_iodim64 *iodims = fftw->iodims;
145:   #else
146:   fftw_iodim *iodims = fftw->iodims;
147:   #endif
148: #endif

150:   PetscFunctionBegin;
151:   if (!fftw->p_backward && fftw->p_flag != FFTW_ESTIMATE) {
152:     /* The data in the in/out arrays is overwritten, so a dummy array is needed for the computation; see FFTW manual sec. 2.1 or sec. 4 */
153:     PetscCall(VecDuplicate(x, &xx));
154:     PetscCall(VecGetArrayRead(xx, &x_array));
155:   } else {
156:     PetscCall(VecGetArrayRead(x, &x_array));
157:   }
158:   PetscCall(VecGetArray(y, &y_array));
159:   if (!fftw->p_backward) { /* create a plan, then execute it */
160:     switch (ndim) {
161:     case 1:
162: #if defined(PETSC_USE_COMPLEX)
163:       fftw->p_backward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
164: #else
165:       fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
166: #endif
167:       break;
168:     case 2:
169: #if defined(PETSC_USE_COMPLEX)
170:       fftw->p_backward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
171: #else
172:       fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
173: #endif
174:       break;
175:     case 3:
176: #if defined(PETSC_USE_COMPLEX)
177:       fftw->p_backward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
178: #else
179:       fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
180: #endif
181:       break;
182:     default:
183: #if defined(PETSC_USE_COMPLEX)
184:   #if defined(PETSC_USE_64BIT_INDICES)
185:       fftw->p_backward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
186:   #else
187:       fftw->p_backward = fftw_plan_guru_dft((int)ndim, iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
188:   #endif
189: #else
190:       fftw->p_backward = fftw_plan_dft_c2r((int)ndim, (int *)dim, (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
191: #endif
192:       break;
193:     }
194:     if (fftw->p_flag != FFTW_ESTIMATE) {
195:       /* The data in the in/out arrays is overwritten, so a dummy array is needed for the computation; see FFTW manual sec. 2.1 or sec. 4 */
196:       PetscCall(VecRestoreArrayRead(xx, &x_array));
197:       PetscCall(VecDestroy(&xx));
198:       PetscCall(VecGetArrayRead(x, &x_array));
199:     } else {
200:       fftw->binarray  = (PetscScalar *)x_array;
201:       fftw->boutarray = y_array;
202:     }
203:   }
204:   if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
205: #if defined(PETSC_USE_COMPLEX)
206:     fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
207: #else
208:     fftw_execute_dft_c2r(fftw->p_backward, (fftw_complex *)x_array, (double *)y_array);
209: #endif
210:   } else {
211:     fftw_execute(fftw->p_backward);
212:   }
213:   PetscCall(VecRestoreArray(y, &y_array));
214:   PetscCall(VecRestoreArrayRead(x, &x_array));
215:   PetscFunctionReturn(PETSC_SUCCESS);
216: }

218: #if !PetscDefined(HAVE_MPIUNI)
219: static PetscErrorCode MatMult_MPIFFTW(Mat A, Vec x, Vec y)
220: {
221:   Mat_FFT           *fft  = (Mat_FFT *)A->data;
222:   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
223:   const PetscScalar *x_array;
224:   PetscScalar       *y_array;
225:   PetscInt           ndim = fft->ndim, *dim = fft->dim;
226:   MPI_Comm           comm;
227:   Vec                xx;

229:   PetscFunctionBegin;
230:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
231:   if (!fftw->p_forward && fftw->p_flag != FFTW_ESTIMATE) {
232:     /* The data in the in/out arrays is overwritten, so a dummy array is needed for the computation; see FFTW manual sec. 2.1 or sec. 4 */
233:     PetscCall(VecDuplicate(x, &xx));
234:     PetscCall(VecGetArrayRead(xx, &x_array));
235:   } else {
236:     PetscCall(VecGetArrayRead(x, &x_array));
237:   }
238:   PetscCall(VecGetArray(y, &y_array));
239:   if (!fftw->p_forward) { /* create a plan, then execute it */
240:     switch (ndim) {
241:     case 1:
242:   #if defined(PETSC_USE_COMPLEX)
243:       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
244:   #else
245:       SETERRQ(comm, PETSC_ERR_SUP, "no support for real numbers yet");
246:   #endif
247:       break;
248:     case 2:
249:   #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
250:       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
251:   #else
252:       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
253:   #endif
254:       break;
255:     case 3:
256:   #if defined(PETSC_USE_COMPLEX)
257:       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
258:   #else
259:       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
260:   #endif
261:       break;
262:     default:
263:   #if defined(PETSC_USE_COMPLEX)
264:       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
265:   #else
266:       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw, fftw->dim_fftw, (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
267:   #endif
268:       break;
269:     }
270:     if (fftw->p_flag != FFTW_ESTIMATE) {
271:       /* The data in the in/out arrays is overwritten, so a dummy array is needed for the computation; see FFTW manual sec. 2.1 or sec. 4 */
272:       PetscCall(VecRestoreArrayRead(xx, &x_array));
273:       PetscCall(VecDestroy(&xx));
274:       PetscCall(VecGetArrayRead(x, &x_array));
275:     } else {
276:       fftw->finarray  = (PetscScalar *)x_array;
277:       fftw->foutarray = y_array;
278:     }
279:   }
280:   if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
281:     fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
282:   } else {
283:     fftw_execute(fftw->p_forward);
284:   }
285:   PetscCall(VecRestoreArray(y, &y_array));
286:   PetscCall(VecRestoreArrayRead(x, &x_array));
287:   PetscFunctionReturn(PETSC_SUCCESS);
288: }

290: static PetscErrorCode MatMultTranspose_MPIFFTW(Mat A, Vec x, Vec y)
291: {
292:   Mat_FFT           *fft  = (Mat_FFT *)A->data;
293:   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
294:   const PetscScalar *x_array;
295:   PetscScalar       *y_array;
296:   PetscInt           ndim = fft->ndim, *dim = fft->dim;
297:   MPI_Comm           comm;
298:   Vec                xx;

300:   PetscFunctionBegin;
301:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
302:   if (!fftw->p_backward && fftw->p_flag != FFTW_ESTIMATE) {
303:     /* The data in the in/out arrays is overwritten, so a dummy array is needed for the computation; see FFTW manual sec. 2.1 or sec. 4 */
304:     PetscCall(VecDuplicate(x, &xx));
305:     PetscCall(VecGetArrayRead(xx, &x_array));
306:   } else {
307:     PetscCall(VecGetArrayRead(x, &x_array));
308:   }
309:   PetscCall(VecGetArray(y, &y_array));
310:   if (!fftw->p_backward) { /* create a plan, then execute it */
311:     switch (ndim) {
312:     case 1:
313:   #if defined(PETSC_USE_COMPLEX)
314:       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
315:   #else
316:       SETERRQ(comm, PETSC_ERR_SUP, "no support for real numbers yet");
317:   #endif
318:       break;
319:     case 2:
320:   #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
321:       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
322:   #else
323:       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
324:   #endif
325:       break;
326:     case 3:
327:   #if defined(PETSC_USE_COMPLEX)
328:       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
329:   #else
330:       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
331:   #endif
332:       break;
333:     default:
334:   #if defined(PETSC_USE_COMPLEX)
335:       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
336:   #else
337:       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
338:   #endif
339:       break;
340:     }
341:     if (fftw->p_flag != FFTW_ESTIMATE) {
342:       /* The data in the in/out arrays is overwritten, so a dummy array is needed for the computation; see FFTW manual sec. 2.1 or sec. 4 */
343:       PetscCall(VecRestoreArrayRead(xx, &x_array));
344:       PetscCall(VecDestroy(&xx));
345:       PetscCall(VecGetArrayRead(x, &x_array));
346:     } else {
347:       fftw->binarray  = (PetscScalar *)x_array;
348:       fftw->boutarray = y_array;
349:     }
350:   }
351:   if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
352:     fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
353:   } else {
354:     fftw_execute(fftw->p_backward);
355:   }
356:   PetscCall(VecRestoreArray(y, &y_array));
357:   PetscCall(VecRestoreArrayRead(x, &x_array));
358:   PetscFunctionReturn(PETSC_SUCCESS);
359: }
360: #endif

362: static PetscErrorCode MatDestroy_FFTW(Mat A)
363: {
364:   Mat_FFT  *fft  = (Mat_FFT *)A->data;
365:   Mat_FFTW *fftw = (Mat_FFTW *)fft->data;

367:   PetscFunctionBegin;
368:   fftw_destroy_plan(fftw->p_forward);
369:   fftw_destroy_plan(fftw->p_backward);
370:   if (fftw->iodims) free(fftw->iodims);
371:   PetscCall(PetscFree(fftw->dim_fftw));
372:   PetscCall(PetscFree(fft->data));
373:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", NULL));
374:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", NULL));
375:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", NULL));
376: #if !PetscDefined(HAVE_MPIUNI)
377:   fftw_mpi_cleanup();
378: #endif
379:   PetscFunctionReturn(PETSC_SUCCESS);
380: }

382: #if !PetscDefined(HAVE_MPIUNI)
383: #include <../src/vec/vec/impls/mpi/pvecimpl.h>
384: static PetscErrorCode VecDestroy_MPIFFTW(Vec v)
385: {
386:   PetscScalar *array;

388:   PetscFunctionBegin;
389:   PetscCall(VecGetArray(v, &array));
390:   fftw_free((fftw_complex *)array);
391:   PetscCall(VecRestoreArray(v, &array));
392:   PetscCall(VecDestroy_MPI(v));
393:   PetscFunctionReturn(PETSC_SUCCESS);
394: }
395: #endif

397: #if !PetscDefined(HAVE_MPIUNI)
398: static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin, Vec *fin_new)
399: {
400:   Mat A;

402:   PetscFunctionBegin;
403:   PetscCall(PetscObjectQuery((PetscObject)fin, "FFTmatrix", (PetscObject *)&A));
404:   PetscCall(MatCreateVecsFFTW_FFTW(A, fin_new, NULL, NULL));
405:   PetscFunctionReturn(PETSC_SUCCESS);
406: }

408: static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout, Vec *fout_new)
409: {
410:   Mat A;

412:   PetscFunctionBegin;
413:   PetscCall(PetscObjectQuery((PetscObject)fout, "FFTmatrix", (PetscObject *)&A));
414:   PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, fout_new, NULL));
415:   PetscFunctionReturn(PETSC_SUCCESS);
416: }

418: static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new)
419: {
420:   Mat A;

422:   PetscFunctionBegin;
423:   PetscCall(PetscObjectQuery((PetscObject)bout, "FFTmatrix", (PetscObject *)&A));
424:   PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, NULL, bout_new));
425:   PetscFunctionReturn(PETSC_SUCCESS);
426: }
427: #endif

429: /*@
430:   MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
431:   parallel layout determined by `MATFFTW`

433:   Collective

435:   Input Parameter:
436: . A - the matrix

438:   Output Parameters:
439: + x - (optional) input vector of forward FFTW
440: . y - (optional) output vector of forward FFTW
441: - z - (optional) output vector of backward FFTW

443:   Options Database Key:
444: . -mat_fftw_plannerflags - set FFTW planner flags

446:   Level: advanced

448:   Notes:
449:   The parallel layout of the output of the forward FFTW transform is always the same as that of
450:   the input of the backward transform, but the parallel layout of the input of the forward
451:   transform might not be the same as that of the output of the backward transform.

453:   Enough space must be provided when doing a parallel real transform: extra zeros have to be
454:   padded at the end of the last dimension. For this reason one needs to invoke the
455:   fftw_mpi_local_size_transposed routine, remembering to change the last dimension from n to
456:   n/2+1 when invoking it. The number of zeros to be padded depends on whether the last
457:   dimension is even or odd: if it is even two zeros are needed, if it is odd only one zero
458:   is needed.

460:   Lastly, some scratch space is needed at the end of the data set on each process. alloc_local
461:   figures out how much space is needed, i.e. it determines the combined data plus scratch space
462:   for each process and returns that.

464:   Developer Notes:
465:   How come `MatCreateVecs()` doesn't produce the correctly padded vectors automatically?

467: .seealso: [](ch_matrices), `Mat`, `MATFFTW`, `MatCreateFFT()`, `MatCreateVecs()`
468: @*/
469: PetscErrorCode MatCreateVecsFFTW(Mat A, Vec *x, Vec *y, Vec *z)
470: {
471:   PetscFunctionBegin;
472:   PetscUseMethod(A, "MatCreateVecsFFTW_C", (Mat, Vec *, Vec *, Vec *), (A, x, y, z));
473:   PetscFunctionReturn(PETSC_SUCCESS);
474: }
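
/*
   A minimal usage sketch for MatCreateVecsFFTW() (assumptions: a 2D transform of placeholder size
   n0 x n1, and the usual PetscCall() error checking used throughout PETSc):

     Mat      A;
     Vec      x, y, z;                            // forward input, forward output, backward output
     PetscInt dim[2] = {n0, n1};

     PetscCall(MatCreateFFT(PETSC_COMM_WORLD, 2, dim, MATFFTW, &A));
     PetscCall(MatCreateVecsFFTW(A, &x, &y, &z)); // vectors with the layout and padding FFTW expects
     // ... fill x, e.g. with VecScatterPetscToFFTW(), then apply the transforms ...
     PetscCall(MatMult(A, x, y));                 // forward transform
     PetscCall(MatMultTranspose(A, y, z));        // backward transform
     PetscCall(VecDestroy(&x));
     PetscCall(VecDestroy(&y));
     PetscCall(VecDestroy(&z));
     PetscCall(MatDestroy(&A));
*/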

476: PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A, Vec *fin, Vec *fout, Vec *bout)
477: {
478:   PetscMPIInt size, rank;
479:   MPI_Comm    comm;
480:   Mat_FFT    *fft = (Mat_FFT *)A->data;

482:   PetscFunctionBegin;
485:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));

487:   PetscCallMPI(MPI_Comm_size(comm, &size));
488:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
489:   if (size == 1) { /* sequential case */
490: #if defined(PETSC_USE_COMPLEX)
491:     if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fin));
492:     if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fout));
493:     if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, bout));
494: #else
495:     if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fin));
496:     if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fout));
497:     if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, bout));
498: #endif
499: #if !PetscDefined(HAVE_MPIUNI)
500:   } else { /* parallel cases */
501:     Mat_FFTW     *fftw = (Mat_FFTW *)fft->data;
502:     PetscInt      ndim = fft->ndim, *dim = fft->dim;
503:     ptrdiff_t     alloc_local, local_n0, local_0_start;
504:     ptrdiff_t     local_n1;
505:     fftw_complex *data_fout;
506:     ptrdiff_t     local_1_start;
507:   #if defined(PETSC_USE_COMPLEX)
508:     fftw_complex *data_fin, *data_bout;
509:   #else
510:     double   *data_finr, *data_boutr;
511:     PetscInt  n1, N1;
512:     ptrdiff_t temp;
513:   #endif

515:     switch (ndim) {
516:     case 1:
517:   #if !defined(PETSC_USE_COMPLEX)
518:       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not allow parallel real 1D transform");
519:   #else
520:       alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
521:       if (fin) {
522:         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
523:         PetscCall(VecCreateMPIWithArray(comm, 1, local_n0, fft->N, (const PetscScalar *)data_fin, fin));
524:         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
525:         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
526:         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
527:       }
528:       if (fout) {
529:         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
530:         PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_fout, fout));
531:         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
532:         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
533:         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
534:       }
535:       alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
536:       if (bout) {
537:         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
538:         PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_bout, bout));
539:         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
540:         (*bout)->ops->duplicate = VecDuplicate_FFTW_fout;
541:         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
542:       }
543:       break;
544:   #endif
545:     case 2:
546:   #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no longer the product of the individual dimensions */
547:       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
548:       N1          = 2 * dim[0] * (dim[1] / 2 + 1);
549:       n1          = 2 * local_n0 * (dim[1] / 2 + 1);
550:       if (fin) {
551:         data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
552:         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
553:         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
554:         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
555:         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
556:       }
557:       if (fout) {
558:         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
559:         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_fout, fout));
560:         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
561:         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
562:         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
563:       }
564:       if (bout) {
565:         data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
566:         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
567:         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
568:         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
569:         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
570:       }
571:   #else
572:       /* Get local size */
573:       alloc_local = fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
574:       if (fin) {
575:         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
576:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
577:         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
578:         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
579:         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
580:       }
581:       if (fout) {
582:         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
583:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
584:         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
585:         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
586:         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
587:       }
588:       if (bout) {
589:         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
590:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
591:         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
592:         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
593:         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
594:       }
595:   #endif
596:       break;
597:     case 3:
598:   #if !defined(PETSC_USE_COMPLEX)
599:       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
600:       N1          = 2 * dim[0] * dim[1] * (dim[2] / 2 + 1);
601:       n1          = 2 * local_n0 * dim[1] * (dim[2] / 2 + 1);
602:       if (fin) {
603:         data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
604:         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
605:         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
606:         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
607:         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
608:       }
609:       if (fout) {
610:         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
611:         PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (PetscScalar *)data_fout, fout));
612:         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
613:         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
614:         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
615:       }
616:       if (bout) {
617:         data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
618:         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
619:         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
620:         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
621:         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
622:       }
623:   #else
624:       alloc_local = fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
625:       if (fin) {
626:         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
627:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
628:         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
629:         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
630:         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
631:       }
632:       if (fout) {
633:         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
634:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
635:         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
636:         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
637:         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
638:       }
639:       if (bout) {
640:         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
641:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
642:         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
643:         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
644:         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
645:       }
646:   #endif
647:       break;
648:     default:
649:   #if !defined(PETSC_USE_COMPLEX)
650:       temp                                  = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
651:       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
652:       alloc_local                           = fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
653:       N1                                    = 2 * fft->N * (PetscInt)(fftw->dim_fftw[fftw->ndim_fftw - 1]) / ((PetscInt)temp);
654:       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;

656:       if (fin) {
657:         data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
658:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_finr, fin));
659:         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
660:         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
661:         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
662:       }
663:       if (fout) {
664:         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
665:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_fout, fout));
666:         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
667:         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
668:         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
669:       }
670:       if (bout) {
671:         data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
672:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_boutr, bout));
673:         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
674:         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
675:         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
676:       }
677:   #else
678:       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
679:       if (fin) {
680:         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
681:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
682:         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
683:         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
684:         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
685:       }
686:       if (fout) {
687:         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
688:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
689:         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
690:         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
691:         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
692:       }
693:       if (bout) {
694:         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
695:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
696:         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
697:         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
698:         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
699:       }
700:   #endif
701:       break;
702:     }
703:     /* FFTW vectors have their data array allocated by fftw_malloc, so v->array is set but
704:        v->array_allocated is NULL. A regular replacearray call would not free that memory and
705:        would only cause memory leaks. We null out these pointers here to avoid accidental use.
706:     */
707:     if (fin) (*fin)->ops->replacearray = NULL;
708:     if (fout) (*fout)->ops->replacearray = NULL;
709:     if (bout) (*bout)->ops->replacearray = NULL;
710: #endif
711:   }
712:   PetscFunctionReturn(PETSC_SUCCESS);
713: }

715: /*@
716:   VecScatterPetscToFFTW - Copies a PETSc vector to the vector that goes into `MATFFTW` calls.

718:   Collective

720:   Input Parameters:
721: + A - FFTW matrix
722: - x - the PETSc vector

724:   Output Parameter:
725: . y - the FFTW vector

727:   Level: intermediate

729:   Note:
730:   For a parallel real FFT, FFTW requires extra space at the end of the last dimension. This is
731:   required even when one is not doing an in-place transform. The last dimension size must be changed
732:   to 2*(dim[last]/2+1) to accommodate these extra zeros; this routine does that job with a scatter operation.

734: .seealso: [](ch_matrices), `Mat`, `MATFFTW`, `VecScatterFFTWToPetsc()`, `MatCreateVecsFFTW()`
735: @*/
736: PetscErrorCode VecScatterPetscToFFTW(Mat A, Vec x, Vec y)
737: {
738:   PetscFunctionBegin;
739:   PetscUseMethod(A, "VecScatterPetscToFFTW_C", (Mat, Vec, Vec), (A, x, y));
740:   PetscFunctionReturn(PETSC_SUCCESS);
741: }
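
/*
   A worked example of the padding described in the note above (a sketch, assuming a parallel real
   transform with dim = {4, 5}): the PETSc vector holds 4*5 = 20 values, while the FFTW layout uses
   a padded last dimension of 2*(5/2+1) = 6, i.e. 4*6 = 24 entries. Because 5 is odd, one zero is
   padded per row; had the last dimension been 4 (even), 2*(4/2+1) = 6 would pad two zeros per row.
   VecScatterPetscToFFTW() builds the index sets and the VecScatter that place the 20 values into
   the padded 24-entry layout.
*/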

743: static PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A, Vec x, Vec y)
744: {
745:   MPI_Comm    comm;
746:   Mat_FFT    *fft = (Mat_FFT *)A->data;
747:   PetscInt    low;
748:   PetscMPIInt rank, size;
749:   PetscInt    vsize, vsize1;
750:   VecScatter  vecscat;
751:   IS          list1;

753:   PetscFunctionBegin;
754:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
755:   PetscCallMPI(MPI_Comm_size(comm, &size));
756:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
757:   PetscCall(VecGetOwnershipRange(y, &low, NULL));

759:   if (size == 1) {
760:     PetscCall(VecGetSize(x, &vsize));
761:     PetscCall(VecGetSize(y, &vsize1));
762:     PetscCall(ISCreateStride(PETSC_COMM_SELF, fft->N, 0, 1, &list1));
763:     PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
764:     PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
765:     PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
766:     PetscCall(VecScatterDestroy(&vecscat));
767:     PetscCall(ISDestroy(&list1));
768: #if !PetscDefined(HAVE_MPIUNI)
769:   } else {
770:     Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
771:     PetscInt  ndim = fft->ndim, *dim = fft->dim;
772:     ptrdiff_t local_n0, local_0_start;
773:     ptrdiff_t local_n1, local_1_start;
774:     IS        list2;
775:   #if !defined(PETSC_USE_COMPLEX)
776:     PetscInt  i, j, k, partial_dim;
777:     PetscInt *indx1, *indx2, tempindx, tempindx1;
778:     PetscInt  NM;
779:     ptrdiff_t temp;
780:   #endif

782:     switch (ndim) {
783:     case 1:
784:   #if defined(PETSC_USE_COMPLEX)
785:       fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);

787:       PetscCall(ISCreateStride(comm, local_n0, local_0_start, 1, &list1));
788:       PetscCall(ISCreateStride(comm, local_n0, low, 1, &list2));
789:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
790:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
791:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
792:       PetscCall(VecScatterDestroy(&vecscat));
793:       PetscCall(ISDestroy(&list1));
794:       PetscCall(ISDestroy(&list2));
795:   #else
796:       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
797:   #endif
798:       break;
799:     case 2:
800:   #if defined(PETSC_USE_COMPLEX)
801:       fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);

803:       PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
804:       PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
805:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
806:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
807:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
808:       PetscCall(VecScatterDestroy(&vecscat));
809:       PetscCall(ISDestroy(&list1));
810:       PetscCall(ISDestroy(&list2));
811:   #else
812:       fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

814:       PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx1));
815:       PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx2));

817:       if (dim[1] % 2 == 0) {
818:         NM = dim[1] + 2;
819:       } else {
820:         NM = dim[1] + 1;
821:       }
822:       for (i = 0; i < local_n0; i++) {
823:         for (j = 0; j < dim[1]; j++) {
824:           tempindx  = i * dim[1] + j;
825:           tempindx1 = i * NM + j;

827:           indx1[tempindx] = local_0_start * dim[1] + tempindx;
828:           indx2[tempindx] = low + tempindx1;
829:         }
830:       }

832:       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
833:       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));

835:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
836:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
837:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
838:       PetscCall(VecScatterDestroy(&vecscat));
839:       PetscCall(ISDestroy(&list1));
840:       PetscCall(ISDestroy(&list2));
841:       PetscCall(PetscFree(indx1));
842:       PetscCall(PetscFree(indx2));
843:   #endif
844:       break;

846:     case 3:
847:   #if defined(PETSC_USE_COMPLEX)
848:       fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);

850:       PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
851:       PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
852:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
853:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
854:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
855:       PetscCall(VecScatterDestroy(&vecscat));
856:       PetscCall(ISDestroy(&list1));
857:       PetscCall(ISDestroy(&list2));
858:   #else
859:       /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
860:       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 3D real transform");
861:       fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

863:       PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx1));
864:       PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx2));

866:       if (dim[2] % 2 == 0) NM = dim[2] + 2;
867:       else NM = dim[2] + 1;

869:       for (i = 0; i < local_n0; i++) {
870:         for (j = 0; j < dim[1]; j++) {
871:           for (k = 0; k < dim[2]; k++) {
872:             tempindx  = i * dim[1] * dim[2] + j * dim[2] + k;
873:             tempindx1 = i * dim[1] * NM + j * NM + k;

875:             indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
876:             indx2[tempindx] = low + tempindx1;
877:           }
878:         }
879:       }

881:       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
882:       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));
883:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
884:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
885:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
886:       PetscCall(VecScatterDestroy(&vecscat));
887:       PetscCall(ISDestroy(&list1));
888:       PetscCall(ISDestroy(&list2));
889:       PetscCall(PetscFree(indx1));
890:       PetscCall(PetscFree(indx2));
891:   #endif
892:       break;

894:     default:
895:   #if defined(PETSC_USE_COMPLEX)
896:       fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);

898:       PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
899:       PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
900:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
901:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
902:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
903:       PetscCall(VecScatterDestroy(&vecscat));
904:       PetscCall(ISDestroy(&list1));
905:       PetscCall(ISDestroy(&list2));
906:   #else
907:       /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
908:       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel DIM>3 real transform");
909:       temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];

911:       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;

913:       fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

915:       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;

917:       partial_dim = fftw->partial_dim;

919:       PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx1));
920:       PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx2));

922:       if (dim[ndim - 1] % 2 == 0) NM = 2;
923:       else NM = 1;

925:       j = low;
926:       for (i = 0, k = 1; i < (PetscInt)local_n0 * partial_dim; i++, k++) {
927:         indx1[i] = local_0_start * partial_dim + i;
928:         indx2[i] = j;
929:         if (k % dim[ndim - 1] == 0) j += NM;
930:         j++;
931:       }
932:       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
933:       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));
934:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
935:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
936:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
937:       PetscCall(VecScatterDestroy(&vecscat));
938:       PetscCall(ISDestroy(&list1));
939:       PetscCall(ISDestroy(&list2));
940:       PetscCall(PetscFree(indx1));
941:       PetscCall(PetscFree(indx2));
942:   #endif
943:       break;
944:     }
945: #endif
946:   }
947:   PetscFunctionReturn(PETSC_SUCCESS);
948: }

950: /*@
951:   VecScatterFFTWToPetsc - Converts a `MATFFTW` output vector to a PETSc vector.

953:   Collective

955:   Input Parameters:
956: + A - `MATFFTW` matrix
957: - x - FFTW vector

959:   Output Parameter:
960: . y - PETSc vector

962:   Level: intermediate

964:   Note:
965:   When doing a real transform, the FFTW output of the backward DFT contains extra zeros at the end
966:   of the last dimension. `VecScatterFFTWToPetsc()` removes those extra zeros.

968: .seealso: [](ch_matrices), `Mat`, `VecScatterPetscToFFTW()`, `MATFFTW`, `MatCreateVecsFFTW()`
969: @*/
970: PetscErrorCode VecScatterFFTWToPetsc(Mat A, Vec x, Vec y)
971: {
972:   PetscFunctionBegin;
973:   PetscUseMethod(A, "VecScatterFFTWToPetsc_C", (Mat, Vec, Vec), (A, x, y));
974:   PetscFunctionReturn(PETSC_SUCCESS);
975: }
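
/*
   A sketch of the full scatter/transform round trip (assuming A, x, y, z were obtained as in the
   MatCreateVecsFFTW() sketch above, u is the PETSc input vector, v is a PETSc output vector with
   the same layout as u, and fftN is a placeholder for the total number of grid points):

     PetscCall(VecScatterPetscToFFTW(A, u, x));   // copy u into the padded FFTW input layout
     PetscCall(MatMult(A, x, y));                 // forward transform
     PetscCall(MatMultTranspose(A, y, z));        // backward transform
     PetscCall(VecScatterFFTWToPetsc(A, z, v));   // strip the padding again
     PetscCall(VecScale(v, 1.0 / fftN));          // FFTW transforms are unnormalized, so scale to recover u
*/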

977: static PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A, Vec x, Vec y)
978: {
979:   MPI_Comm    comm;
980:   Mat_FFT    *fft = (Mat_FFT *)A->data;
981:   PetscInt    low;
982:   PetscMPIInt rank, size;
983:   VecScatter  vecscat;
984:   IS          list1;

986:   PetscFunctionBegin;
987:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
988:   PetscCallMPI(MPI_Comm_size(comm, &size));
989:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
990:   PetscCall(VecGetOwnershipRange(x, &low, NULL));

992:   if (size == 1) {
993:     PetscCall(ISCreateStride(comm, fft->N, 0, 1, &list1));
994:     PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
995:     PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
996:     PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
997:     PetscCall(VecScatterDestroy(&vecscat));
998:     PetscCall(ISDestroy(&list1));

1000: #if !PetscDefined(HAVE_MPIUNI)
1001:   } else {
1002:     Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
1003:     PetscInt  ndim = fft->ndim, *dim = fft->dim;
1004:     ptrdiff_t local_n0, local_0_start;
1005:     ptrdiff_t local_n1, local_1_start;
1006:     IS        list2;
1007:   #if !defined(PETSC_USE_COMPLEX)
1008:     PetscInt  i, j, k, partial_dim;
1009:     PetscInt *indx1, *indx2, tempindx, tempindx1;
1010:     PetscInt  NM;
1011:     ptrdiff_t temp;
1012:   #endif
1013:     switch (ndim) {
1014:     case 1:
1015:   #if defined(PETSC_USE_COMPLEX)
1016:       fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);

1018:       PetscCall(ISCreateStride(comm, local_n1, local_1_start, 1, &list1));
1019:       PetscCall(ISCreateStride(comm, local_n1, low, 1, &list2));
1020:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
1021:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1022:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1023:       PetscCall(VecScatterDestroy(&vecscat));
1024:       PetscCall(ISDestroy(&list1));
1025:       PetscCall(ISDestroy(&list2));
1026:   #else
1027:       SETERRQ(comm, PETSC_ERR_SUP, "No support for real parallel 1D FFT");
1028:   #endif
1029:       break;
1030:     case 2:
1031:   #if defined(PETSC_USE_COMPLEX)
1032:       fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);

1034:       PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
1035:       PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
1036:       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1037:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1038:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1039:       PetscCall(VecScatterDestroy(&vecscat));
1040:       PetscCall(ISDestroy(&list1));
1041:       PetscCall(ISDestroy(&list2));
1042:   #else
1043:       fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

1045:       PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx1));
1046:       PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx2));

1048:       if (dim[1] % 2 == 0) NM = dim[1] + 2;
1049:       else NM = dim[1] + 1;

1051:       for (i = 0; i < local_n0; i++) {
1052:         for (j = 0; j < dim[1]; j++) {
1053:           tempindx  = i * dim[1] + j;
1054:           tempindx1 = i * NM + j;

1056:           indx1[tempindx] = local_0_start * dim[1] + tempindx;
1057:           indx2[tempindx] = low + tempindx1;
1058:         }
1059:       }

1061:       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
1062:       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));

1064:       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1065:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1066:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1067:       PetscCall(VecScatterDestroy(&vecscat));
1068:       PetscCall(ISDestroy(&list1));
1069:       PetscCall(ISDestroy(&list2));
1070:       PetscCall(PetscFree(indx1));
1071:       PetscCall(PetscFree(indx2));
1072:   #endif
1073:       break;
1074:     case 3:
1075:   #if defined(PETSC_USE_COMPLEX)
1076:       fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);

1078:       PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
1079:       PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
1080:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
1081:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1082:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1083:       PetscCall(VecScatterDestroy(&vecscat));
1084:       PetscCall(ISDestroy(&list1));
1085:       PetscCall(ISDestroy(&list2));
1086:   #else
1087:       fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

1089:       PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx1));
1090:       PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx2));

1092:       if (dim[2] % 2 == 0) NM = dim[2] + 2;
1093:       else NM = dim[2] + 1;

1095:       for (i = 0; i < local_n0; i++) {
1096:         for (j = 0; j < dim[1]; j++) {
1097:           for (k = 0; k < dim[2]; k++) {
1098:             tempindx  = i * dim[1] * dim[2] + j * dim[2] + k;
1099:             tempindx1 = i * dim[1] * NM + j * NM + k;

1101:             indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
1102:             indx2[tempindx] = low + tempindx1;
1103:           }
1104:         }
1105:       }

1107:       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
1108:       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));

1110:       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1111:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1112:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1113:       PetscCall(VecScatterDestroy(&vecscat));
1114:       PetscCall(ISDestroy(&list1));
1115:       PetscCall(ISDestroy(&list2));
1116:       PetscCall(PetscFree(indx1));
1117:       PetscCall(PetscFree(indx2));
1118:   #endif
1119:       break;
1120:     default:
1121:   #if defined(PETSC_USE_COMPLEX)
1122:       fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);

1124:       PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
1125:       PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
1126:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
1127:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1128:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1129:       PetscCall(VecScatterDestroy(&vecscat));
1130:       PetscCall(ISDestroy(&list1));
1131:       PetscCall(ISDestroy(&list2));
1132:   #else
1133:       temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];

1135:       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;

1137:       fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

1139:       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;

1141:       partial_dim = fftw->partial_dim;

1143:       PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx1));
1144:       PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx2));

1146:       if (dim[ndim - 1] % 2 == 0) NM = 2;
1147:       else NM = 1;

1149:       j = low;
1150:       for (i = 0, k = 1; i < (PetscInt)local_n0 * partial_dim; i++, k++) {
1151:         indx1[i] = local_0_start * partial_dim + i;
1152:         indx2[i] = j;
1153:         if (k % dim[ndim - 1] == 0) j += NM;
1154:         j++;
1155:       }
1156:       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
1157:       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));

1159:       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1160:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1161:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1162:       PetscCall(VecScatterDestroy(&vecscat));
1163:       PetscCall(ISDestroy(&list1));
1164:       PetscCall(ISDestroy(&list2));
1165:       PetscCall(PetscFree(indx1));
1166:       PetscCall(PetscFree(indx2));
1167:   #endif
1168:       break;
1169:     }
1170: #endif
1171:   }
1172:   PetscFunctionReturn(PETSC_SUCCESS);
1173: }

1175: /*MC
1176:   MATFFTW -  "fftw" - Matrix type that provides FFT via the FFTW external package.

1178:   Level: intermediate

1180: .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatType`, `MatCreateFFT()`
1181: M*/
1182: PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1183: {
1184:   MPI_Comm    comm;
1185:   Mat_FFT    *fft = (Mat_FFT *)A->data;
1186:   Mat_FFTW   *fftw;
1187:   PetscInt    ndim = fft->ndim, *dim = fft->dim;
1188:   const char *plans[]  = {"FFTW_ESTIMATE", "FFTW_MEASURE", "FFTW_PATIENT", "FFTW_EXHAUSTIVE"};
1189:   unsigned    iplans[] = {FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE};
1190:   PetscBool   flg;
1191:   PetscInt    p_flag, partial_dim = 1, ctr;
1192:   PetscMPIInt size, rank;
1193:   ptrdiff_t  *pdim;
1194: #if !defined(PETSC_USE_COMPLEX)
1195:   PetscInt tot_dim;
1196: #endif

1198:   PetscFunctionBegin;
1199:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1200:   PetscCallMPI(MPI_Comm_size(comm, &size));
1201:   PetscCallMPI(MPI_Comm_rank(comm, &rank));

1203: #if !PetscDefined(HAVE_MPIUNI)
1204:   fftw_mpi_init();
1205: #endif
1206:   pdim    = (ptrdiff_t *)calloc(ndim, sizeof(ptrdiff_t));
1207:   pdim[0] = dim[0];
1208: #if !defined(PETSC_USE_COMPLEX)
1209:   tot_dim = 2 * dim[0];
1210: #endif
1211:   for (ctr = 1; ctr < ndim; ctr++) {
1212:     partial_dim *= dim[ctr];
1213:     pdim[ctr] = dim[ctr];
1214: #if !defined(PETSC_USE_COMPLEX)
1215:     if (ctr == ndim - 1) tot_dim *= (dim[ctr] / 2 + 1);
1216:     else tot_dim *= dim[ctr];
1217: #endif
1218:   }

1220:   if (size == 1) {
1221: #if defined(PETSC_USE_COMPLEX)
1222:     PetscCall(MatSetSizes(A, fft->N, fft->N, fft->N, fft->N));
1223:     fft->n = fft->N;
1224: #else
1225:     PetscCall(MatSetSizes(A, tot_dim, tot_dim, tot_dim, tot_dim));
1226:     fft->n = tot_dim;
1227: #endif
1228: #if !PetscDefined(HAVE_MPIUNI)
1229:   } else {
1230:     ptrdiff_t local_n0, local_0_start, local_n1, local_1_start;
1231:   #if !defined(PETSC_USE_COMPLEX)
1232:     ptrdiff_t temp;
1233:     PetscInt  N1;
1234:   #endif

1236:     switch (ndim) {
1237:     case 1:
1238:   #if !defined(PETSC_USE_COMPLEX)
1239:       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
1240:   #else
1241:       fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
1242:       fft->n = (PetscInt)local_n0;
1243:       PetscCall(MatSetSizes(A, local_n1, fft->n, fft->N, fft->N));
1244:   #endif
1245:       break;
1246:     case 2:
1247:   #if defined(PETSC_USE_COMPLEX)
1248:       fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
1249:       fft->n = (PetscInt)local_n0 * dim[1];
1250:       PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1251:   #else
1252:       fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

1254:       fft->n = 2 * (PetscInt)local_n0 * (dim[1] / 2 + 1);
1255:       PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * (dim[1] / 2 + 1), 2 * dim[0] * (dim[1] / 2 + 1)));
1256:   #endif
1257:       break;
1258:     case 3:
1259:   #if defined(PETSC_USE_COMPLEX)
1260:       fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);

1262:       fft->n = (PetscInt)local_n0 * dim[1] * dim[2];
1263:       PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1264:   #else
1265:       fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

1267:       fft->n = 2 * (PetscInt)local_n0 * dim[1] * (dim[2] / 2 + 1);
1268:       PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * dim[1] * (dim[2] / 2 + 1), 2 * dim[0] * dim[1] * (dim[2] / 2 + 1)));
1269:   #endif
1270:       break;
1271:     default:
1272:   #if defined(PETSC_USE_COMPLEX)
1273:       fftw_mpi_local_size(ndim, pdim, comm, &local_n0, &local_0_start);

1275:       fft->n = (PetscInt)local_n0 * partial_dim;
1276:       PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1277:   #else
1278:       temp = pdim[ndim - 1];

1280:       pdim[ndim - 1] = temp / 2 + 1;

1282:       fftw_mpi_local_size_transposed(ndim, pdim, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

1284:       fft->n = 2 * (PetscInt)local_n0 * partial_dim * pdim[ndim - 1] / temp;
1285:       N1     = 2 * fft->N * (PetscInt)pdim[ndim - 1] / ((PetscInt)temp);

1287:       pdim[ndim - 1] = temp;

1289:       PetscCall(MatSetSizes(A, fft->n, fft->n, N1, N1));
1290:   #endif
1291:       break;
1292:     }
1293: #endif
1294:   }
1295:   free(pdim);
1296:   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATFFTW));
1297:   PetscCall(PetscNew(&fftw));
1298:   fft->data = (void *)fftw;

1300:   fftw->ndim_fftw   = (ptrdiff_t)ndim; /* This is the dimension of the FFT */
1301:   fftw->partial_dim = partial_dim;

1303:   PetscCall(PetscMalloc1(ndim, &fftw->dim_fftw));
1304:   if (size == 1) {
1305: #if defined(PETSC_USE_64BIT_INDICES)
1306:     fftw->iodims = (fftw_iodim64 *)malloc(sizeof(fftw_iodim64) * ndim);
1307: #else
1308:     fftw->iodims = (fftw_iodim *)malloc(sizeof(fftw_iodim) * ndim);
1309: #endif
1310:   }

1312:   for (ctr = 0; ctr < ndim; ctr++) (fftw->dim_fftw)[ctr] = dim[ctr];

1314:   fftw->p_forward  = NULL;
1315:   fftw->p_backward = NULL;
1316:   fftw->p_flag     = FFTW_ESTIMATE;

1318:   if (size == 1) {
1319:     A->ops->mult          = MatMult_SeqFFTW;
1320:     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1321: #if !PetscDefined(HAVE_MPIUNI)
1322:   } else {
1323:     A->ops->mult          = MatMult_MPIFFTW;
1324:     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1325: #endif
1326:   }
1327:   fft->matdestroy = MatDestroy_FFTW;
1328:   A->assembled    = PETSC_TRUE;
1329:   A->preallocated = PETSC_TRUE;

1331:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", MatCreateVecsFFTW_FFTW));
1332:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", VecScatterPetscToFFTW_FFTW));
1333:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", VecScatterFFTWToPetsc_FFTW));

1335:   /* get runtime options */
1336:   PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "FFTW Options", "Mat");
1337:   PetscCall(PetscOptionsEList("-mat_fftw_plannerflags", "Planner Flags", "None", plans, 4, plans[0], &p_flag, &flg));
1338:   if (flg) fftw->p_flag = iplans[p_flag];
1339:   PetscOptionsEnd();
1340:   PetscFunctionReturn(PETSC_SUCCESS);
1341: }
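
/*
   The planner flag registered above can be chosen at runtime, e.g. (a sketch; "./app" is a
   placeholder executable name):

     ./app -mat_fftw_plannerflags FFTW_MEASURE

   FFTW_ESTIMATE (the default here) picks a plan quickly from heuristics, while FFTW_MEASURE,
   FFTW_PATIENT, and FFTW_EXHAUSTIVE spend progressively more time timing candidate plans,
   which can pay off when the same transform is applied many times.
*/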