Actual source code: fftw.c

  1: /*
  2:     Provides an interface to the FFTW package.
  3:     Testing examples can be found in ~src/mat/tests
  4: */

  6: #include <../src/mat/impls/fft/fft.h>
  7: EXTERN_C_BEGIN
  8: #if !PetscDefined(HAVE_MPIUNI)
  9:   #include <fftw3-mpi.h>
 10: #else
 11:   #include <fftw3.h>
 12: #endif
 13: EXTERN_C_END

 15: typedef struct {
 16:   ptrdiff_t ndim_fftw, *dim_fftw;
 17: #if defined(PETSC_USE_64BIT_INDICES)
 18:   fftw_iodim64 *iodims;
 19: #else
 20:   fftw_iodim *iodims;
 21: #endif
 22:   PetscInt     partial_dim;
 23:   fftw_plan    p_forward, p_backward;
 24:   unsigned     p_flag;                                      /* planner flag: one of FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
 25:   PetscScalar *finarray, *foutarray, *binarray, *boutarray; /* keep track of the arrays because fftw_execute() may only be used with the arrays the plan was created with */
 26: } Mat_FFTW;

 28: extern PetscErrorCode MatMult_SeqFFTW(Mat, Vec, Vec);
 29: extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat, Vec, Vec);
 30: extern PetscErrorCode MatDestroy_FFTW(Mat);
 31: extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat, Vec *, Vec *, Vec *);
 32: #if !PetscDefined(HAVE_MPIUNI)
 33: extern PetscErrorCode MatMult_MPIFFTW(Mat, Vec, Vec);
 34: extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat, Vec, Vec);
 35: extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
 36: #endif

 38: PetscErrorCode MatMult_SeqFFTW(Mat A, Vec x, Vec y)
 39: {
 40:   Mat_FFT           *fft  = (Mat_FFT *)A->data;
 41:   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
 42:   const PetscScalar *x_array;
 43:   PetscScalar       *y_array;
 44:   Vec                xx;
 45: #if defined(PETSC_USE_COMPLEX)
 46:   #if defined(PETSC_USE_64BIT_INDICES)
 47:   fftw_iodim64 *iodims;
 48:   #else
 49:   fftw_iodim *iodims;
 50:   #endif
 51:   PetscInt i;
 52: #endif
 53:   PetscInt ndim = fft->ndim, *dim = fft->dim;

 55:   PetscFunctionBegin;
 56:   if (!fftw->p_forward && fftw->p_flag != FFTW_ESTIMATE) {
 57:     /* The data in the in/out arrays is overwritten during planning, so a dummy array is needed for the computation; see FFTW manual sec. 2.1 or sec. 4 */
 58:     PetscCall(VecDuplicate(x, &xx));
 59:     PetscCall(VecGetArrayRead(xx, &x_array));
 60:   } else {
 61:     PetscCall(VecGetArrayRead(x, &x_array));
 62:   }
 63:   PetscCall(VecGetArray(y, &y_array));
 64:   if (!fftw->p_forward) { /* create a plan, then execute it */
 65:     switch (ndim) {
 66:     case 1:
 67: #if defined(PETSC_USE_COMPLEX)
 68:       fftw->p_forward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
 69: #else
 70:       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
 71: #endif
 72:       break;
 73:     case 2:
 74: #if defined(PETSC_USE_COMPLEX)
 75:       fftw->p_forward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
 76: #else
 77:       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
 78: #endif
 79:       break;
 80:     case 3:
 81: #if defined(PETSC_USE_COMPLEX)
 82:       fftw->p_forward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
 83: #else
 84:       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
 85: #endif
 86:       break;
 87:     default:
 88: #if defined(PETSC_USE_COMPLEX)
 89:       iodims = fftw->iodims;
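      /* build row-major strides for the FFTW guru interface: the innermost (last) dimension is
         contiguous (stride 1) and each outer stride is the product of all inner dimension sizes */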
 90:   #if defined(PETSC_USE_64BIT_INDICES)
 91:       if (ndim) {
 92:         iodims[ndim - 1].n  = (ptrdiff_t)dim[ndim - 1];
 93:         iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
 94:         for (i = ndim - 2; i >= 0; --i) {
 95:           iodims[i].n  = (ptrdiff_t)dim[i];
 96:           iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
 97:         }
 98:       }
 99:       fftw->p_forward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
100:   #else
101:       if (ndim) {
102:         iodims[ndim - 1].n  = (int)dim[ndim - 1];
103:         iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
104:         for (i = ndim - 2; i >= 0; --i) {
105:           iodims[i].n  = (int)dim[i];
106:           iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
107:         }
108:       }
109:       fftw->p_forward = fftw_plan_guru_dft((int)ndim, (fftw_iodim *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
110:   #endif

112: #else
113:       fftw->p_forward = fftw_plan_dft_r2c(ndim, (int *)dim, (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
114: #endif
115:       break;
116:     }
117:     if (fftw->p_flag != FFTW_ESTIMATE) {
118:       /* The data in the in/out arrays is overwritten during planning, so a dummy array is needed for the computation; see FFTW manual sec. 2.1 or sec. 4 */
119:       PetscCall(VecRestoreArrayRead(xx, &x_array));
120:       PetscCall(VecDestroy(&xx));
121:       PetscCall(VecGetArrayRead(x, &x_array));
122:     } else {
123:       fftw->finarray  = (PetscScalar *)x_array;
124:       fftw->foutarray = y_array;
125:     }
126:   }

128:   if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
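    /* note: FFTW's new-array execute functions require the new arrays to have the same alignment as
       those used to create the plan; see the FFTW manual on new-array execute functions */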
129: #if defined(PETSC_USE_COMPLEX)
130:     fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
131: #else
132:     fftw_execute_dft_r2c(fftw->p_forward, (double *)x_array, (fftw_complex *)y_array);
133: #endif
134:   } else {
135:     fftw_execute(fftw->p_forward);
136:   }
137:   PetscCall(VecRestoreArray(y, &y_array));
138:   PetscCall(VecRestoreArrayRead(x, &x_array));
139:   PetscFunctionReturn(PETSC_SUCCESS);
140: }

142: PetscErrorCode MatMultTranspose_SeqFFTW(Mat A, Vec x, Vec y)
143: {
144:   Mat_FFT           *fft  = (Mat_FFT *)A->data;
145:   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
146:   const PetscScalar *x_array;
147:   PetscScalar       *y_array;
148:   PetscInt           ndim = fft->ndim, *dim = fft->dim;
149:   Vec                xx;
150: #if defined(PETSC_USE_COMPLEX)
151:   #if defined(PETSC_USE_64BIT_INDICES)
152:   fftw_iodim64 *iodims = fftw->iodims;
153:   #else
154:   fftw_iodim *iodims = fftw->iodims;
155:   #endif
156: #endif

158:   PetscFunctionBegin;
159:   if (!fftw->p_backward && fftw->p_flag != FFTW_ESTIMATE) {
160:     /* The data in the in/out arrays is overwritten during planning, so a dummy array is needed for the computation; see FFTW manual sec. 2.1 or sec. 4 */
161:     PetscCall(VecDuplicate(x, &xx));
162:     PetscCall(VecGetArrayRead(xx, &x_array));
163:   } else {
164:     PetscCall(VecGetArrayRead(x, &x_array));
165:   }
166:   PetscCall(VecGetArray(y, &y_array));
167:   if (!fftw->p_backward) { /* create a plan, then execute it */
168:     switch (ndim) {
169:     case 1:
170: #if defined(PETSC_USE_COMPLEX)
171:       fftw->p_backward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
172: #else
173:       fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
174: #endif
175:       break;
176:     case 2:
177: #if defined(PETSC_USE_COMPLEX)
178:       fftw->p_backward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
179: #else
180:       fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
181: #endif
182:       break;
183:     case 3:
184: #if defined(PETSC_USE_COMPLEX)
185:       fftw->p_backward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
186: #else
187:       fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
188: #endif
189:       break;
190:     default:
191: #if defined(PETSC_USE_COMPLEX)
192:   #if defined(PETSC_USE_64BIT_INDICES)
193:       fftw->p_backward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
194:   #else
195:       fftw->p_backward = fftw_plan_guru_dft((int)ndim, iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
196:   #endif
197: #else
198:       fftw->p_backward = fftw_plan_dft_c2r((int)ndim, (int *)dim, (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
199: #endif
200:       break;
201:     }
202:     if (fftw->p_flag != FFTW_ESTIMATE) {
203:       /* The data in the in/out arrays is overwritten during planning, so a dummy array is needed for the computation; see FFTW manual sec. 2.1 or sec. 4 */
204:       PetscCall(VecRestoreArrayRead(xx, &x_array));
205:       PetscCall(VecDestroy(&xx));
206:       PetscCall(VecGetArrayRead(x, &x_array));
207:     } else {
208:       fftw->binarray  = (PetscScalar *)x_array;
209:       fftw->boutarray = y_array;
210:     }
211:   }
212:   if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
213: #if defined(PETSC_USE_COMPLEX)
214:     fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
215: #else
216:     fftw_execute_dft_c2r(fftw->p_backward, (fftw_complex *)x_array, (double *)y_array);
217: #endif
218:   } else {
219:     fftw_execute(fftw->p_backward);
220:   }
221:   PetscCall(VecRestoreArray(y, &y_array));
222:   PetscCall(VecRestoreArrayRead(x, &x_array));
223:   PetscFunctionReturn(PETSC_SUCCESS);
224: }

226: #if !PetscDefined(HAVE_MPIUNI)
227: PetscErrorCode MatMult_MPIFFTW(Mat A, Vec x, Vec y)
228: {
229:   Mat_FFT           *fft  = (Mat_FFT *)A->data;
230:   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
231:   const PetscScalar *x_array;
232:   PetscScalar       *y_array;
233:   PetscInt           ndim = fft->ndim, *dim = fft->dim;
234:   MPI_Comm           comm;
235:   Vec                xx;

237:   PetscFunctionBegin;
238:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
239:   if (!fftw->p_forward && fftw->p_flag != FFTW_ESTIMATE) {
240:     /* The data in the in/out arrays is overwritten during planning, so a dummy array is needed for the computation; see FFTW manual sec. 2.1 or sec. 4 */
241:     PetscCall(VecDuplicate(x, &xx));
242:     PetscCall(VecGetArrayRead(xx, &x_array));
243:   } else {
244:     PetscCall(VecGetArrayRead(x, &x_array));
245:   }
246:   PetscCall(VecGetArray(y, &y_array));
247:   if (!fftw->p_forward) { /* create a plan, then execute it */
248:     switch (ndim) {
249:     case 1:
250:   #if defined(PETSC_USE_COMPLEX)
251:       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
252:   #else
253:       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
254:   #endif
255:       break;
256:     case 2:
257:   #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
258:       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
259:   #else
260:       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
261:   #endif
262:       break;
263:     case 3:
264:   #if defined(PETSC_USE_COMPLEX)
265:       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
266:   #else
267:       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
268:   #endif
269:       break;
270:     default:
271:   #if defined(PETSC_USE_COMPLEX)
272:       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
273:   #else
274:       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw, fftw->dim_fftw, (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
275:   #endif
276:       break;
277:     }
278:     if (fftw->p_flag != FFTW_ESTIMATE) {
279:       /* The data in the in/out arrays is overwritten during planning, so a dummy array is needed for the computation; see FFTW manual sec. 2.1 or sec. 4 */
280:       PetscCall(VecRestoreArrayRead(xx, &x_array));
281:       PetscCall(VecDestroy(&xx));
282:       PetscCall(VecGetArrayRead(x, &x_array));
283:     } else {
284:       fftw->finarray  = (PetscScalar *)x_array;
285:       fftw->foutarray = y_array;
286:     }
287:   }
288:   if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
289:     fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
290:   } else {
291:     fftw_execute(fftw->p_forward);
292:   }
293:   PetscCall(VecRestoreArray(y, &y_array));
294:   PetscCall(VecRestoreArrayRead(x, &x_array));
295:   PetscFunctionReturn(PETSC_SUCCESS);
296: }

298: PetscErrorCode MatMultTranspose_MPIFFTW(Mat A, Vec x, Vec y)
299: {
300:   Mat_FFT           *fft  = (Mat_FFT *)A->data;
301:   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
302:   const PetscScalar *x_array;
303:   PetscScalar       *y_array;
304:   PetscInt           ndim = fft->ndim, *dim = fft->dim;
305:   MPI_Comm           comm;
306:   Vec                xx;

308:   PetscFunctionBegin;
309:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
310:   if (!fftw->p_backward && fftw->p_flag != FFTW_ESTIMATE) {
311:     /* The data in the in/out arrays is overwritten during planning, so a dummy array is needed for the computation; see FFTW manual sec. 2.1 or sec. 4 */
312:     PetscCall(VecDuplicate(x, &xx));
313:     PetscCall(VecGetArrayRead(xx, &x_array));
314:   } else {
315:     PetscCall(VecGetArrayRead(x, &x_array));
316:   }
317:   PetscCall(VecGetArray(y, &y_array));
318:   if (!fftw->p_backward) { /* create a plan, then execute it */
319:     switch (ndim) {
320:     case 1:
321:   #if defined(PETSC_USE_COMPLEX)
322:       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
323:   #else
324:       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
325:   #endif
326:       break;
327:     case 2:
328:   #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
329:       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
330:   #else
331:       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
332:   #endif
333:       break;
334:     case 3:
335:   #if defined(PETSC_USE_COMPLEX)
336:       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
337:   #else
338:       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
339:   #endif
340:       break;
341:     default:
342:   #if defined(PETSC_USE_COMPLEX)
343:       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
344:   #else
345:       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
346:   #endif
347:       break;
348:     }
349:     if (fftw->p_flag != FFTW_ESTIMATE) {
350:       /* The data in the in/out arrays is overwritten during planning, so a dummy array is needed for the computation; see FFTW manual sec. 2.1 or sec. 4 */
351:       PetscCall(VecRestoreArrayRead(xx, &x_array));
352:       PetscCall(VecDestroy(&xx));
353:       PetscCall(VecGetArrayRead(x, &x_array));
354:     } else {
355:       fftw->binarray  = (PetscScalar *)x_array;
356:       fftw->boutarray = y_array;
357:     }
358:   }
359:   if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
360:     fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
361:   } else {
362:     fftw_execute(fftw->p_backward);
363:   }
364:   PetscCall(VecRestoreArray(y, &y_array));
365:   PetscCall(VecRestoreArrayRead(x, &x_array));
366:   PetscFunctionReturn(PETSC_SUCCESS);
367: }
368: #endif

370: PetscErrorCode MatDestroy_FFTW(Mat A)
371: {
372:   Mat_FFT  *fft  = (Mat_FFT *)A->data;
373:   Mat_FFTW *fftw = (Mat_FFTW *)fft->data;

375:   PetscFunctionBegin;
376:   fftw_destroy_plan(fftw->p_forward);
377:   fftw_destroy_plan(fftw->p_backward);
378:   if (fftw->iodims) free(fftw->iodims);
379:   PetscCall(PetscFree(fftw->dim_fftw));
380:   PetscCall(PetscFree(fft->data));
381:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", NULL));
382:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", NULL));
383:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", NULL));
384: #if !PetscDefined(HAVE_MPIUNI)
385:   fftw_mpi_cleanup();
386: #endif
387:   PetscFunctionReturn(PETSC_SUCCESS);
388: }

390: #if !PetscDefined(HAVE_MPIUNI)
391: #include <../src/vec/vec/impls/mpi/pvecimpl.h>
392: PetscErrorCode VecDestroy_MPIFFTW(Vec v)
393: {
394:   PetscScalar *array;

396:   PetscFunctionBegin;
397:   PetscCall(VecGetArray(v, &array));
398:   fftw_free((fftw_complex *)array);
399:   PetscCall(VecRestoreArray(v, &array));
400:   PetscCall(VecDestroy_MPI(v));
401:   PetscFunctionReturn(PETSC_SUCCESS);
402: }
403: #endif

405: #if !PetscDefined(HAVE_MPIUNI)
406: static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin, Vec *fin_new)
407: {
408:   Mat A;

410:   PetscFunctionBegin;
411:   PetscCall(PetscObjectQuery((PetscObject)fin, "FFTmatrix", (PetscObject *)&A));
412:   PetscCall(MatCreateVecsFFTW_FFTW(A, fin_new, NULL, NULL));
413:   PetscFunctionReturn(PETSC_SUCCESS);
414: }

416: static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout, Vec *fout_new)
417: {
418:   Mat A;

420:   PetscFunctionBegin;
421:   PetscCall(PetscObjectQuery((PetscObject)fout, "FFTmatrix", (PetscObject *)&A));
422:   PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, fout_new, NULL));
423:   PetscFunctionReturn(PETSC_SUCCESS);
424: }

426: static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new)
427: {
428:   Mat A;

430:   PetscFunctionBegin;
431:   PetscCall(PetscObjectQuery((PetscObject)bout, "FFTmatrix", (PetscObject *)&A));
432:   PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, NULL, bout_new));
433:   PetscFunctionReturn(PETSC_SUCCESS);
434: }
435: #endif

437: /*@
438:   MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
439:   parallel layout determined by `MATFFTW`

441:   Collective

443:   Input Parameter:
444: . A - the matrix

446:   Output Parameters:
447: + x - (optional) input vector of forward FFTW
448: . y - (optional) output vector of forward FFTW
449: - z - (optional) output vector of backward FFTW

451:   Options Database Key:
452: . -mat_fftw_plannerflags - set the FFTW planner flag, one of FFTW_ESTIMATE (the default), FFTW_MEASURE, FFTW_PATIENT, or FFTW_EXHAUSTIVE

454:   Level: advanced

456:   Notes:
457:   The parallel layout of the output of the forward FFTW transform is always the same as that of
458:   the input of the backward transform. However, the parallel layout of the input vector of the
459:   forward transform might not be the same as that of the output of the backward transform.

461:   Parallel real transforms need extra space: extra zeros must be padded at the end of the last
462:   dimension. For this reason one needs to invoke the fftw_mpi_local_size_transposed() routine,
463:   changing the last dimension from n to n/2+1 in that call. The number of zeros to pad depends on
464:   whether the last dimension is even or odd: two zeros if it is even, one if it is odd. For example,
465:   a last dimension of 10 is padded to 2*(10/2+1) = 12 entries (two extra zeros), while a last
466:   dimension of 9 is padded to 2*(9/2+1) = 10 entries (one extra zero).

468:   Lastly, one needs some scratch space at the end of the data set on each process. The alloc_local
469:   value returned by the fftw_mpi_local_size routines accounts for both the data and this scratch
470:   space on each process.
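
  Example Usage:
  A minimal sketch for a 2D complex-to-complex transform (error checking and the setup of comm, n0,
  and n1 are assumed; the vector names are for illustration only):
.vb
  Mat      A;
  Vec      x, y, z;
  PetscInt dim[2] = {n0, n1};

  PetscCall(MatCreateFFT(comm, 2, dim, MATFFTW, &A));
  PetscCall(MatCreateVecsFFTW(A, &x, &y, &z)); /* x: forward input, y: forward output, z: backward output */
  PetscCall(MatMult(A, x, y));                 /* forward FFT */
  PetscCall(MatMultTranspose(A, y, z));        /* backward FFT (unnormalized) */
.ve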

472:   Developer Notes:
473:   How come `MatCreateVecs()` doesn't produce the correctly padded vectors automatically?

475: .seealso: [](ch_matrices), `Mat`, `MATFFTW`, `MatCreateFFT()`, `MatCreateVecs()`
476: @*/
477: PetscErrorCode MatCreateVecsFFTW(Mat A, Vec *x, Vec *y, Vec *z)
478: {
479:   PetscFunctionBegin;
480:   PetscUseMethod(A, "MatCreateVecsFFTW_C", (Mat, Vec *, Vec *, Vec *), (A, x, y, z));
481:   PetscFunctionReturn(PETSC_SUCCESS);
482: }

484: PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A, Vec *fin, Vec *fout, Vec *bout)
485: {
486:   PetscMPIInt size, rank;
487:   MPI_Comm    comm;
488:   Mat_FFT    *fft = (Mat_FFT *)A->data;

490:   PetscFunctionBegin;
493:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));

495:   PetscCallMPI(MPI_Comm_size(comm, &size));
496:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
497:   if (size == 1) { /* sequential case */
498: #if defined(PETSC_USE_COMPLEX)
499:     if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fin));
500:     if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fout));
501:     if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, bout));
502: #else
503:     if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fin));
504:     if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fout));
505:     if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, bout));
506: #endif
507: #if !PetscDefined(HAVE_MPIUNI)
508:   } else { /* parallel cases */
509:     Mat_FFTW     *fftw = (Mat_FFTW *)fft->data;
510:     PetscInt      ndim = fft->ndim, *dim = fft->dim;
511:     ptrdiff_t     alloc_local, local_n0, local_0_start;
512:     ptrdiff_t     local_n1;
513:     fftw_complex *data_fout;
514:     ptrdiff_t     local_1_start;
515:   #if defined(PETSC_USE_COMPLEX)
516:     fftw_complex *data_fin, *data_bout;
517:   #else
518:     double   *data_finr, *data_boutr;
519:     PetscInt  n1, N1;
520:     ptrdiff_t temp;
521:   #endif

523:     switch (ndim) {
524:     case 1:
525:   #if !defined(PETSC_USE_COMPLEX)
526:       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not allow parallel real 1D transform");
527:   #else
528:       alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
529:       if (fin) {
530:         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
531:         PetscCall(VecCreateMPIWithArray(comm, 1, local_n0, fft->N, (const PetscScalar *)data_fin, fin));
532:         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
533:         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
534:         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
535:       }
536:       if (fout) {
537:         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
538:         PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_fout, fout));
539:         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
540:         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
541:         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
542:       }
543:       alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
544:       if (bout) {
545:         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
546:         PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_bout, bout));
547:         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
548:         (*bout)->ops->duplicate = VecDuplicate_FFTW_fout;
549:         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
550:       }
551:       break;
552:   #endif
553:     case 2:
554:   #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no longer the product of the individual dimensions */
555:       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
556:       N1          = 2 * dim[0] * (dim[1] / 2 + 1);
557:       n1          = 2 * local_n0 * (dim[1] / 2 + 1);
558:       if (fin) {
559:         data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
560:         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
561:         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
562:         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
563:         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
564:       }
565:       if (fout) {
566:         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
567:         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_fout, fout));
568:         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
569:         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
570:         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
571:       }
572:       if (bout) {
573:         data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
574:         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
575:         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
576:         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
577:         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
578:       }
579:   #else
580:       /* Get local size */
581:       alloc_local = fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
582:       if (fin) {
583:         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
584:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
585:         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
586:         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
587:         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
588:       }
589:       if (fout) {
590:         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
591:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
592:         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
593:         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
594:         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
595:       }
596:       if (bout) {
597:         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
598:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
599:         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
600:         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
601:         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
602:       }
603:   #endif
604:       break;
605:     case 3:
606:   #if !defined(PETSC_USE_COMPLEX)
607:       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
608:       N1          = 2 * dim[0] * dim[1] * (dim[2] / 2 + 1);
609:       n1          = 2 * local_n0 * dim[1] * (dim[2] / 2 + 1);
610:       if (fin) {
611:         data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
612:         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
613:         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
614:         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
615:         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
616:       }
617:       if (fout) {
618:         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
619:         PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (PetscScalar *)data_fout, fout));
620:         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
621:         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
622:         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
623:       }
624:       if (bout) {
625:         data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
626:         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
627:         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
628:         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
629:         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
630:       }
631:   #else
632:       alloc_local = fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
633:       if (fin) {
634:         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
635:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
636:         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
637:         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
638:         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
639:       }
640:       if (fout) {
641:         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
642:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
643:         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
644:         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
645:         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
646:       }
647:       if (bout) {
648:         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
649:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
650:         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
651:         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
652:         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
653:       }
654:   #endif
655:       break;
656:     default:
657:   #if !defined(PETSC_USE_COMPLEX)
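      /* temporarily replace the last dimension n by n/2+1 (the r2c output size) so the local-size
         query sees the padded layout, then restore the original value afterwards */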
658:       temp                                  = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
659:       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
660:       alloc_local                           = fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
661:       N1                                    = 2 * fft->N * (PetscInt)(fftw->dim_fftw[fftw->ndim_fftw - 1]) / ((PetscInt)temp);
662:       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;

664:       if (fin) {
665:         data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
666:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_finr, fin));
667:         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
668:         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
669:         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
670:       }
671:       if (fout) {
672:         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
673:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_fout, fout));
674:         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
675:         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
676:         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
677:       }
678:       if (bout) {
679:         data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
680:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_boutr, bout));
681:         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
682:         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
683:         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
684:       }
685:   #else
686:       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
687:       if (fin) {
688:         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
689:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
690:         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
691:         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
692:         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
693:       }
694:       if (fout) {
695:         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
696:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
697:         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
698:         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
699:         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
700:       }
701:       if (bout) {
702:         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
703:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
704:         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
705:         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
706:         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
707:       }
708:   #endif
709:       break;
710:     }
711:     /* fftw vectors have their data array allocated by fftw_malloc, so that v->array is set but
712:        v->array_allocated=NULL. A regular replacearray call would not free that memory and would
713:        only cause memory leaks. We nullify these function pointers here to avoid accidental use.
714:     */
715:     if (fin) (*fin)->ops->replacearray = NULL;
716:     if (fout) (*fout)->ops->replacearray = NULL;
717:     if (bout) (*bout)->ops->replacearray = NULL;
718: #endif
719:   }
720:   PetscFunctionReturn(PETSC_SUCCESS);
721: }

723: /*@
724:   VecScatterPetscToFFTW - Copies a PETSc vector to the vector that goes into `MATFFTW` calls.

726:   Collective

728:   Input Parameters:
729: + A - FFTW matrix
730: - x - the PETSc vector

732:   Output Parameter:
733: . y - the FFTW vector

735:   Level: intermediate

737:   Note:
738:   For a parallel real FFT, FFTW requires insertion of extra space at the end of the last dimension. This is
739:   required even when one is not doing an in-place transform. The last dimension size must be changed to
740:   2*(dim[last]/2+1) to accommodate these extra zeros. This routine does that job with a scatter operation.
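
  Example Usage:
  A minimal sketch (A is the `MATFFTW` matrix; u is assumed to be a user vector in PETSc's natural
  layout and x, y are the forward vectors from `MatCreateVecsFFTW()`; the names are illustrative):
.vb
  PetscCall(VecScatterPetscToFFTW(A, u, x)); /* copy u into the padded FFTW layout */
  PetscCall(MatMult(A, x, y));               /* forward FFT on the padded data */
.ve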

742: .seealso: [](ch_matrices), `Mat`, `MATFFTW`, `VecScatterFFTWToPetsc()`, `MatCreateVecsFFTW()`
743: @*/
744: PetscErrorCode VecScatterPetscToFFTW(Mat A, Vec x, Vec y)
745: {
746:   PetscFunctionBegin;
747:   PetscUseMethod(A, "VecScatterPetscToFFTW_C", (Mat, Vec, Vec), (A, x, y));
748:   PetscFunctionReturn(PETSC_SUCCESS);
749: }

751: static PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A, Vec x, Vec y)
752: {
753:   MPI_Comm    comm;
754:   Mat_FFT    *fft = (Mat_FFT *)A->data;
755:   PetscInt    low;
756:   PetscMPIInt rank, size;
757:   PetscInt    vsize, vsize1;
758:   VecScatter  vecscat;
759:   IS          list1;

761:   PetscFunctionBegin;
762:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
763:   PetscCallMPI(MPI_Comm_size(comm, &size));
764:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
765:   PetscCall(VecGetOwnershipRange(y, &low, NULL));

767:   if (size == 1) {
768:     PetscCall(VecGetSize(x, &vsize));
769:     PetscCall(VecGetSize(y, &vsize1));
770:     PetscCall(ISCreateStride(PETSC_COMM_SELF, fft->N, 0, 1, &list1));
771:     PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
772:     PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
773:     PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
774:     PetscCall(VecScatterDestroy(&vecscat));
775:     PetscCall(ISDestroy(&list1));
776: #if !PetscDefined(HAVE_MPIUNI)
777:   } else {
778:     Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
779:     PetscInt  ndim = fft->ndim, *dim = fft->dim;
780:     ptrdiff_t local_n0, local_0_start;
781:     ptrdiff_t local_n1, local_1_start;
782:     IS        list2;
783:   #if !defined(PETSC_USE_COMPLEX)
784:     PetscInt  i, j, k, partial_dim;
785:     PetscInt *indx1, *indx2, tempindx, tempindx1;
786:     PetscInt  NM;
787:     ptrdiff_t temp;
788:   #endif

790:     switch (ndim) {
791:     case 1:
792:   #if defined(PETSC_USE_COMPLEX)
793:       fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);

795:       PetscCall(ISCreateStride(comm, local_n0, local_0_start, 1, &list1));
796:       PetscCall(ISCreateStride(comm, local_n0, low, 1, &list2));
797:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
798:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
799:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
800:       PetscCall(VecScatterDestroy(&vecscat));
801:       PetscCall(ISDestroy(&list1));
802:       PetscCall(ISDestroy(&list2));
803:   #else
804:       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
805:   #endif
806:       break;
807:     case 2:
808:   #if defined(PETSC_USE_COMPLEX)
809:       fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);

811:       PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
812:       PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
813:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
814:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
815:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
816:       PetscCall(VecScatterDestroy(&vecscat));
817:       PetscCall(ISDestroy(&list1));
818:       PetscCall(ISDestroy(&list2));
819:   #else
820:       fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

822:       PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx1));
823:       PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx2));
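      /* NM = 2*(dim[1]/2+1) is the padded row length: dim[1]+2 when dim[1] is even, dim[1]+1 when odd */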

825:       if (dim[1] % 2 == 0) {
826:         NM = dim[1] + 2;
827:       } else {
828:         NM = dim[1] + 1;
829:       }
830:       for (i = 0; i < local_n0; i++) {
831:         for (j = 0; j < dim[1]; j++) {
832:           tempindx  = i * dim[1] + j;
833:           tempindx1 = i * NM + j;

835:           indx1[tempindx] = local_0_start * dim[1] + tempindx;
836:           indx2[tempindx] = low + tempindx1;
837:         }
838:       }

840:       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
841:       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));

843:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
844:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
845:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
846:       PetscCall(VecScatterDestroy(&vecscat));
847:       PetscCall(ISDestroy(&list1));
848:       PetscCall(ISDestroy(&list2));
849:       PetscCall(PetscFree(indx1));
850:       PetscCall(PetscFree(indx2));
851:   #endif
852:       break;

854:     case 3:
855:   #if defined(PETSC_USE_COMPLEX)
856:       fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);

858:       PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
859:       PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
860:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
861:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
862:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
863:       PetscCall(VecScatterDestroy(&vecscat));
864:       PetscCall(ISDestroy(&list1));
865:       PetscCall(ISDestroy(&list2));
866:   #else
867:       /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
868:       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 3D real transform");
869:       fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

871:       PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx1));
872:       PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx2));

874:       if (dim[2] % 2 == 0) NM = dim[2] + 2;
875:       else NM = dim[2] + 1;

877:       for (i = 0; i < local_n0; i++) {
878:         for (j = 0; j < dim[1]; j++) {
879:           for (k = 0; k < dim[2]; k++) {
880:             tempindx  = i * dim[1] * dim[2] + j * dim[2] + k;
881:             tempindx1 = i * dim[1] * NM + j * NM + k;

883:             indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
884:             indx2[tempindx] = low + tempindx1;
885:           }
886:         }
887:       }

889:       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
890:       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));
891:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
892:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
893:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
894:       PetscCall(VecScatterDestroy(&vecscat));
895:       PetscCall(ISDestroy(&list1));
896:       PetscCall(ISDestroy(&list2));
897:       PetscCall(PetscFree(indx1));
898:       PetscCall(PetscFree(indx2));
899:   #endif
900:       break;

902:     default:
903:   #if defined(PETSC_USE_COMPLEX)
904:       fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);

906:       PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
907:       PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
908:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
909:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
910:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
911:       PetscCall(VecScatterDestroy(&vecscat));
912:       PetscCall(ISDestroy(&list1));
913:       PetscCall(ISDestroy(&list2));
914:   #else
915:       /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
916:       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel DIM>3 real transform");
917:       temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];

919:       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;

921:       fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

923:       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;

925:       partial_dim = fftw->partial_dim;

927:       PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx1));
928:       PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx2));

930:       if (dim[ndim - 1] % 2 == 0) NM = 2;
931:       else NM = 1;

933:       j = low;
934:       for (i = 0, k = 1; i < (PetscInt)local_n0 * partial_dim; i++, k++) {
935:         indx1[i] = local_0_start * partial_dim + i;
936:         indx2[i] = j;
937:         if (k % dim[ndim - 1] == 0) j += NM;
938:         j++;
939:       }
940:       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
941:       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));
942:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
943:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
944:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
945:       PetscCall(VecScatterDestroy(&vecscat));
946:       PetscCall(ISDestroy(&list1));
947:       PetscCall(ISDestroy(&list2));
948:       PetscCall(PetscFree(indx1));
949:       PetscCall(PetscFree(indx2));
950:   #endif
951:       break;
952:     }
953: #endif
954:   }
955:   PetscFunctionReturn(PETSC_SUCCESS);
956: }

958: /*@
959:   VecScatterFFTWToPetsc - Converts `MATFFTW` output vector to a PETSc vector.

961:   Collective

963:   Input Parameters:
964: + A - `MATFFTW` matrix
965: - x - FFTW vector

967:   Output Parameter:
968: . y - PETSc vector

970:   Level: intermediate

972:   Note:
973:   When doing a real transform, the FFTW output of the backward DFT contains extra zeros at the end of the last
974:   dimension. `VecScatterFFTWToPetsc()` removes those extra zeros.
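
  Example Usage:
  A minimal sketch (y, z are the output vectors from `MatCreateVecsFFTW()` and u is a user vector in
  PETSc's natural layout; the names are illustrative):
.vb
  PetscCall(MatMultTranspose(A, y, z));      /* backward FFT; z carries the padding */
  PetscCall(VecScatterFFTWToPetsc(A, z, u)); /* strip the extra zeros while copying into u */
.ve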

976: .seealso: [](ch_matrices), `Mat`, `VecScatterPetscToFFTW()`, `MATFFTW`, `MatCreateVecsFFTW()`
977: @*/
978: PetscErrorCode VecScatterFFTWToPetsc(Mat A, Vec x, Vec y)
979: {
980:   PetscFunctionBegin;
981:   PetscUseMethod(A, "VecScatterFFTWToPetsc_C", (Mat, Vec, Vec), (A, x, y));
982:   PetscFunctionReturn(PETSC_SUCCESS);
983: }

985: static PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A, Vec x, Vec y)
986: {
987:   MPI_Comm    comm;
988:   Mat_FFT    *fft = (Mat_FFT *)A->data;
989:   PetscInt    low;
990:   PetscMPIInt rank, size;
991:   VecScatter  vecscat;
992:   IS          list1;

994:   PetscFunctionBegin;
995:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
996:   PetscCallMPI(MPI_Comm_size(comm, &size));
997:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
998:   PetscCall(VecGetOwnershipRange(x, &low, NULL));

1000:   if (size == 1) {
1001:     PetscCall(ISCreateStride(comm, fft->N, 0, 1, &list1));
1002:     PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
1003:     PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1004:     PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1005:     PetscCall(VecScatterDestroy(&vecscat));
1006:     PetscCall(ISDestroy(&list1));

1008: #if !PetscDefined(HAVE_MPIUNI)
1009:   } else {
1010:     Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
1011:     PetscInt  ndim = fft->ndim, *dim = fft->dim;
1012:     ptrdiff_t local_n0, local_0_start;
1013:     ptrdiff_t local_n1, local_1_start;
1014:     IS        list2;
1015:   #if !defined(PETSC_USE_COMPLEX)
1016:     PetscInt  i, j, k, partial_dim;
1017:     PetscInt *indx1, *indx2, tempindx, tempindx1;
1018:     PetscInt  NM;
1019:     ptrdiff_t temp;
1020:   #endif
1021:     switch (ndim) {
1022:     case 1:
1023:   #if defined(PETSC_USE_COMPLEX)
1024:       fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);

1026:       PetscCall(ISCreateStride(comm, local_n1, local_1_start, 1, &list1));
1027:       PetscCall(ISCreateStride(comm, local_n1, low, 1, &list2));
1028:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
1029:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1030:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1031:       PetscCall(VecScatterDestroy(&vecscat));
1032:       PetscCall(ISDestroy(&list1));
1033:       PetscCall(ISDestroy(&list2));
1034:   #else
1035:       SETERRQ(comm, PETSC_ERR_SUP, "No support for real parallel 1D FFT");
1036:   #endif
1037:       break;
1038:     case 2:
1039:   #if defined(PETSC_USE_COMPLEX)
1040:       fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);

1042:       PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
1043:       PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
1044:       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1045:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1046:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1047:       PetscCall(VecScatterDestroy(&vecscat));
1048:       PetscCall(ISDestroy(&list1));
1049:       PetscCall(ISDestroy(&list2));
1050:   #else
1051:       fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

1053:       PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx1));
1054:       PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx2));

1056:       if (dim[1] % 2 == 0) NM = dim[1] + 2;
1057:       else NM = dim[1] + 1;

1059:       for (i = 0; i < local_n0; i++) {
1060:         for (j = 0; j < dim[1]; j++) {
1061:           tempindx  = i * dim[1] + j;
1062:           tempindx1 = i * NM + j;

1064:           indx1[tempindx] = local_0_start * dim[1] + tempindx;
1065:           indx2[tempindx] = low + tempindx1;
1066:         }
1067:       }

1069:       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
1070:       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));

1072:       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1073:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1074:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1075:       PetscCall(VecScatterDestroy(&vecscat));
1076:       PetscCall(ISDestroy(&list1));
1077:       PetscCall(ISDestroy(&list2));
1078:       PetscCall(PetscFree(indx1));
1079:       PetscCall(PetscFree(indx2));
1080:   #endif
1081:       break;
1082:     case 3:
1083:   #if defined(PETSC_USE_COMPLEX)
1084:       fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);

1086:       PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
1087:       PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
1088:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
1089:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1090:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1091:       PetscCall(VecScatterDestroy(&vecscat));
1092:       PetscCall(ISDestroy(&list1));
1093:       PetscCall(ISDestroy(&list2));
1094:   #else
1095:       fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

1097:       PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx1));
1098:       PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx2));

1100:       if (dim[2] % 2 == 0) NM = dim[2] + 2;
1101:       else NM = dim[2] + 1;

1103:       for (i = 0; i < local_n0; i++) {
1104:         for (j = 0; j < dim[1]; j++) {
1105:           for (k = 0; k < dim[2]; k++) {
1106:             tempindx  = i * dim[1] * dim[2] + j * dim[2] + k;
1107:             tempindx1 = i * dim[1] * NM + j * NM + k;

1109:             indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
1110:             indx2[tempindx] = low + tempindx1;
1111:           }
1112:         }
1113:       }

1115:       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
1116:       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));

1118:       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1119:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1120:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1121:       PetscCall(VecScatterDestroy(&vecscat));
1122:       PetscCall(ISDestroy(&list1));
1123:       PetscCall(ISDestroy(&list2));
1124:       PetscCall(PetscFree(indx1));
1125:       PetscCall(PetscFree(indx2));
1126:   #endif
1127:       break;
1128:     default:
1129:   #if defined(PETSC_USE_COMPLEX)
1130:       fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);

1132:       PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
1133:       PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
1134:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
1135:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1136:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1137:       PetscCall(VecScatterDestroy(&vecscat));
1138:       PetscCall(ISDestroy(&list1));
1139:       PetscCall(ISDestroy(&list2));
1140:   #else
1141:       temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];

1143:       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;

1145:       fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

1147:       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;

1149:       partial_dim = fftw->partial_dim;

1151:       PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx1));
1152:       PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx2));

1154:       if (dim[ndim - 1] % 2 == 0) NM = 2;
1155:       else NM = 1;

1157:       j = low;
1158:       for (i = 0, k = 1; i < (PetscInt)local_n0 * partial_dim; i++, k++) {
1159:         indx1[i] = local_0_start * partial_dim + i;
1160:         indx2[i] = j;
1161:         if (k % dim[ndim - 1] == 0) j += NM;
1162:         j++;
1163:       }
1164:       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
1165:       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));

1167:       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1168:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1169:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1170:       PetscCall(VecScatterDestroy(&vecscat));
1171:       PetscCall(ISDestroy(&list1));
1172:       PetscCall(ISDestroy(&list2));
1173:       PetscCall(PetscFree(indx1));
1174:       PetscCall(PetscFree(indx2));
1175:   #endif
1176:       break;
1177:     }
1178: #endif
1179:   }
1180:   PetscFunctionReturn(PETSC_SUCCESS);
1181: }

1183: /*MC
1184:   MATFFTW - "fftw" - Matrix type that provides FFTs via the external package FFTW.

1186:   Level: intermediate
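
  Note:
  A matrix of this type is normally obtained with `MatCreateFFT()`, for example (a minimal sketch,
  with comm, ndim, and dim assumed to be set up by the caller):
.vb
  PetscCall(MatCreateFFT(comm, ndim, dim, MATFFTW, &A));
.ve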

1188: .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatType`, `MatCreateFFT()`
1189: M*/
1190: PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1191: {
1192:   MPI_Comm    comm;
1193:   Mat_FFT    *fft = (Mat_FFT *)A->data;
1194:   Mat_FFTW   *fftw;
1195:   PetscInt    ndim = fft->ndim, *dim = fft->dim;
1196:   const char *plans[]  = {"FFTW_ESTIMATE", "FFTW_MEASURE", "FFTW_PATIENT", "FFTW_EXHAUSTIVE"};
1197:   unsigned    iplans[] = {FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE};
1198:   PetscBool   flg;
1199:   PetscInt    p_flag, partial_dim = 1, ctr;
1200:   PetscMPIInt size, rank;
1201:   ptrdiff_t  *pdim;
1202: #if !defined(PETSC_USE_COMPLEX)
1203:   PetscInt tot_dim;
1204: #endif

1206:   PetscFunctionBegin;
1207:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1208:   PetscCallMPI(MPI_Comm_size(comm, &size));
1209:   PetscCallMPI(MPI_Comm_rank(comm, &rank));

1211: #if !PetscDefined(HAVE_MPIUNI)
1212:   fftw_mpi_init();
1213: #endif
1214:   pdim    = (ptrdiff_t *)calloc(ndim, sizeof(ptrdiff_t));
1215:   pdim[0] = dim[0];
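  /* for a sequential real transform with ndim > 1, tot_dim = 2*dim[0]*...*dim[ndim-2]*(dim[ndim-1]/2+1) is the padded total size */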
1216: #if !defined(PETSC_USE_COMPLEX)
1217:   tot_dim = 2 * dim[0];
1218: #endif
1219:   for (ctr = 1; ctr < ndim; ctr++) {
1220:     partial_dim *= dim[ctr];
1221:     pdim[ctr] = dim[ctr];
1222: #if !defined(PETSC_USE_COMPLEX)
1223:     if (ctr == ndim - 1) tot_dim *= (dim[ctr] / 2 + 1);
1224:     else tot_dim *= dim[ctr];
1225: #endif
1226:   }

1228:   if (size == 1) {
1229: #if defined(PETSC_USE_COMPLEX)
1230:     PetscCall(MatSetSizes(A, fft->N, fft->N, fft->N, fft->N));
1231:     fft->n = fft->N;
1232: #else
1233:     PetscCall(MatSetSizes(A, tot_dim, tot_dim, tot_dim, tot_dim));
1234:     fft->n = tot_dim;
1235: #endif
1236: #if !PetscDefined(HAVE_MPIUNI)
1237:   } else {
1238:     ptrdiff_t local_n0, local_0_start, local_n1, local_1_start;
1239:   #if !defined(PETSC_USE_COMPLEX)
1240:     ptrdiff_t temp;
1241:     PetscInt  N1;
1242:   #endif

1244:     switch (ndim) {
1245:     case 1:
1246:   #if !defined(PETSC_USE_COMPLEX)
1247:       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
1248:   #else
1249:       fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
1250:       fft->n = (PetscInt)local_n0;
1251:       PetscCall(MatSetSizes(A, local_n1, fft->n, fft->N, fft->N));
1252:   #endif
1253:       break;
1254:     case 2:
1255:   #if defined(PETSC_USE_COMPLEX)
1256:       fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
1257:       fft->n = (PetscInt)local_n0 * dim[1];
1258:       PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1259:   #else
1260:       fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

1262:       fft->n = 2 * (PetscInt)local_n0 * (dim[1] / 2 + 1);
1263:       PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * (dim[1] / 2 + 1), 2 * dim[0] * (dim[1] / 2 + 1)));
1264:   #endif
1265:       break;
1266:     case 3:
1267:   #if defined(PETSC_USE_COMPLEX)
1268:       fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);

1270:       fft->n = (PetscInt)local_n0 * dim[1] * dim[2];
1271:       PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1272:   #else
1273:       fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

1275:       fft->n = 2 * (PetscInt)local_n0 * dim[1] * (dim[2] / 2 + 1);
1276:       PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * dim[1] * (dim[2] / 2 + 1), 2 * dim[0] * dim[1] * (dim[2] / 2 + 1)));
1277:   #endif
1278:       break;
1279:     default:
1280:   #if defined(PETSC_USE_COMPLEX)
1281:       fftw_mpi_local_size(ndim, pdim, comm, &local_n0, &local_0_start);

1283:       fft->n = (PetscInt)local_n0 * partial_dim;
1284:       PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1285:   #else
1286:       temp = pdim[ndim - 1];

1288:       pdim[ndim - 1] = temp / 2 + 1;

1290:       fftw_mpi_local_size_transposed(ndim, pdim, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

1292:       fft->n = 2 * (PetscInt)local_n0 * partial_dim * pdim[ndim - 1] / temp;
1293:       N1     = 2 * fft->N * (PetscInt)pdim[ndim - 1] / ((PetscInt)temp);

1295:       pdim[ndim - 1] = temp;

1297:       PetscCall(MatSetSizes(A, fft->n, fft->n, N1, N1));
1298:   #endif
1299:       break;
1300:     }
1301: #endif
1302:   }
1303:   free(pdim);
1304:   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATFFTW));
1305:   PetscCall(PetscNew(&fftw));
1306:   fft->data = (void *)fftw;

1308:   fftw->ndim_fftw   = (ptrdiff_t)ndim; /* This is the dimension of the FFT */
1309:   fftw->partial_dim = partial_dim;

1311:   PetscCall(PetscMalloc1(ndim, &fftw->dim_fftw));
1312:   if (size == 1) {
1313: #if defined(PETSC_USE_64BIT_INDICES)
1314:     fftw->iodims = (fftw_iodim64 *)malloc(sizeof(fftw_iodim64) * ndim);
1315: #else
1316:     fftw->iodims = (fftw_iodim *)malloc(sizeof(fftw_iodim) * ndim);
1317: #endif
1318:   }

1320:   for (ctr = 0; ctr < ndim; ctr++) (fftw->dim_fftw)[ctr] = dim[ctr];

1322:   fftw->p_forward  = NULL;
1323:   fftw->p_backward = NULL;
1324:   fftw->p_flag     = FFTW_ESTIMATE;

1326:   if (size == 1) {
1327:     A->ops->mult          = MatMult_SeqFFTW;
1328:     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1329: #if !PetscDefined(HAVE_MPIUNI)
1330:   } else {
1331:     A->ops->mult          = MatMult_MPIFFTW;
1332:     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1333: #endif
1334:   }
1335:   fft->matdestroy = MatDestroy_FFTW;
1336:   A->assembled    = PETSC_TRUE;
1337:   A->preallocated = PETSC_TRUE;

1339:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", MatCreateVecsFFTW_FFTW));
1340:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", VecScatterPetscToFFTW_FFTW));
1341:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", VecScatterFFTWToPetsc_FFTW));

1343:   /* get runtime options */
1344:   PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "FFTW Options", "Mat");
1345:   PetscCall(PetscOptionsEList("-mat_fftw_plannerflags", "Planner Flags", "None", plans, 4, plans[0], &p_flag, &flg));
1346:   if (flg) fftw->p_flag = iplans[p_flag];
1347:   PetscOptionsEnd();
1348:   PetscFunctionReturn(PETSC_SUCCESS);
1349: }