/* Actual source code: fftw.c */

  1: /*
  2:     Provides an interface to the FFTW package.
  3:     Testing examples can be found in ~src/mat/tests
  4: */

  6: #include <../src/mat/impls/fft/fft.h>
  7: EXTERN_C_BEGIN
  8: #if !PetscDefined(HAVE_MPIUNI)
  9:   #include <fftw3-mpi.h>
 10: #else
 11:   #include <fftw3.h>
 12: #endif
 13: EXTERN_C_END

typedef struct {
  PetscInt   ndim_fftw; /* number of transform dimensions, as passed to the fftw_mpi planners */
  ptrdiff_t *dim_fftw;  /* dimension sizes in FFTW's ptrdiff_t convention (used by fftw_mpi_plan_* and fftw_mpi_local_size*) */
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64 *iodims; /* dimension/stride descriptors for the guru64 planner interface */
#else
  fftw_iodim *iodims; /* dimension/stride descriptors for the guru planner interface */
#endif
  PetscInt     partial_dim;                                 /* NOTE(review): set outside this chunk; presumably the product of the non-leading dimensions — confirm at setup code */
  fftw_plan    p_forward, p_backward;                       /* lazily created plans; NULL until the first MatMult/MatMultTranspose */
  unsigned     p_flag;                                      /* planner flags, FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
  PetscScalar *finarray, *foutarray, *binarray, *boutarray; /* keep track of arrays because fftw_execute() can only be executed for the arrays with which the plan was created */
} Mat_FFTW;

 29: extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat, Vec *, Vec *, Vec *);

 31: static PetscErrorCode MatMult_SeqFFTW(Mat A, Vec x, Vec y)
 32: {
 33:   Mat_FFT           *fft  = (Mat_FFT *)A->data;
 34:   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
 35:   const PetscScalar *x_array;
 36:   PetscScalar       *y_array;
 37:   Vec                xx;
 38: #if defined(PETSC_USE_COMPLEX)
 39:   #if defined(PETSC_USE_64BIT_INDICES)
 40:   fftw_iodim64 *iodims;
 41:   #else
 42:   fftw_iodim *iodims;
 43:   #endif
 44:   PetscInt i;
 45: #endif
 46:   PetscInt ndim = fft->ndim, *dim = fft->dim;

 48:   PetscFunctionBegin;
 49:   if (!fftw->p_forward && fftw->p_flag != FFTW_ESTIMATE) {
 50:     /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
 51:     PetscCall(VecDuplicate(x, &xx));
 52:     PetscCall(VecGetArrayRead(xx, &x_array));
 53:   } else {
 54:     PetscCall(VecGetArrayRead(x, &x_array));
 55:   }
 56:   PetscCall(VecGetArray(y, &y_array));
 57:   if (!fftw->p_forward) { /* create a plan, then execute it */
 58:     switch (ndim) {
 59:     case 1:
 60: #if defined(PETSC_USE_COMPLEX)
 61:       fftw->p_forward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
 62: #else
 63:       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
 64: #endif
 65:       break;
 66:     case 2:
 67: #if defined(PETSC_USE_COMPLEX)
 68:       fftw->p_forward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
 69: #else
 70:       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
 71: #endif
 72:       break;
 73:     case 3:
 74: #if defined(PETSC_USE_COMPLEX)
 75:       fftw->p_forward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
 76: #else
 77:       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
 78: #endif
 79:       break;
 80:     default:
 81: #if defined(PETSC_USE_COMPLEX)
 82:       iodims = fftw->iodims;
 83:   #if defined(PETSC_USE_64BIT_INDICES)
 84:       if (ndim) {
 85:         iodims[ndim - 1].n  = (ptrdiff_t)dim[ndim - 1];
 86:         iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
 87:         for (i = ndim - 2; i >= 0; --i) {
 88:           iodims[i].n  = (ptrdiff_t)dim[i];
 89:           iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
 90:         }
 91:       }
 92:       fftw->p_forward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
 93:   #else
 94:       if (ndim) {
 95:         iodims[ndim - 1].n  = (int)dim[ndim - 1];
 96:         iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
 97:         for (i = ndim - 2; i >= 0; --i) {
 98:           iodims[i].n  = (int)dim[i];
 99:           iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
100:         }
101:       }
102:       fftw->p_forward = fftw_plan_guru_dft((int)ndim, (fftw_iodim *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
103:   #endif

105: #else
106:       fftw->p_forward = fftw_plan_dft_r2c(ndim, (int *)dim, (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
107: #endif
108:       break;
109:     }
110:     if (fftw->p_flag != FFTW_ESTIMATE) {
111:       /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
112:       PetscCall(VecRestoreArrayRead(xx, &x_array));
113:       PetscCall(VecDestroy(&xx));
114:       PetscCall(VecGetArrayRead(x, &x_array));
115:     } else {
116:       fftw->finarray  = (PetscScalar *)x_array;
117:       fftw->foutarray = y_array;
118:     }
119:   }

121:   if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
122: #if defined(PETSC_USE_COMPLEX)
123:     fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
124: #else
125:     fftw_execute_dft_r2c(fftw->p_forward, (double *)x_array, (fftw_complex *)y_array);
126: #endif
127:   } else {
128:     fftw_execute(fftw->p_forward);
129:   }
130:   PetscCall(VecRestoreArray(y, &y_array));
131:   PetscCall(VecRestoreArrayRead(x, &x_array));
132:   PetscFunctionReturn(PETSC_SUCCESS);
133: }

135: static PetscErrorCode MatMultTranspose_SeqFFTW(Mat A, Vec x, Vec y)
136: {
137:   Mat_FFT           *fft  = (Mat_FFT *)A->data;
138:   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
139:   const PetscScalar *x_array;
140:   PetscScalar       *y_array;
141:   PetscInt           ndim = fft->ndim, *dim = fft->dim;
142:   Vec                xx;
143: #if defined(PETSC_USE_COMPLEX)
144:   #if defined(PETSC_USE_64BIT_INDICES)
145:   fftw_iodim64 *iodims = fftw->iodims;
146:   #else
147:   fftw_iodim *iodims = fftw->iodims;
148:   #endif
149: #endif

151:   PetscFunctionBegin;
152:   if (!fftw->p_backward && fftw->p_flag != FFTW_ESTIMATE) {
153:     /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
154:     PetscCall(VecDuplicate(x, &xx));
155:     PetscCall(VecGetArrayRead(xx, &x_array));
156:   } else {
157:     PetscCall(VecGetArrayRead(x, &x_array));
158:   }
159:   PetscCall(VecGetArray(y, &y_array));
160:   if (!fftw->p_backward) { /* create a plan, then execute it */
161:     switch (ndim) {
162:     case 1:
163: #if defined(PETSC_USE_COMPLEX)
164:       fftw->p_backward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
165: #else
166:       fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
167: #endif
168:       break;
169:     case 2:
170: #if defined(PETSC_USE_COMPLEX)
171:       fftw->p_backward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
172: #else
173:       fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
174: #endif
175:       break;
176:     case 3:
177: #if defined(PETSC_USE_COMPLEX)
178:       fftw->p_backward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
179: #else
180:       fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
181: #endif
182:       break;
183:     default:
184: #if defined(PETSC_USE_COMPLEX)
185:   #if defined(PETSC_USE_64BIT_INDICES)
186:       fftw->p_backward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
187:   #else
188:       fftw->p_backward = fftw_plan_guru_dft((int)ndim, iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
189:   #endif
190: #else
191:       fftw->p_backward = fftw_plan_dft_c2r((int)ndim, (int *)dim, (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
192: #endif
193:       break;
194:     }
195:     if (fftw->p_flag != FFTW_ESTIMATE) {
196:       /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
197:       PetscCall(VecRestoreArrayRead(xx, &x_array));
198:       PetscCall(VecDestroy(&xx));
199:       PetscCall(VecGetArrayRead(x, &x_array));
200:     } else {
201:       fftw->binarray  = (PetscScalar *)x_array;
202:       fftw->boutarray = y_array;
203:     }
204:   }
205:   if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
206: #if defined(PETSC_USE_COMPLEX)
207:     fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
208: #else
209:     fftw_execute_dft_c2r(fftw->p_backward, (fftw_complex *)x_array, (double *)y_array);
210: #endif
211:   } else {
212:     fftw_execute(fftw->p_backward);
213:   }
214:   PetscCall(VecRestoreArray(y, &y_array));
215:   PetscCall(VecRestoreArrayRead(x, &x_array));
216:   PetscFunctionReturn(PETSC_SUCCESS);
217: }

219: #if !PetscDefined(HAVE_MPIUNI)
220: static PetscErrorCode MatMult_MPIFFTW(Mat A, Vec x, Vec y)
221: {
222:   Mat_FFT           *fft  = (Mat_FFT *)A->data;
223:   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
224:   const PetscScalar *x_array;
225:   PetscScalar       *y_array;
226:   PetscInt           ndim = fft->ndim, *dim = fft->dim;
227:   MPI_Comm           comm;
228:   Vec                xx;

230:   PetscFunctionBegin;
231:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
232:   if (!fftw->p_forward && fftw->p_flag != FFTW_ESTIMATE) {
233:     /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
234:     PetscCall(VecDuplicate(x, &xx));
235:     PetscCall(VecGetArrayRead(xx, &x_array));
236:   } else {
237:     PetscCall(VecGetArrayRead(x, &x_array));
238:   }
239:   PetscCall(VecGetArray(y, &y_array));
240:   if (!fftw->p_forward) { /* create a plan, then execute it */
241:     switch (ndim) {
242:     case 1:
243:   #if defined(PETSC_USE_COMPLEX)
244:       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
245:   #else
246:       SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet");
247:   #endif
248:       break;
249:     case 2:
250:   #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
251:       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
252:   #else
253:       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
254:   #endif
255:       break;
256:     case 3:
257:   #if defined(PETSC_USE_COMPLEX)
258:       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
259:   #else
260:       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
261:   #endif
262:       break;
263:     default:
264:   #if defined(PETSC_USE_COMPLEX)
265:       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
266:   #else
267:       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw, fftw->dim_fftw, (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
268:   #endif
269:       break;
270:     }
271:     if (fftw->p_flag != FFTW_ESTIMATE) {
272:       /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
273:       PetscCall(VecRestoreArrayRead(xx, &x_array));
274:       PetscCall(VecDestroy(&xx));
275:       PetscCall(VecGetArrayRead(x, &x_array));
276:     } else {
277:       fftw->finarray  = (PetscScalar *)x_array;
278:       fftw->foutarray = y_array;
279:     }
280:   }
281:   if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
282:     fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
283:   } else {
284:     fftw_execute(fftw->p_forward);
285:   }
286:   PetscCall(VecRestoreArray(y, &y_array));
287:   PetscCall(VecRestoreArrayRead(x, &x_array));
288:   PetscFunctionReturn(PETSC_SUCCESS);
289: }

291: static PetscErrorCode MatMultTranspose_MPIFFTW(Mat A, Vec x, Vec y)
292: {
293:   Mat_FFT           *fft  = (Mat_FFT *)A->data;
294:   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
295:   const PetscScalar *x_array;
296:   PetscScalar       *y_array;
297:   PetscInt           ndim = fft->ndim, *dim = fft->dim;
298:   MPI_Comm           comm;
299:   Vec                xx;

301:   PetscFunctionBegin;
302:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
303:   if (!fftw->p_backward && fftw->p_flag != FFTW_ESTIMATE) {
304:     /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
305:     PetscCall(VecDuplicate(x, &xx));
306:     PetscCall(VecGetArrayRead(xx, &x_array));
307:   } else {
308:     PetscCall(VecGetArrayRead(x, &x_array));
309:   }
310:   PetscCall(VecGetArray(y, &y_array));
311:   if (!fftw->p_backward) { /* create a plan, then execute it */
312:     switch (ndim) {
313:     case 1:
314:   #if defined(PETSC_USE_COMPLEX)
315:       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
316:   #else
317:       SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet");
318:   #endif
319:       break;
320:     case 2:
321:   #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
322:       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
323:   #else
324:       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
325:   #endif
326:       break;
327:     case 3:
328:   #if defined(PETSC_USE_COMPLEX)
329:       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
330:   #else
331:       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
332:   #endif
333:       break;
334:     default:
335:   #if defined(PETSC_USE_COMPLEX)
336:       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
337:   #else
338:       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
339:   #endif
340:       break;
341:     }
342:     if (fftw->p_flag != FFTW_ESTIMATE) {
343:       /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
344:       PetscCall(VecRestoreArrayRead(xx, &x_array));
345:       PetscCall(VecDestroy(&xx));
346:       PetscCall(VecGetArrayRead(x, &x_array));
347:     } else {
348:       fftw->binarray  = (PetscScalar *)x_array;
349:       fftw->boutarray = y_array;
350:     }
351:   }
352:   if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
353:     fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
354:   } else {
355:     fftw_execute(fftw->p_backward);
356:   }
357:   PetscCall(VecRestoreArray(y, &y_array));
358:   PetscCall(VecRestoreArrayRead(x, &x_array));
359:   PetscFunctionReturn(PETSC_SUCCESS);
360: }
361: #endif

363: static PetscErrorCode MatDestroy_FFTW(Mat A)
364: {
365:   Mat_FFT  *fft  = (Mat_FFT *)A->data;
366:   Mat_FFTW *fftw = (Mat_FFTW *)fft->data;

368:   PetscFunctionBegin;
369:   fftw_destroy_plan(fftw->p_forward);
370:   fftw_destroy_plan(fftw->p_backward);
371:   if (fftw->iodims) free(fftw->iodims);
372:   PetscCall(PetscFree(fftw->dim_fftw));
373:   PetscCall(PetscFree(fft->data));
374:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", NULL));
375:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", NULL));
376:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", NULL));
377: #if !PetscDefined(HAVE_MPIUNI)
378:   fftw_mpi_cleanup();
379: #endif
380:   PetscFunctionReturn(PETSC_SUCCESS);
381: }

383: #if !PetscDefined(HAVE_MPIUNI)
384: #include <../src/vec/vec/impls/mpi/pvecimpl.h>
385: static PetscErrorCode VecDestroy_MPIFFTW(Vec v)
386: {
387:   PetscScalar *array;

389:   PetscFunctionBegin;
390:   PetscCall(VecGetArray(v, &array));
391:   fftw_free((fftw_complex *)array);
392:   PetscCall(VecRestoreArray(v, &array));
393:   PetscCall(VecDestroy_MPI(v));
394:   PetscFunctionReturn(PETSC_SUCCESS);
395: }
396: #endif

398: #if !PetscDefined(HAVE_MPIUNI)
399: static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin, Vec *fin_new)
400: {
401:   Mat A;

403:   PetscFunctionBegin;
404:   PetscCall(PetscObjectQuery((PetscObject)fin, "FFTmatrix", (PetscObject *)&A));
405:   PetscCall(MatCreateVecsFFTW_FFTW(A, fin_new, NULL, NULL));
406:   PetscFunctionReturn(PETSC_SUCCESS);
407: }

409: static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout, Vec *fout_new)
410: {
411:   Mat A;

413:   PetscFunctionBegin;
414:   PetscCall(PetscObjectQuery((PetscObject)fout, "FFTmatrix", (PetscObject *)&A));
415:   PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, fout_new, NULL));
416:   PetscFunctionReturn(PETSC_SUCCESS);
417: }

419: static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new)
420: {
421:   Mat A;

423:   PetscFunctionBegin;
424:   PetscCall(PetscObjectQuery((PetscObject)bout, "FFTmatrix", (PetscObject *)&A));
425:   PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, NULL, bout_new));
426:   PetscFunctionReturn(PETSC_SUCCESS);
427: }
428: #endif

/*@
  MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
  parallel layout determined by `MATFFTW`

  Collective

  Input Parameter:
. A - the matrix

  Output Parameters:
+ x - (optional) input vector of forward FFTW
. y - (optional) output vector of forward FFTW
- z - (optional) output vector of backward FFTW

  Options Database Key:
. -mat_fftw_plannerflags - set FFTW planner flags

  Level: advanced

  Notes:
  The parallel layout of the output of forward FFTW is always the same as the input
  of the backward FFTW. But the parallel layout of the input vector of forward
  FFTW might not be the same as the output of backward FFTW.

  We need to provide enough space while doing a parallel real transform.
  We need to pad extra zeros at the end of the last dimension. For this reason one needs to
  invoke the fftw_mpi_local_size_transposed routine. Remember one has to change the
  last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded
  depends on whether the last dimension is even or odd. If the last dimension is even, two
  zeros need to be padded; if it is odd, only one zero is needed.

  Lastly one needs some scratch space at the end of the data set in each process. alloc_local
  figures out how much space is needed, i.e. it figures out the data+scratch space for
  each processor and returns that.

  Developer Notes:
  How come `MatCreateVecs()` doesn't produce the correctly padded vectors automatically?

.seealso: [](ch_matrices), `Mat`, `MATFFTW`, `MatCreateFFT()`, `MatCreateVecs()`
@*/
PetscErrorCode MatCreateVecsFFTW(Mat A, Vec *x, Vec *y, Vec *z)
{
  PetscFunctionBegin;
  /* Dispatches to the implementation composed on A as "MatCreateVecsFFTW_C"
     (MatCreateVecsFFTW_FFTW); errors if A is not a MATFFTW matrix */
  PetscUseMethod(A, "MatCreateVecsFFTW_C", (Mat, Vec *, Vec *, Vec *), (A, x, y, z));
  PetscFunctionReturn(PETSC_SUCCESS);
}

477: PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A, Vec *fin, Vec *fout, Vec *bout)
478: {
479:   PetscMPIInt size, rank;
480:   MPI_Comm    comm;
481:   Mat_FFT    *fft = (Mat_FFT *)A->data;

483:   PetscFunctionBegin;
486:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));

488:   PetscCallMPI(MPI_Comm_size(comm, &size));
489:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
490:   if (size == 1) { /* sequential case */
491: #if defined(PETSC_USE_COMPLEX)
492:     if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fin));
493:     if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fout));
494:     if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, bout));
495: #else
496:     if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fin));
497:     if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fout));
498:     if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, bout));
499: #endif
500: #if !PetscDefined(HAVE_MPIUNI)
501:   } else { /* parallel cases */
502:     Mat_FFTW     *fftw = (Mat_FFTW *)fft->data;
503:     PetscInt      ndim = fft->ndim, *dim = fft->dim;
504:     ptrdiff_t     alloc_local, local_n0, local_0_start;
505:     ptrdiff_t     local_n1;
506:     fftw_complex *data_fout;
507:     ptrdiff_t     local_1_start;
508:     PetscInt      n1, N1;
509:   #if defined(PETSC_USE_COMPLEX)
510:     fftw_complex *data_fin, *data_bout;
511:   #else
512:     double   *data_finr, *data_boutr;
513:     ptrdiff_t temp;
514:   #endif

516:     switch (ndim) {
517:     case 1:
518:   #if !defined(PETSC_USE_COMPLEX)
519:       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not allow parallel real 1D transform");
520:   #else
521:       alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
522:       if (fin) {
523:         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
524:         PetscCall(PetscIntCast(local_n0, &n1));
525:         PetscCall(PetscIntCast(fft->N, &N1));
526:         PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (const PetscScalar *)data_fin, fin));
527:         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
528:         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
529:         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
530:       }
531:       if (fout) {
532:         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
533:         PetscCall(PetscIntCast(local_n1, &n1));
534:         PetscCall(PetscIntCast(fft->N, &N1));
535:         PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (const PetscScalar *)data_fout, fout));
536:         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
537:         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
538:         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
539:       }
540:       alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
541:       if (bout) {
542:         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
543:         PetscCall(PetscIntCast(local_n1, &n1));
544:         PetscCall(PetscIntCast(fft->N, &N1));
545:         PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (const PetscScalar *)data_bout, bout));
546:         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
547:         (*bout)->ops->duplicate = VecDuplicate_FFTW_fout;
548:         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
549:       }
550:       break;
551:   #endif
552:     case 2:
553:   #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
554:       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
555:       PetscCall(PetscIntCast(2 * dim[0] * (dim[1] / 2 + 1), &N1));
556:       PetscCall(PetscIntCast(2 * local_n0 * (dim[1] / 2 + 1), &n1));
557:       if (fin) {
558:         data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
559:         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
560:         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
561:         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
562:         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
563:       }
564:       if (fout) {
565:         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
566:         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_fout, fout));
567:         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
568:         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
569:         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
570:       }
571:       if (bout) {
572:         data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
573:         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
574:         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
575:         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
576:         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
577:       }
578:   #else
579:       /* Get local size */
580:       alloc_local = fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
581:       if (fin) {
582:         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
583:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
584:         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
585:         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
586:         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
587:       }
588:       if (fout) {
589:         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
590:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
591:         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
592:         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
593:         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
594:       }
595:       if (bout) {
596:         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
597:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
598:         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
599:         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
600:         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
601:       }
602:   #endif
603:       break;
604:     case 3:
605:   #if !defined(PETSC_USE_COMPLEX)
606:       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
607:       PetscCall(PetscIntCast(2 * dim[0] * dim[1] * (dim[2] / 2 + 1), &N1));
608:       PetscCall(PetscIntCast(2 * local_n0 * dim[1] * (dim[2] / 2 + 1), &n1));
609:       if (fin) {
610:         data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
611:         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
612:         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
613:         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
614:         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
615:       }
616:       if (fout) {
617:         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
618:         PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (PetscScalar *)data_fout, fout));
619:         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
620:         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
621:         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
622:       }
623:       if (bout) {
624:         data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
625:         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
626:         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
627:         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
628:         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
629:       }
630:   #else
631:       alloc_local = fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
632:       if (fin) {
633:         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
634:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
635:         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
636:         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
637:         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
638:       }
639:       if (fout) {
640:         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
641:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
642:         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
643:         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
644:         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
645:       }
646:       if (bout) {
647:         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
648:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
649:         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
650:         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
651:         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
652:       }
653:   #endif
654:       break;
655:     default:
656:   #if !defined(PETSC_USE_COMPLEX)
657:       temp                                  = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
658:       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
659:       alloc_local                           = fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
660:       N1                                    = 2 * fft->N * (PetscInt)(fftw->dim_fftw[fftw->ndim_fftw - 1]) / ((PetscInt)temp);
661:       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;

663:       if (fin) {
664:         data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
665:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_finr, fin));
666:         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
667:         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
668:         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
669:       }
670:       if (fout) {
671:         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
672:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_fout, fout));
673:         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
674:         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
675:         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
676:       }
677:       if (bout) {
678:         data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
679:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_boutr, bout));
680:         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
681:         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
682:         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
683:       }
684:   #else
685:       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
686:       if (fin) {
687:         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
688:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
689:         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
690:         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
691:         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
692:       }
693:       if (fout) {
694:         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
695:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
696:         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
697:         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
698:         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
699:       }
700:       if (bout) {
701:         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
702:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
703:         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
704:         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
705:         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
706:       }
707:   #endif
708:       break;
709:     }
    /* fftw vectors have their data array allocated by fftw_malloc, such that v->array=xxx but
       v->array_allocated=NULL. A regular replacearray call won't free the memory and only causes
       memory leaks. We void these pointers here to avoid accidental use.
    */
714:     if (fin) (*fin)->ops->replacearray = NULL;
715:     if (fout) (*fout)->ops->replacearray = NULL;
716:     if (bout) (*bout)->ops->replacearray = NULL;
717: #endif
718:   }
719:   PetscFunctionReturn(PETSC_SUCCESS);
720: }

722: /*@
723:   VecScatterPetscToFFTW - Copies a PETSc vector to the vector that goes into `MATFFTW` calls.

725:   Collective

727:   Input Parameters:
728: + A - FFTW matrix
729: - x - the PETSc vector

731:   Output Parameter:
732: . y - the FFTW vector

734:   Level: intermediate

736:   Note:
  For real parallel FFT, FFTW requires insertion of extra space at the end of the last dimension. This is required even when
  one is not doing an in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
  zeros. This routine does that job by a scattering operation.

741: .seealso: [](ch_matrices), `Mat`, `MATFFTW`, `VecScatterFFTWToPetsc()`, `MatCreateVecsFFTW()`
742: @*/
PetscErrorCode VecScatterPetscToFFTW(Mat A, Vec x, Vec y)
{
  PetscFunctionBegin;
  /* Dispatch to the "VecScatterPetscToFFTW_C" method composed on A (set to
     VecScatterPetscToFFTW_FFTW() in MatCreate_FFTW()); PetscUseMethod() errors
     if the method is not composed, i.e. if A is not a MATFFTW matrix */
  PetscUseMethod(A, "VecScatterPetscToFFTW_C", (Mat, Vec, Vec), (A, x, y));
  PetscFunctionReturn(PETSC_SUCCESS);
}

750: static PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A, Vec x, Vec y)
751: {
752:   MPI_Comm    comm;
753:   Mat_FFT    *fft = (Mat_FFT *)A->data;
754:   PetscInt    low;
755:   PetscMPIInt rank, size;
756:   PetscInt    vsize, vsize1;
757:   VecScatter  vecscat;
758:   IS          list1;

760:   PetscFunctionBegin;
761:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
762:   PetscCallMPI(MPI_Comm_size(comm, &size));
763:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
764:   PetscCall(VecGetOwnershipRange(y, &low, NULL));

766:   if (size == 1) {
767:     PetscCall(VecGetSize(x, &vsize));
768:     PetscCall(VecGetSize(y, &vsize1));
769:     PetscCall(ISCreateStride(PETSC_COMM_SELF, fft->N, 0, 1, &list1));
770:     PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
771:     PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
772:     PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
773:     PetscCall(VecScatterDestroy(&vecscat));
774:     PetscCall(ISDestroy(&list1));
775: #if !PetscDefined(HAVE_MPIUNI)
776:   } else {
777:     Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
778:     PetscInt  ndim = fft->ndim, *dim = fft->dim, n1;
779:     ptrdiff_t local_n0, local_0_start;
780:     ptrdiff_t local_n1, local_1_start;
781:     IS        list2;
782:   #if !defined(PETSC_USE_COMPLEX)
783:     PetscInt  i, j, k, partial_dim;
784:     PetscInt *indx1, *indx2, tempindx, tempindx1;
785:     PetscInt  NM;
786:     ptrdiff_t temp;
787:   #else
788:     PetscInt nstart, nlow;
789:   #endif

791:     switch (ndim) {
792:     case 1:
793:   #if defined(PETSC_USE_COMPLEX)
794:       fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);

796:       PetscCall(PetscIntCast(local_n0, &n1));
797:       PetscCall(PetscIntCast(local_0_start, &nstart));
798:       PetscCall(PetscIntCast(low, &nlow));
799:       PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
800:       PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
801:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
802:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
803:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
804:       PetscCall(VecScatterDestroy(&vecscat));
805:       PetscCall(ISDestroy(&list1));
806:       PetscCall(ISDestroy(&list2));
807:   #else
808:       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
809:   #endif
810:       break;
811:     case 2:
812:   #if defined(PETSC_USE_COMPLEX)
813:       fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);

815:       PetscCall(PetscIntCast(local_n0 * dim[1], &n1));
816:       PetscCall(PetscIntCast(local_0_start * dim[1], &nstart));
817:       PetscCall(PetscIntCast(low, &nlow));
818:       PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
819:       PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
820:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
821:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
822:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
823:       PetscCall(VecScatterDestroy(&vecscat));
824:       PetscCall(ISDestroy(&list1));
825:       PetscCall(ISDestroy(&list2));
826:   #else
827:       fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

829:       PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx1));
830:       PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx2));

832:       if (dim[1] % 2 == 0) {
833:         NM = dim[1] + 2;
834:       } else {
835:         NM = dim[1] + 1;
836:       }
837:       for (i = 0; i < local_n0; i++) {
838:         for (j = 0; j < dim[1]; j++) {
839:           tempindx  = i * dim[1] + j;
840:           tempindx1 = i * NM + j;

842:           PetscCall(PetscIntCast(local_0_start * dim[1] + tempindx, &indx1[tempindx]));
843:           indx2[tempindx] = low + tempindx1;
844:         }
845:       }

847:       PetscCall(PetscIntCast(local_n0 * dim[1], &n1));
848:       PetscCall(ISCreateGeneral(comm, n1, indx1, PETSC_COPY_VALUES, &list1));
849:       PetscCall(ISCreateGeneral(comm, n1, indx2, PETSC_COPY_VALUES, &list2));

851:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
852:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
853:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
854:       PetscCall(VecScatterDestroy(&vecscat));
855:       PetscCall(ISDestroy(&list1));
856:       PetscCall(ISDestroy(&list2));
857:       PetscCall(PetscFree(indx1));
858:       PetscCall(PetscFree(indx2));
859:   #endif
860:       break;

862:     case 3:
863:   #if defined(PETSC_USE_COMPLEX)
864:       fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);

866:       PetscCall(PetscIntCast(local_n0 * dim[1] * dim[2], &n1));
867:       PetscCall(PetscIntCast(local_0_start * dim[1] * dim[2], &nstart));
868:       PetscCall(PetscIntCast(low, &nlow));
869:       PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
870:       PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
871:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
872:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
873:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
874:       PetscCall(VecScatterDestroy(&vecscat));
875:       PetscCall(ISDestroy(&list1));
876:       PetscCall(ISDestroy(&list2));
877:   #else
878:       /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
879:       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 3D real transform");
880:       fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

882:       PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx1));
883:       PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx2));

885:       if (dim[2] % 2 == 0) NM = dim[2] + 2;
886:       else NM = dim[2] + 1;

888:       for (i = 0; i < local_n0; i++) {
889:         for (j = 0; j < dim[1]; j++) {
890:           for (k = 0; k < dim[2]; k++) {
891:             tempindx  = i * dim[1] * dim[2] + j * dim[2] + k;
892:             tempindx1 = i * dim[1] * NM + j * NM + k;

894:             PetscCall(PetscIntCast(local_0_start * dim[1] * dim[2] + tempindx, &indx1[tempindx]));
895:             PetscCall(PetscIntCast(low + tempindx1, &indx2[tempindx]));
896:           }
897:         }
898:       }

900:       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
901:       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));
902:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
903:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
904:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
905:       PetscCall(VecScatterDestroy(&vecscat));
906:       PetscCall(ISDestroy(&list1));
907:       PetscCall(ISDestroy(&list2));
908:       PetscCall(PetscFree(indx1));
909:       PetscCall(PetscFree(indx2));
910:   #endif
911:       break;

913:     default:
914:   #if defined(PETSC_USE_COMPLEX)
915:       fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);

917:       PetscCall(PetscIntCast(local_n0 * (fftw->partial_dim), &n1));
918:       PetscCall(PetscIntCast(local_0_start * (fftw->partial_dim), &nstart));
919:       PetscCall(PetscIntCast(low, &nlow));
920:       PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
921:       PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
922:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
923:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
924:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
925:       PetscCall(VecScatterDestroy(&vecscat));
926:       PetscCall(ISDestroy(&list1));
927:       PetscCall(ISDestroy(&list2));
928:   #else
929:       /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
930:       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel DIM>3 real transform");
931:       temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];

933:       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;

935:       fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

937:       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;

939:       partial_dim = fftw->partial_dim;

941:       PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx1));
942:       PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx2));

944:       if (dim[ndim - 1] % 2 == 0) NM = 2;
945:       else NM = 1;

947:       j = low;
948:       for (i = 0, k = 1; i < (PetscInt)local_n0 * partial_dim; i++, k++) {
949:         indx1[i] = local_0_start * partial_dim + i;
950:         indx2[i] = j;
951:         if (k % dim[ndim - 1] == 0) j += NM;
952:         j++;
953:       }
954:       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
955:       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));
956:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
957:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
958:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
959:       PetscCall(VecScatterDestroy(&vecscat));
960:       PetscCall(ISDestroy(&list1));
961:       PetscCall(ISDestroy(&list2));
962:       PetscCall(PetscFree(indx1));
963:       PetscCall(PetscFree(indx2));
964:   #endif
965:       break;
966:     }
967: #endif
968:   }
969:   PetscFunctionReturn(PETSC_SUCCESS);
970: }

972: /*@
973:   VecScatterFFTWToPetsc - Converts `MATFFTW` output vector to a PETSc vector.

975:   Collective

977:   Input Parameters:
978: + A - `MATFFTW` matrix
979: - x - FFTW vector

981:   Output Parameter:
982: . y - PETSc vector

984:   Level: intermediate

986:   Note:
987:   While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension.
988:   `VecScatterFFTWToPetsc()` removes those extra zeros.

990: .seealso: [](ch_matrices), `Mat`, `VecScatterPetscToFFTW()`, `MATFFTW`, `MatCreateVecsFFTW()`
991: @*/
PetscErrorCode VecScatterFFTWToPetsc(Mat A, Vec x, Vec y)
{
  PetscFunctionBegin;
  /* Dispatch to the "VecScatterFFTWToPetsc_C" method composed on A (set to
     VecScatterFFTWToPetsc_FFTW() in MatCreate_FFTW()); PetscUseMethod() errors
     if the method is not composed, i.e. if A is not a MATFFTW matrix */
  PetscUseMethod(A, "VecScatterFFTWToPetsc_C", (Mat, Vec, Vec), (A, x, y));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  VecScatterFFTWToPetsc_FFTW - implementation of VecScatterFFTWToPetsc() composed on MATFFTW.
  Scatters the FFTW-layout vector x (which, for parallel real transforms, carries padding at
  the end of the last dimension) into the natural PETSc layout of vector y, dropping the padding.
*/
static PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A, Vec x, Vec y)
{
  MPI_Comm    comm;
  Mat_FFT    *fft = (Mat_FFT *)A->data;
  PetscInt    low; /* start of this rank's ownership range of x (the FFTW vector) */
  PetscMPIInt rank, size;
  VecScatter  vecscat;
  IS          list1;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(VecGetOwnershipRange(x, &low, NULL));

  if (size == 1) {
    /* Sequential: both vectors store the same fft->N entries in the same order */
    PetscCall(ISCreateStride(comm, fft->N, 0, 1, &list1));
    PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
    PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterDestroy(&vecscat));
    PetscCall(ISDestroy(&list1));

#if !PetscDefined(HAVE_MPIUNI)
  } else {
    Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
    PetscInt  ndim = fft->ndim, *dim = fft->dim;
    ptrdiff_t local_n0, local_0_start;
    ptrdiff_t local_n1, local_1_start;
    IS        list2;
  #if !defined(PETSC_USE_COMPLEX)
    PetscInt  i, j, k, partial_dim;
    PetscInt *indx1, *indx2, tempindx, tempindx1;
    PetscInt  NM; /* padded length of the last dimension: dim+2 (even) or dim+1 (odd), i.e. 2*(dim/2+1) */
    ptrdiff_t temp;
  #else
    PetscInt nlow, nstart;
  #endif
    PetscInt n1;
    switch (ndim) {
    case 1:
  #if defined(PETSC_USE_COMPLEX)
      /* for the 1D backward transform the source distribution is given by local_n1/local_1_start */
      fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(PetscIntCast(local_n1, &n1));
      PetscCall(PetscIntCast(local_1_start, &nstart));
      PetscCall(PetscIntCast(low, &nlow));
      PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
      PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
  #else
      SETERRQ(comm, PETSC_ERR_SUP, "No support for real parallel 1D FFT");
  #endif
      break;
    case 2:
  #if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);

      PetscCall(PetscIntCast(local_n0 * dim[1], &n1));
      PetscCall(PetscIntCast(local_0_start * dim[1], &nstart));
      PetscCall(PetscIntCast(low, &nlow));
      PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
      PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
      /* note the swapped roles vs VecScatterPetscToFFTW_FFTW(): x is read at its own ownership range (list2) */
      PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
  #else
      fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx1));
      PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx2));

      if (dim[1] % 2 == 0) NM = dim[1] + 2;
      else NM = dim[1] + 1;

      /* indx1: natural (unpadded) target indices into y; indx2: padded source indices into x */
      for (i = 0; i < local_n0; i++) {
        for (j = 0; j < dim[1]; j++) {
          tempindx  = i * dim[1] + j;
          tempindx1 = i * NM + j;

          PetscCall(PetscIntCast(local_0_start * dim[1] + tempindx, &indx1[tempindx]));
          PetscCall(PetscIntCast(low + tempindx1, &indx2[tempindx]));
        }
      }

      PetscCall(PetscIntCast(local_n0 * dim[1], &n1));
      PetscCall(ISCreateGeneral(comm, n1, indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, n1, indx2, PETSC_COPY_VALUES, &list2));

      PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
  #endif
      break;
    case 3:
  #if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);

      PetscCall(PetscIntCast(local_n0 * dim[1] * dim[2], &n1));
      PetscCall(PetscIntCast(local_0_start * dim[1] * dim[2], &nstart));
      PetscCall(PetscIntCast(low, &nlow));
      PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
      PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
  #else
      fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx1));
      PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx2));

      if (dim[2] % 2 == 0) NM = dim[2] + 2;
      else NM = dim[2] + 1;

      /* indx1: natural (unpadded) target indices into y; indx2: padded source indices into x */
      for (i = 0; i < local_n0; i++) {
        for (j = 0; j < dim[1]; j++) {
          for (k = 0; k < dim[2]; k++) {
            tempindx  = i * dim[1] * dim[2] + j * dim[2] + k;
            tempindx1 = i * dim[1] * NM + j * NM + k;

            PetscCall(PetscIntCast(local_0_start * dim[1] * dim[2] + tempindx, &indx1[tempindx]));
            PetscCall(PetscIntCast(low + tempindx1, &indx2[tempindx]));
          }
        }
      }

      PetscCall(PetscIntCast(local_n0 * dim[1] * dim[2], &n1));
      PetscCall(ISCreateGeneral(comm, n1, indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, n1, indx2, PETSC_COPY_VALUES, &list2));

      PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
  #endif
      break;
    default:
  #if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);

      PetscCall(PetscIntCast(local_n0 * (fftw->partial_dim), &n1));
      PetscCall(PetscIntCast(local_0_start * (fftw->partial_dim), &nstart));
      PetscCall(PetscIntCast(low, &nlow));
      PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
      PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
  #else
      /* temporarily replace the last dimension with its r2c size to query FFTW's local sizes */
      temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];

      (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;

      fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;

      partial_dim = fftw->partial_dim;

      PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx1));
      PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx2));

      if (dim[ndim - 1] % 2 == 0) NM = 2;
      else NM = 1;

      j = low;
      for (i = 0, k = 1; i < (PetscInt)local_n0 * partial_dim; i++, k++) {
        PetscCall(PetscIntCast(local_0_start * partial_dim + i, &indx1[i]));
        indx2[i] = j;
        if (k % dim[ndim - 1] == 0) j += NM; /* skip the padding at the end of each last-dimension row */
        j++;
      }
      PetscCall(PetscIntCast(local_n0 * partial_dim, &n1));
      PetscCall(ISCreateGeneral(comm, n1, indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, n1, indx2, PETSC_COPY_VALUES, &list2));

      PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
  #endif
      break;
    }
#endif
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

1215: /*MC
1216:   MATFFTW -  "fftw" - Matrix type that provides FFT via the FFTW external package.

1218:   Level: intermediate

1220: .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatType`, `MatCreateFFT()`
1221: M*/
1222: PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1223: {
1224:   MPI_Comm    comm;
1225:   Mat_FFT    *fft = (Mat_FFT *)A->data;
1226:   Mat_FFTW   *fftw;
1227:   PetscInt    ndim = fft->ndim, *dim = fft->dim;
1228:   const char *plans[]  = {"FFTW_ESTIMATE", "FFTW_MEASURE", "FFTW_PATIENT", "FFTW_EXHAUSTIVE"};
1229:   unsigned    iplans[] = {FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE};
1230:   PetscBool   flg;
1231:   PetscInt    p_flag, partial_dim = 1, ctr;
1232:   PetscMPIInt size, rank;
1233:   ptrdiff_t  *pdim;
1234: #if !defined(PETSC_USE_COMPLEX)
1235:   PetscInt tot_dim;
1236: #endif

1238:   PetscFunctionBegin;
1239:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1240:   PetscCallMPI(MPI_Comm_size(comm, &size));
1241:   PetscCallMPI(MPI_Comm_rank(comm, &rank));

1243: #if !PetscDefined(HAVE_MPIUNI)
1244:   fftw_mpi_init();
1245: #endif
1246:   pdim    = (ptrdiff_t *)calloc(ndim, sizeof(ptrdiff_t));
1247:   pdim[0] = dim[0];
1248: #if !defined(PETSC_USE_COMPLEX)
1249:   tot_dim = 2 * dim[0];
1250: #endif
1251:   for (ctr = 1; ctr < ndim; ctr++) {
1252:     partial_dim *= dim[ctr];
1253:     pdim[ctr] = dim[ctr];
1254: #if !defined(PETSC_USE_COMPLEX)
1255:     if (ctr == ndim - 1) tot_dim *= (dim[ctr] / 2 + 1);
1256:     else tot_dim *= dim[ctr];
1257: #endif
1258:   }

1260:   if (size == 1) {
1261: #if defined(PETSC_USE_COMPLEX)
1262:     PetscCall(MatSetSizes(A, fft->N, fft->N, fft->N, fft->N));
1263:     fft->n = fft->N;
1264: #else
1265:     PetscCall(MatSetSizes(A, tot_dim, tot_dim, tot_dim, tot_dim));
1266:     fft->n = tot_dim;
1267: #endif
1268: #if !PetscDefined(HAVE_MPIUNI)
1269:   } else {
1270:     ptrdiff_t local_n0, local_0_start, local_n1, local_1_start;
1271:   #if !defined(PETSC_USE_COMPLEX)
1272:     ptrdiff_t temp;
1273:     PetscInt  N1;
1274:   #else
1275:     PetscInt n1;
1276:   #endif

1278:     switch (ndim) {
1279:     case 1:
1280:   #if !defined(PETSC_USE_COMPLEX)
1281:       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
1282:   #else
1283:       fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
1284:       PetscCall(PetscIntCast(local_n0, &fft->n));
1285:       PetscCall(PetscIntCast(local_n1, &n1));
1286:       PetscCall(MatSetSizes(A, n1, fft->n, fft->N, fft->N));
1287:   #endif
1288:       break;
1289:     case 2:
1290:   #if defined(PETSC_USE_COMPLEX)
1291:       fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
1292:       fft->n = (PetscInt)local_n0 * dim[1];
1293:       PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1294:   #else
1295:       fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

1297:       fft->n = 2 * (PetscInt)local_n0 * (dim[1] / 2 + 1);
1298:       PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * (dim[1] / 2 + 1), 2 * dim[0] * (dim[1] / 2 + 1)));
1299:   #endif
1300:       break;
1301:     case 3:
1302:   #if defined(PETSC_USE_COMPLEX)
1303:       fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);

1305:       fft->n = (PetscInt)local_n0 * dim[1] * dim[2];
1306:       PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1307:   #else
1308:       fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

1310:       fft->n = 2 * (PetscInt)local_n0 * dim[1] * (dim[2] / 2 + 1);
1311:       PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * dim[1] * (dim[2] / 2 + 1), 2 * dim[0] * dim[1] * (dim[2] / 2 + 1)));
1312:   #endif
1313:       break;
1314:     default:
1315:   #if defined(PETSC_USE_COMPLEX)
1316:       fftw_mpi_local_size(ndim, pdim, comm, &local_n0, &local_0_start);

1318:       PetscCall(PetscIntCast(local_n0 * partial_dim, &fft->n));
1319:       PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1320:   #else
1321:       temp = pdim[ndim - 1];

1323:       pdim[ndim - 1] = temp / 2 + 1;

1325:       fftw_mpi_local_size_transposed(ndim, pdim, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

1327:       PetscCall(PetscIntCast(2 * (PetscInt)local_n0 * partial_dim * pdim[ndim - 1] / temp, &fft->n));
1328:       N1 = 2 * fft->N * (PetscInt)pdim[ndim - 1] / ((PetscInt)temp);

1330:       pdim[ndim - 1] = temp;

1332:       PetscCall(MatSetSizes(A, fft->n, fft->n, N1, N1));
1333:   #endif
1334:       break;
1335:     }
1336: #endif
1337:   }
1338:   free(pdim);
1339:   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATFFTW));
1340:   PetscCall(PetscNew(&fftw));
1341:   fft->data = (void *)fftw;

1343:   fftw->ndim_fftw   = ndim; /* This is dimension of fft */
1344:   fftw->partial_dim = partial_dim;

1346:   PetscCall(PetscMalloc1(ndim, &fftw->dim_fftw));
1347:   if (size == 1) {
1348: #if defined(PETSC_USE_64BIT_INDICES)
1349:     fftw->iodims = (fftw_iodim64 *)malloc(sizeof(fftw_iodim64) * ndim);
1350: #else
1351:     fftw->iodims = (fftw_iodim *)malloc(sizeof(fftw_iodim) * ndim);
1352: #endif
1353:   }

1355:   for (ctr = 0; ctr < ndim; ctr++) (fftw->dim_fftw)[ctr] = dim[ctr];

1357:   fftw->p_forward  = NULL;
1358:   fftw->p_backward = NULL;
1359:   fftw->p_flag     = FFTW_ESTIMATE;

1361:   if (size == 1) {
1362:     A->ops->mult          = MatMult_SeqFFTW;
1363:     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1364: #if !PetscDefined(HAVE_MPIUNI)
1365:   } else {
1366:     A->ops->mult          = MatMult_MPIFFTW;
1367:     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1368: #endif
1369:   }
1370:   fft->matdestroy = MatDestroy_FFTW;
1371:   A->assembled    = PETSC_TRUE;
1372:   A->preallocated = PETSC_TRUE;

1374:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", MatCreateVecsFFTW_FFTW));
1375:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", VecScatterPetscToFFTW_FFTW));
1376:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", VecScatterFFTWToPetsc_FFTW));

1378:   /* get runtime options */
1379:   PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "FFTW Options", "Mat");
1380:   PetscCall(PetscOptionsEList("-mat_fftw_plannerflags", "Planner Flags", "None", plans, 4, plans[0], &p_flag, &flg));
1381:   if (flg) fftw->p_flag = iplans[p_flag];
1382:   PetscOptionsEnd();
1383:   PetscFunctionReturn(PETSC_SUCCESS);
1384: }