Actual source code: fftw.c
1: /*
2: Provides an interface to the FFTW package.
3: Testing examples can be found in ~src/mat/tests
4: */
6: #include <../src/mat/impls/fft/fft.h>
7: EXTERN_C_BEGIN
8: #if !PetscDefined(HAVE_MPIUNI)
9: #include <fftw3-mpi.h>
10: #else
11: #include <fftw3.h>
12: #endif
13: EXTERN_C_END
15: typedef struct {
16: ptrdiff_t ndim_fftw, *dim_fftw;
17: #if defined(PETSC_USE_64BIT_INDICES)
18: fftw_iodim64 *iodims;
19: #else
20: fftw_iodim *iodims;
21: #endif
22: PetscInt partial_dim;
23: fftw_plan p_forward, p_backward;
24: unsigned p_flag; /* planner flags, FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
25: PetscScalar *finarray, *foutarray, *binarray, *boutarray; /* keep track of arrays because fftw_execute() can only be executed for the arrays with which the plan was created */
26: } Mat_FFTW;
28: extern PetscErrorCode MatMult_SeqFFTW(Mat, Vec, Vec);
29: extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat, Vec, Vec);
30: extern PetscErrorCode MatDestroy_FFTW(Mat);
31: extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat, Vec *, Vec *, Vec *);
32: #if !PetscDefined(HAVE_MPIUNI)
33: extern PetscErrorCode MatMult_MPIFFTW(Mat, Vec, Vec);
34: extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat, Vec, Vec);
35: extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
36: #endif
38: PetscErrorCode MatMult_SeqFFTW(Mat A, Vec x, Vec y)
39: {
40: Mat_FFT *fft = (Mat_FFT *)A->data;
41: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
42: const PetscScalar *x_array;
43: PetscScalar *y_array;
44: Vec xx;
45: #if defined(PETSC_USE_COMPLEX)
46: #if defined(PETSC_USE_64BIT_INDICES)
47: fftw_iodim64 *iodims;
48: #else
49: fftw_iodim *iodims;
50: #endif
51: PetscInt i;
52: #endif
53: PetscInt ndim = fft->ndim, *dim = fft->dim;
55: PetscFunctionBegin;
56: if (!fftw->p_forward && fftw->p_flag != FFTW_ESTIMATE) {
57: /* The planner overwrites the data in the in/out arrays, so a dummy input array is needed; see FFTW manual sec. 2.1 or sec. 4 */
58: PetscCall(VecDuplicate(x, &xx));
59: PetscCall(VecGetArrayRead(xx, &x_array));
60: } else {
61: PetscCall(VecGetArrayRead(x, &x_array));
62: }
63: PetscCall(VecGetArray(y, &y_array));
64: if (!fftw->p_forward) { /* create a plan, then execute it */
65: switch (ndim) {
66: case 1:
67: #if defined(PETSC_USE_COMPLEX)
68: fftw->p_forward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
69: #else
70: fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
71: #endif
72: break;
73: case 2:
74: #if defined(PETSC_USE_COMPLEX)
75: fftw->p_forward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
76: #else
77: fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
78: #endif
79: break;
80: case 3:
81: #if defined(PETSC_USE_COMPLEX)
82: fftw->p_forward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
83: #else
84: fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
85: #endif
86: break;
87: default:
88: #if defined(PETSC_USE_COMPLEX)
89: iodims = fftw->iodims;
90: #if defined(PETSC_USE_64BIT_INDICES)
91: if (ndim) {
92: iodims[ndim - 1].n = (ptrdiff_t)dim[ndim - 1];
93: iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
94: for (i = ndim - 2; i >= 0; --i) {
95: iodims[i].n = (ptrdiff_t)dim[i];
96: iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
97: }
98: }
99: fftw->p_forward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
100: #else
101: if (ndim) {
102: iodims[ndim - 1].n = (int)dim[ndim - 1];
103: iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
104: for (i = ndim - 2; i >= 0; --i) {
105: iodims[i].n = (int)dim[i];
106: iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
107: }
108: }
109: fftw->p_forward = fftw_plan_guru_dft((int)ndim, (fftw_iodim *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
110: #endif
112: #else
113: fftw->p_forward = fftw_plan_dft_r2c(ndim, (int *)dim, (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
114: #endif
115: break;
116: }
117: if (fftw->p_flag != FFTW_ESTIMATE) {
118: /* The planner overwrites the data in the in/out arrays, so a dummy input array is needed; see FFTW manual sec. 2.1 or sec. 4 */
119: PetscCall(VecRestoreArrayRead(xx, &x_array));
120: PetscCall(VecDestroy(&xx));
121: PetscCall(VecGetArrayRead(x, &x_array));
122: } else {
123: fftw->finarray = (PetscScalar *)x_array;
124: fftw->foutarray = y_array;
125: }
126: }
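/* Note (editorial addition): FFTW's new-array execute functions used below require the new
   arrays to satisfy the same alignment and in-place/out-of-place constraints as the arrays
   the plan was created with; see the "New-array Execute Functions" section of the FFTW manual. */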
128: if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
129: #if defined(PETSC_USE_COMPLEX)
130: fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
131: #else
132: fftw_execute_dft_r2c(fftw->p_forward, (double *)x_array, (fftw_complex *)y_array);
133: #endif
134: } else {
135: fftw_execute(fftw->p_forward);
136: }
137: PetscCall(VecRestoreArray(y, &y_array));
138: PetscCall(VecRestoreArrayRead(x, &x_array));
139: PetscFunctionReturn(PETSC_SUCCESS);
140: }
142: PetscErrorCode MatMultTranspose_SeqFFTW(Mat A, Vec x, Vec y)
143: {
144: Mat_FFT *fft = (Mat_FFT *)A->data;
145: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
146: const PetscScalar *x_array;
147: PetscScalar *y_array;
148: PetscInt ndim = fft->ndim, *dim = fft->dim;
149: Vec xx;
150: #if defined(PETSC_USE_COMPLEX)
151: #if defined(PETSC_USE_64BIT_INDICES)
152: fftw_iodim64 *iodims = fftw->iodims;
153: #else
154: fftw_iodim *iodims = fftw->iodims;
155: #endif
156: #endif
158: PetscFunctionBegin;
159: if (!fftw->p_backward && fftw->p_flag != FFTW_ESTIMATE) {
160: /* The planner overwrites the data in the in/out arrays, so a dummy input array is needed; see FFTW manual sec. 2.1 or sec. 4 */
161: PetscCall(VecDuplicate(x, &xx));
162: PetscCall(VecGetArrayRead(xx, &x_array));
163: } else {
164: PetscCall(VecGetArrayRead(x, &x_array));
165: }
166: PetscCall(VecGetArray(y, &y_array));
167: if (!fftw->p_backward) { /* create a plan, then execute it */
168: switch (ndim) {
169: case 1:
170: #if defined(PETSC_USE_COMPLEX)
171: fftw->p_backward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
172: #else
173: fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
174: #endif
175: break;
176: case 2:
177: #if defined(PETSC_USE_COMPLEX)
178: fftw->p_backward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
179: #else
180: fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
181: #endif
182: break;
183: case 3:
184: #if defined(PETSC_USE_COMPLEX)
185: fftw->p_backward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
186: #else
187: fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
188: #endif
189: break;
190: default:
191: #if defined(PETSC_USE_COMPLEX)
192: #if defined(PETSC_USE_64BIT_INDICES)
193: fftw->p_backward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
194: #else
195: fftw->p_backward = fftw_plan_guru_dft((int)ndim, iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
196: #endif
197: #else
198: fftw->p_backward = fftw_plan_dft_c2r((int)ndim, (int *)dim, (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
199: #endif
200: break;
201: }
202: if (fftw->p_flag != FFTW_ESTIMATE) {
203: /* The planner overwrites the data in the in/out arrays, so a dummy input array is needed; see FFTW manual sec. 2.1 or sec. 4 */
204: PetscCall(VecRestoreArrayRead(xx, &x_array));
205: PetscCall(VecDestroy(&xx));
206: PetscCall(VecGetArrayRead(x, &x_array));
207: } else {
208: fftw->binarray = (PetscScalar *)x_array;
209: fftw->boutarray = y_array;
210: }
211: }
212: if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
213: #if defined(PETSC_USE_COMPLEX)
214: fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
215: #else
216: fftw_execute_dft_c2r(fftw->p_backward, (fftw_complex *)x_array, (double *)y_array);
217: #endif
218: } else {
219: fftw_execute(fftw->p_backward);
220: }
221: PetscCall(VecRestoreArray(y, &y_array));
222: PetscCall(VecRestoreArrayRead(x, &x_array));
223: PetscFunctionReturn(PETSC_SUCCESS);
224: }
226: #if !PetscDefined(HAVE_MPIUNI)
227: PetscErrorCode MatMult_MPIFFTW(Mat A, Vec x, Vec y)
228: {
229: Mat_FFT *fft = (Mat_FFT *)A->data;
230: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
231: const PetscScalar *x_array;
232: PetscScalar *y_array;
233: PetscInt ndim = fft->ndim, *dim = fft->dim;
234: MPI_Comm comm;
235: Vec xx;
237: PetscFunctionBegin;
238: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
239: if (!fftw->p_forward && fftw->p_flag != FFTW_ESTIMATE) {
240: /* The planner overwrites the data in the in/out arrays, so a dummy input array is needed; see FFTW manual sec. 2.1 or sec. 4 */
241: PetscCall(VecDuplicate(x, &xx));
242: PetscCall(VecGetArrayRead(xx, &x_array));
243: } else {
244: PetscCall(VecGetArrayRead(x, &x_array));
245: }
246: PetscCall(VecGetArray(y, &y_array));
247: if (!fftw->p_forward) { /* create a plan, then execute it */
248: switch (ndim) {
249: case 1:
250: #if defined(PETSC_USE_COMPLEX)
251: fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
252: #else
253: SETERRQ(comm, PETSC_ERR_SUP, "no support for real numbers yet");
254: #endif
255: break;
256: case 2:
257: #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
258: fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
259: #else
260: fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
261: #endif
262: break;
263: case 3:
264: #if defined(PETSC_USE_COMPLEX)
265: fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
266: #else
267: fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
268: #endif
269: break;
270: default:
271: #if defined(PETSC_USE_COMPLEX)
272: fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
273: #else
274: fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw, fftw->dim_fftw, (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
275: #endif
276: break;
277: }
278: if (fftw->p_flag != FFTW_ESTIMATE) {
279: /* The planner overwrites the data in the in/out arrays, so a dummy input array is needed; see FFTW manual sec. 2.1 or sec. 4 */
280: PetscCall(VecRestoreArrayRead(xx, &x_array));
281: PetscCall(VecDestroy(&xx));
282: PetscCall(VecGetArrayRead(x, &x_array));
283: } else {
284: fftw->finarray = (PetscScalar *)x_array;
285: fftw->foutarray = y_array;
286: }
287: }
288: if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
289: fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
290: } else {
291: fftw_execute(fftw->p_forward);
292: }
293: PetscCall(VecRestoreArray(y, &y_array));
294: PetscCall(VecRestoreArrayRead(x, &x_array));
295: PetscFunctionReturn(PETSC_SUCCESS);
296: }
298: PetscErrorCode MatMultTranspose_MPIFFTW(Mat A, Vec x, Vec y)
299: {
300: Mat_FFT *fft = (Mat_FFT *)A->data;
301: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
302: const PetscScalar *x_array;
303: PetscScalar *y_array;
304: PetscInt ndim = fft->ndim, *dim = fft->dim;
305: MPI_Comm comm;
306: Vec xx;
308: PetscFunctionBegin;
309: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
310: if (!fftw->p_backward && fftw->p_flag != FFTW_ESTIMATE) {
311: /* The planner overwrites the data in the in/out arrays, so a dummy input array is needed; see FFTW manual sec. 2.1 or sec. 4 */
312: PetscCall(VecDuplicate(x, &xx));
313: PetscCall(VecGetArrayRead(xx, &x_array));
314: } else {
315: PetscCall(VecGetArrayRead(x, &x_array));
316: }
317: PetscCall(VecGetArray(y, &y_array));
318: if (!fftw->p_backward) { /* create a plan, then execute it */
319: switch (ndim) {
320: case 1:
321: #if defined(PETSC_USE_COMPLEX)
322: fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
323: #else
324: SETERRQ(comm, PETSC_ERR_SUP, "no support for real numbers yet");
325: #endif
326: break;
327: case 2:
328: #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
329: fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
330: #else
331: fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
332: #endif
333: break;
334: case 3:
335: #if defined(PETSC_USE_COMPLEX)
336: fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
337: #else
338: fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
339: #endif
340: break;
341: default:
342: #if defined(PETSC_USE_COMPLEX)
343: fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
344: #else
345: fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
346: #endif
347: break;
348: }
349: if (fftw->p_flag != FFTW_ESTIMATE) {
350: /* The planner overwrites the data in the in/out arrays, so a dummy input array is needed; see FFTW manual sec. 2.1 or sec. 4 */
351: PetscCall(VecRestoreArrayRead(xx, &x_array));
352: PetscCall(VecDestroy(&xx));
353: PetscCall(VecGetArrayRead(x, &x_array));
354: } else {
355: fftw->binarray = (PetscScalar *)x_array;
356: fftw->boutarray = y_array;
357: }
358: }
359: if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
360: fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
361: } else {
362: fftw_execute(fftw->p_backward);
363: }
364: PetscCall(VecRestoreArray(y, &y_array));
365: PetscCall(VecRestoreArrayRead(x, &x_array));
366: PetscFunctionReturn(PETSC_SUCCESS);
367: }
368: #endif
370: PetscErrorCode MatDestroy_FFTW(Mat A)
371: {
372: Mat_FFT *fft = (Mat_FFT *)A->data;
373: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
375: PetscFunctionBegin;
376: fftw_destroy_plan(fftw->p_forward);
377: fftw_destroy_plan(fftw->p_backward);
378: if (fftw->iodims) free(fftw->iodims);
379: PetscCall(PetscFree(fftw->dim_fftw));
380: PetscCall(PetscFree(fft->data));
381: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", NULL));
382: PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", NULL));
383: PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", NULL));
384: #if !PetscDefined(HAVE_MPIUNI)
385: fftw_mpi_cleanup();
386: #endif
387: PetscFunctionReturn(PETSC_SUCCESS);
388: }
390: #if !PetscDefined(HAVE_MPIUNI)
391: #include <../src/vec/vec/impls/mpi/pvecimpl.h>
392: PetscErrorCode VecDestroy_MPIFFTW(Vec v)
393: {
394: PetscScalar *array;
396: PetscFunctionBegin;
397: PetscCall(VecGetArray(v, &array));
398: fftw_free((fftw_complex *)array);
399: PetscCall(VecRestoreArray(v, &array));
400: PetscCall(VecDestroy_MPI(v));
401: PetscFunctionReturn(PETSC_SUCCESS);
402: }
403: #endif
405: #if !PetscDefined(HAVE_MPIUNI)
406: static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin, Vec *fin_new)
407: {
408: Mat A;
410: PetscFunctionBegin;
411: PetscCall(PetscObjectQuery((PetscObject)fin, "FFTmatrix", (PetscObject *)&A));
412: PetscCall(MatCreateVecsFFTW_FFTW(A, fin_new, NULL, NULL));
413: PetscFunctionReturn(PETSC_SUCCESS);
414: }
416: static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout, Vec *fout_new)
417: {
418: Mat A;
420: PetscFunctionBegin;
421: PetscCall(PetscObjectQuery((PetscObject)fout, "FFTmatrix", (PetscObject *)&A));
422: PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, fout_new, NULL));
423: PetscFunctionReturn(PETSC_SUCCESS);
424: }
426: static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new)
427: {
428: Mat A;
430: PetscFunctionBegin;
431: PetscCall(PetscObjectQuery((PetscObject)bout, "FFTmatrix", (PetscObject *)&A));
432: PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, NULL, bout_new));
433: PetscFunctionReturn(PETSC_SUCCESS);
434: }
435: #endif
437: /*@
438: MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
439: parallel layout determined by `MATFFTW`
441: Collective
443: Input Parameter:
444: . A - the matrix
446: Output Parameters:
447: + x - (optional) input vector of forward FFTW
448: . y - (optional) output vector of forward FFTW
449: - z - (optional) output vector of backward FFTW
451: Options Database Key:
452: . -mat_fftw_plannerflags - set FFTW planner flags
454: Level: advanced
456: Notes:
457: The parallel layout of the output of the forward FFTW is always the same as the input
458: of the backward FFTW, but the parallel layout of the input vector of the forward
459: FFTW might not be the same as the output of the backward FFTW.
461: One must provide enough space when doing a parallel real transform: extra zeros are
462: padded at the end of the last dimension. For this reason one needs to invoke the
463: fftw_mpi_local_size_transposed() routine, remembering to change the last dimension
464: from n to n/2+1 when invoking it. The number of zeros to be padded depends on whether the
465: last dimension is even or odd: two zeros are needed if it is even, one if it is odd.
466: For example, a last dimension of 4 or 5 is both padded to a row length of 2*(n/2+1) = 6.
468: Lastly, one needs some scratch space at the end of the data set on each process. The
469: alloc_local value returned by the FFTW local-size routines accounts for the data plus
470: scratch space needed on each process.
472: Developer Notes:
473: How come `MatCreateVecs()` doesn't produce the correctly padded vectors automatically?
475: .seealso: [](ch_matrices), `Mat`, `MATFFTW`, `MatCreateFFT()`, `MatCreateVecs()`
476: @*/
477: PetscErrorCode MatCreateVecsFFTW(Mat A, Vec *x, Vec *y, Vec *z)
478: {
479: PetscFunctionBegin;
480: PetscUseMethod(A, "MatCreateVecsFFTW_C", (Mat, Vec *, Vec *, Vec *), (A, x, y, z));
481: PetscFunctionReturn(PETSC_SUCCESS);
482: }
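
/* A minimal usage sketch (an editorial addition, not part of fftw.c): a forward/backward
   round trip through a 2D MATFFTW matrix. It assumes PETSc has been initialized; the
   function name and the 4 x 5 grid are illustrative only. FFTW does not normalize, so the
   backward transform of the forward transform returns N*x. */
static PetscErrorCode FFTWRoundTripSketch(MPI_Comm comm)
{
  Mat      F;
  Vec      x, fin, fout, bout;
  PetscInt dim[2] = {4, 5}, N = 20;

  PetscFunctionBegin;
  PetscCall(MatCreateFFT(comm, 2, dim, MATFFTW, &F));
  PetscCall(MatCreateVecsFFTW(F, &fin, &fout, &bout)); /* work vectors with FFTW-compatible layout */
  PetscCall(VecCreateMPI(comm, PETSC_DECIDE, N, &x));  /* a vector with ordinary PETSc layout */
  PetscCall(VecSet(x, 1.0));
  PetscCall(VecScatterPetscToFFTW(F, x, fin));  /* insert FFTW padding (real case) */
  PetscCall(MatMult(F, fin, fout));             /* forward DFT */
  PetscCall(MatMultTranspose(F, fout, bout));   /* backward (unnormalized) DFT */
  PetscCall(VecScatterFFTWToPetsc(F, bout, x)); /* strip padding back to PETSc layout */
  PetscCall(VecScale(x, 1.0 / (PetscReal)N));   /* undo FFTW's scaling by N */
  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&fin));
  PetscCall(VecDestroy(&fout));
  PetscCall(VecDestroy(&bout));
  PetscCall(MatDestroy(&F));
  PetscFunctionReturn(PETSC_SUCCESS);
}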
484: PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A, Vec *fin, Vec *fout, Vec *bout)
485: {
486: PetscMPIInt size, rank;
487: MPI_Comm comm;
488: Mat_FFT *fft = (Mat_FFT *)A->data;
490: PetscFunctionBegin;
493: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
495: PetscCallMPI(MPI_Comm_size(comm, &size));
496: PetscCallMPI(MPI_Comm_rank(comm, &rank));
497: if (size == 1) { /* sequential case */
498: #if defined(PETSC_USE_COMPLEX)
499: if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fin));
500: if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fout));
501: if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, bout));
502: #else
503: if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fin));
504: if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fout));
505: if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, bout));
506: #endif
507: #if !PetscDefined(HAVE_MPIUNI)
508: } else { /* parallel cases */
509: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
510: PetscInt ndim = fft->ndim, *dim = fft->dim;
511: ptrdiff_t alloc_local, local_n0, local_0_start;
512: ptrdiff_t local_n1;
513: fftw_complex *data_fout;
514: ptrdiff_t local_1_start;
515: #if defined(PETSC_USE_COMPLEX)
516: fftw_complex *data_fin, *data_bout;
517: #else
518: double *data_finr, *data_boutr;
519: PetscInt n1, N1;
520: ptrdiff_t temp;
521: #endif
523: switch (ndim) {
524: case 1:
525: #if !defined(PETSC_USE_COMPLEX)
526: SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not allow parallel real 1D transform");
527: #else
528: alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
529: if (fin) {
530: data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
531: PetscCall(VecCreateMPIWithArray(comm, 1, local_n0, fft->N, (const PetscScalar *)data_fin, fin));
532: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
533: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
534: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
535: }
536: if (fout) {
537: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
538: PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_fout, fout));
539: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
540: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
541: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
542: }
543: alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
544: if (bout) {
545: data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
546: PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_bout, bout));
547: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
548: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
549: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
550: }
551: break;
552: #endif
553: case 2:
554: #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no longer the product of the individual dimensions */
555: alloc_local = fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
556: N1 = 2 * dim[0] * (dim[1] / 2 + 1);
557: n1 = 2 * local_n0 * (dim[1] / 2 + 1);
558: if (fin) {
559: data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
560: PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
561: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
562: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
563: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
564: }
565: if (fout) {
566: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
567: PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_fout, fout));
568: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
569: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
570: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
571: }
572: if (bout) {
573: data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
574: PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
575: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
576: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
577: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
578: }
579: #else
580: /* Get local size */
581: alloc_local = fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
582: if (fin) {
583: data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
584: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
585: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
586: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
587: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
588: }
589: if (fout) {
590: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
591: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
592: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
593: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
594: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
595: }
596: if (bout) {
597: data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
598: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
599: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
600: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
601: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
602: }
603: #endif
604: break;
605: case 3:
606: #if !defined(PETSC_USE_COMPLEX)
607: alloc_local = fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
608: N1 = 2 * dim[0] * dim[1] * (dim[2] / 2 + 1);
609: n1 = 2 * local_n0 * dim[1] * (dim[2] / 2 + 1);
610: if (fin) {
611: data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
612: PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
613: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
614: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
615: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
616: }
617: if (fout) {
618: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
619: PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (PetscScalar *)data_fout, fout));
620: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
621: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
622: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
623: }
624: if (bout) {
625: data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
626: PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
627: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
628: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
629: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
630: }
631: #else
632: alloc_local = fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
633: if (fin) {
634: data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
635: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
636: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
637: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
638: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
639: }
640: if (fout) {
641: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
642: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
643: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
644: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
645: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
646: }
647: if (bout) {
648: data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
649: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
650: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
651: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
652: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
653: }
654: #endif
655: break;
656: default:
657: #if !defined(PETSC_USE_COMPLEX)
658: temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
659: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
660: alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
661: N1 = 2 * fft->N * (PetscInt)(fftw->dim_fftw[fftw->ndim_fftw - 1]) / ((PetscInt)temp);
662: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
664: if (fin) {
665: data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
666: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_finr, fin));
667: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
668: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
669: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
670: }
671: if (fout) {
672: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
673: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_fout, fout));
674: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
675: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
676: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
677: }
678: if (bout) {
679: data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
680: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_boutr, bout));
681: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
682: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
683: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
684: }
685: #else
686: alloc_local = fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
687: if (fin) {
688: data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
689: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
690: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
691: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
692: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
693: }
694: if (fout) {
695: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
696: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
697: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
698: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
699: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
700: }
701: if (bout) {
702: data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
703: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
704: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
705: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
706: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
707: }
708: #endif
709: break;
710: }
711: /* fftw vectors have their data arrays allocated by fftw_malloc, such that v->array is set but
712: v->array_allocated=NULL. A regular replacearray call won't free that memory and only causes
713: memory leaks. We null these function pointers here to prevent accidental use.
714: */
715: if (fin) (*fin)->ops->replacearray = NULL;
716: if (fout) (*fout)->ops->replacearray = NULL;
717: if (bout) (*bout)->ops->replacearray = NULL;
718: #endif
719: }
720: PetscFunctionReturn(PETSC_SUCCESS);
721: }
723: /*@
724: VecScatterPetscToFFTW - Copies a PETSc vector to the vector that goes into `MATFFTW` calls.
726: Collective
728: Input Parameters:
729: + A - FFTW matrix
730: - x - the PETSc vector
732: Output Parameter:
733: . y - the FFTW vector
735: Level: intermediate
737: Note:
738: For a parallel real FFT, FFTW requires extra space to be inserted at the end of the last dimension, even when
739: one is not doing an in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
740: zeros. This routine does that job via a scatter operation.
742: .seealso: [](ch_matrices), `Mat`, `MATFFTW`, `VecScatterFFTWToPetsc()`, `MatCreateVecsFFTW()`
743: @*/
744: PetscErrorCode VecScatterPetscToFFTW(Mat A, Vec x, Vec y)
745: {
746: PetscFunctionBegin;
747: PetscUseMethod(A, "VecScatterPetscToFFTW_C", (Mat, Vec, Vec), (A, x, y));
748: PetscFunctionReturn(PETSC_SUCCESS);
749: }
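
/* Worked example of the padding this scatter implements: for a real 2D transform with
   dim = {4, 5}, the padded row length is NM = dim[1] + 1 = 6 (dim[1] odd), matching
   2*(dim[1]/2 + 1); each of the 4 rows gains one trailing zero, so the FFTW vector
   holds 4*6 = 24 entries while the PETSc vector holds 4*5 = 20. */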
751: static PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A, Vec x, Vec y)
752: {
753: MPI_Comm comm;
754: Mat_FFT *fft = (Mat_FFT *)A->data;
755: PetscInt low;
756: PetscMPIInt rank, size;
757: PetscInt vsize, vsize1;
758: VecScatter vecscat;
759: IS list1;
761: PetscFunctionBegin;
762: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
763: PetscCallMPI(MPI_Comm_size(comm, &size));
764: PetscCallMPI(MPI_Comm_rank(comm, &rank));
765: PetscCall(VecGetOwnershipRange(y, &low, NULL));
767: if (size == 1) {
768: PetscCall(VecGetSize(x, &vsize));
769: PetscCall(VecGetSize(y, &vsize1));
770: PetscCall(ISCreateStride(PETSC_COMM_SELF, fft->N, 0, 1, &list1));
771: PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
772: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
773: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
774: PetscCall(VecScatterDestroy(&vecscat));
775: PetscCall(ISDestroy(&list1));
776: #if !PetscDefined(HAVE_MPIUNI)
777: } else {
778: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
779: PetscInt ndim = fft->ndim, *dim = fft->dim;
780: ptrdiff_t local_n0, local_0_start;
781: ptrdiff_t local_n1, local_1_start;
782: IS list2;
783: #if !defined(PETSC_USE_COMPLEX)
784: PetscInt i, j, k, partial_dim;
785: PetscInt *indx1, *indx2, tempindx, tempindx1;
786: PetscInt NM;
787: ptrdiff_t temp;
788: #endif
790: switch (ndim) {
791: case 1:
792: #if defined(PETSC_USE_COMPLEX)
793: fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
795: PetscCall(ISCreateStride(comm, local_n0, local_0_start, 1, &list1));
796: PetscCall(ISCreateStride(comm, local_n0, low, 1, &list2));
797: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
798: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
799: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
800: PetscCall(VecScatterDestroy(&vecscat));
801: PetscCall(ISDestroy(&list1));
802: PetscCall(ISDestroy(&list2));
803: #else
804: SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
805: #endif
806: break;
807: case 2:
808: #if defined(PETSC_USE_COMPLEX)
809: fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
811: PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
812: PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
813: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
814: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
815: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
816: PetscCall(VecScatterDestroy(&vecscat));
817: PetscCall(ISDestroy(&list1));
818: PetscCall(ISDestroy(&list2));
819: #else
820: fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
822: PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx1));
823: PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx2));
825: if (dim[1] % 2 == 0) {
826: NM = dim[1] + 2;
827: } else {
828: NM = dim[1] + 1;
829: }
830: for (i = 0; i < local_n0; i++) {
831: for (j = 0; j < dim[1]; j++) {
832: tempindx = i * dim[1] + j;
833: tempindx1 = i * NM + j;
835: indx1[tempindx] = local_0_start * dim[1] + tempindx;
836: indx2[tempindx] = low + tempindx1;
837: }
838: }
840: PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
841: PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));
843: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
844: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
845: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
846: PetscCall(VecScatterDestroy(&vecscat));
847: PetscCall(ISDestroy(&list1));
848: PetscCall(ISDestroy(&list2));
849: PetscCall(PetscFree(indx1));
850: PetscCall(PetscFree(indx2));
851: #endif
852: break;
854: case 3:
855: #if defined(PETSC_USE_COMPLEX)
856: fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
858: PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
859: PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
860: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
861: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
862: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
863: PetscCall(VecScatterDestroy(&vecscat));
864: PetscCall(ISDestroy(&list1));
865: PetscCall(ISDestroy(&list2));
866: #else
867: /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
868: SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 3D real transform");
869: fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
871: PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx1));
872: PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx2));
874: if (dim[2] % 2 == 0) NM = dim[2] + 2;
875: else NM = dim[2] + 1;
877: for (i = 0; i < local_n0; i++) {
878: for (j = 0; j < dim[1]; j++) {
879: for (k = 0; k < dim[2]; k++) {
880: tempindx = i * dim[1] * dim[2] + j * dim[2] + k;
881: tempindx1 = i * dim[1] * NM + j * NM + k;
883: indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
884: indx2[tempindx] = low + tempindx1;
885: }
886: }
887: }
889: PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
890: PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));
891: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
892: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
893: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
894: PetscCall(VecScatterDestroy(&vecscat));
895: PetscCall(ISDestroy(&list1));
896: PetscCall(ISDestroy(&list2));
897: PetscCall(PetscFree(indx1));
898: PetscCall(PetscFree(indx2));
899: #endif
900: break;
902: default:
903: #if defined(PETSC_USE_COMPLEX)
904: fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
906: PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
907: PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
908: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
909: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
910: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
911: PetscCall(VecScatterDestroy(&vecscat));
912: PetscCall(ISDestroy(&list1));
913: PetscCall(ISDestroy(&list2));
914: #else
915: /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
916: SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel DIM>3 real transform");
917: temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
919: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
921: fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
923: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
925: partial_dim = fftw->partial_dim;
927: PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx1));
928: PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx2));
930: if (dim[ndim - 1] % 2 == 0) NM = 2;
931: else NM = 1;
933: j = low;
934: for (i = 0, k = 1; i < (PetscInt)local_n0 * partial_dim; i++, k++) {
935: indx1[i] = local_0_start * partial_dim + i;
936: indx2[i] = j;
937: if (k % dim[ndim - 1] == 0) j += NM;
938: j++;
939: }
940: PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
941: PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));
942: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
943: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
944: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
945: PetscCall(VecScatterDestroy(&vecscat));
946: PetscCall(ISDestroy(&list1));
947: PetscCall(ISDestroy(&list2));
948: PetscCall(PetscFree(indx1));
949: PetscCall(PetscFree(indx2));
950: #endif
951: break;
952: }
953: #endif
954: }
955: PetscFunctionReturn(PETSC_SUCCESS);
956: }
958: /*@
959: VecScatterFFTWToPetsc - Converts `MATFFTW` output vector to a PETSc vector.
961: Collective
963: Input Parameters:
964: + A - `MATFFTW` matrix
965: - x - FFTW vector
967: Output Parameter:
968: . y - PETSc vector
970: Level: intermediate
972: Note:
973: When doing a real transform, the FFTW output of the backward DFT contains extra zeros at the end of the last dimension.
974: `VecScatterFFTWToPetsc()` removes those extra zeros.
976: .seealso: [](ch_matrices), `Mat`, `VecScatterPetscToFFTW()`, `MATFFTW`, `MatCreateVecsFFTW()`
977: @*/
978: PetscErrorCode VecScatterFFTWToPetsc(Mat A, Vec x, Vec y)
979: {
980: PetscFunctionBegin;
981: PetscUseMethod(A, "VecScatterFFTWToPetsc_C", (Mat, Vec, Vec), (A, x, y));
982: PetscFunctionReturn(PETSC_SUCCESS);
983: }
985: static PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A, Vec x, Vec y)
986: {
987: MPI_Comm comm;
988: Mat_FFT *fft = (Mat_FFT *)A->data;
989: PetscInt low;
990: PetscMPIInt rank, size;
991: VecScatter vecscat;
992: IS list1;
994: PetscFunctionBegin;
995: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
996: PetscCallMPI(MPI_Comm_size(comm, &size));
997: PetscCallMPI(MPI_Comm_rank(comm, &rank));
998: PetscCall(VecGetOwnershipRange(x, &low, NULL));
1000: if (size == 1) {
1001: PetscCall(ISCreateStride(comm, fft->N, 0, 1, &list1));
1002: PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
1003: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1004: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1005: PetscCall(VecScatterDestroy(&vecscat));
1006: PetscCall(ISDestroy(&list1));
1008: #if !PetscDefined(HAVE_MPIUNI)
1009: } else {
1010: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
1011: PetscInt ndim = fft->ndim, *dim = fft->dim;
1012: ptrdiff_t local_n0, local_0_start;
1013: ptrdiff_t local_n1, local_1_start;
1014: IS list2;
1015: #if !defined(PETSC_USE_COMPLEX)
1016: PetscInt i, j, k, partial_dim;
1017: PetscInt *indx1, *indx2, tempindx, tempindx1;
1018: PetscInt NM;
1019: ptrdiff_t temp;
1020: #endif
1021: switch (ndim) {
1022: case 1:
1023: #if defined(PETSC_USE_COMPLEX)
1024: fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
1026: PetscCall(ISCreateStride(comm, local_n1, local_1_start, 1, &list1));
1027: PetscCall(ISCreateStride(comm, local_n1, low, 1, &list2));
1028: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
1029: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1030: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1031: PetscCall(VecScatterDestroy(&vecscat));
1032: PetscCall(ISDestroy(&list1));
1033: PetscCall(ISDestroy(&list2));
1034: #else
1035: SETERRQ(comm, PETSC_ERR_SUP, "No support for real parallel 1D FFT");
1036: #endif
1037: break;
1038: case 2:
1039: #if defined(PETSC_USE_COMPLEX)
1040: fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
1042: PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
1043: PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
1044: PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1045: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1046: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1047: PetscCall(VecScatterDestroy(&vecscat));
1048: PetscCall(ISDestroy(&list1));
1049: PetscCall(ISDestroy(&list2));
1050: #else
1051: fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1053: PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx1));
1054: PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx2));
1056: if (dim[1] % 2 == 0) NM = dim[1] + 2;
1057: else NM = dim[1] + 1;
1059: for (i = 0; i < local_n0; i++) {
1060: for (j = 0; j < dim[1]; j++) {
1061: tempindx = i * dim[1] + j;
1062: tempindx1 = i * NM + j;
1064: indx1[tempindx] = local_0_start * dim[1] + tempindx;
1065: indx2[tempindx] = low + tempindx1;
1066: }
1067: }
1069: PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
1070: PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));
1072: PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1073: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1074: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1075: PetscCall(VecScatterDestroy(&vecscat));
1076: PetscCall(ISDestroy(&list1));
1077: PetscCall(ISDestroy(&list2));
1078: PetscCall(PetscFree(indx1));
1079: PetscCall(PetscFree(indx2));
1080: #endif
1081: break;
1082: case 3:
1083: #if defined(PETSC_USE_COMPLEX)
1084: fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
1086: PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
1087: PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
1088: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
1089: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1090: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1091: PetscCall(VecScatterDestroy(&vecscat));
1092: PetscCall(ISDestroy(&list1));
1093: PetscCall(ISDestroy(&list2));
1094: #else
1095: fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1097: PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx1));
1098: PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx2));
1100: if (dim[2] % 2 == 0) NM = dim[2] + 2;
1101: else NM = dim[2] + 1;
1103: for (i = 0; i < local_n0; i++) {
1104: for (j = 0; j < dim[1]; j++) {
1105: for (k = 0; k < dim[2]; k++) {
1106: tempindx = i * dim[1] * dim[2] + j * dim[2] + k;
1107: tempindx1 = i * dim[1] * NM + j * NM + k;
1109: indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
1110: indx2[tempindx] = low + tempindx1;
1111: }
1112: }
1113: }
1115: PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
1116: PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));
1118: PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1119: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1120: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1121: PetscCall(VecScatterDestroy(&vecscat));
1122: PetscCall(ISDestroy(&list1));
1123: PetscCall(ISDestroy(&list2));
1124: PetscCall(PetscFree(indx1));
1125: PetscCall(PetscFree(indx2));
1126: #endif
1127: break;
1128: default:
1129: #if defined(PETSC_USE_COMPLEX)
1130: fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
1132: PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
1133: PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
1134: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
1135: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1136: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1137: PetscCall(VecScatterDestroy(&vecscat));
1138: PetscCall(ISDestroy(&list1));
1139: PetscCall(ISDestroy(&list2));
1140: #else
1141: temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
1143: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
1145: fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1147: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
1149: partial_dim = fftw->partial_dim;
1151: PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx1));
1152: PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx2));
1154: if (dim[ndim - 1] % 2 == 0) NM = 2;
1155: else NM = 1;
1157: j = low;
1158: for (i = 0, k = 1; i < (PetscInt)local_n0 * partial_dim; i++, k++) {
1159: indx1[i] = local_0_start * partial_dim + i;
1160: indx2[i] = j;
1161: if (k % dim[ndim - 1] == 0) j += NM;
1162: j++;
1163: }
1164: PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
1165: PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));
1167: PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1168: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1169: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1170: PetscCall(VecScatterDestroy(&vecscat));
1171: PetscCall(ISDestroy(&list1));
1172: PetscCall(ISDestroy(&list2));
1173: PetscCall(PetscFree(indx1));
1174: PetscCall(PetscFree(indx2));
1175: #endif
1176: break;
1177: }
1178: #endif
1179: }
1180: PetscFunctionReturn(PETSC_SUCCESS);
1181: }
1183: /*MC
1184: MATFFTW - "fftw" - Matrix type that provides FFT via the FFTW external package.
1186: Level: intermediate
1188: .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatType`, `MatCreateFFT()`
1189: M*/
1190: PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1191: {
1192: MPI_Comm comm;
1193: Mat_FFT *fft = (Mat_FFT *)A->data;
1194: Mat_FFTW *fftw;
1195: PetscInt ndim = fft->ndim, *dim = fft->dim;
1196: const char *plans[] = {"FFTW_ESTIMATE", "FFTW_MEASURE", "FFTW_PATIENT", "FFTW_EXHAUSTIVE"};
1197: unsigned iplans[] = {FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE};
1198: PetscBool flg;
1199: PetscInt p_flag, partial_dim = 1, ctr;
1200: PetscMPIInt size, rank;
1201: ptrdiff_t *pdim;
1202: #if !defined(PETSC_USE_COMPLEX)
1203: PetscInt tot_dim;
1204: #endif
1206: PetscFunctionBegin;
1207: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1208: PetscCallMPI(MPI_Comm_size(comm, &size));
1209: PetscCallMPI(MPI_Comm_rank(comm, &rank));
1211: #if !PetscDefined(HAVE_MPIUNI)
1212: fftw_mpi_init();
1213: #endif
1214: pdim = (ptrdiff_t *)calloc(ndim, sizeof(ptrdiff_t));
1215: pdim[0] = dim[0];
1216: #if !defined(PETSC_USE_COMPLEX)
1217: tot_dim = 2 * dim[0];
1218: #endif
1219: for (ctr = 1; ctr < ndim; ctr++) {
1220: partial_dim *= dim[ctr];
1221: pdim[ctr] = dim[ctr];
1222: #if !defined(PETSC_USE_COMPLEX)
1223: if (ctr == ndim - 1) tot_dim *= (dim[ctr] / 2 + 1);
1224: else tot_dim *= dim[ctr];
1225: #endif
1226: }
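/* Example: for real scalars and dim = {4, 5}, tot_dim = 2*4*(5/2 + 1) = 24, i.e. the
   padded sequential r2c storage length, versus fft->N = 20 logical entries. */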
1228: if (size == 1) {
1229: #if defined(PETSC_USE_COMPLEX)
1230: PetscCall(MatSetSizes(A, fft->N, fft->N, fft->N, fft->N));
1231: fft->n = fft->N;
1232: #else
1233: PetscCall(MatSetSizes(A, tot_dim, tot_dim, tot_dim, tot_dim));
1234: fft->n = tot_dim;
1235: #endif
1236: #if !PetscDefined(HAVE_MPIUNI)
1237: } else {
1238: ptrdiff_t local_n0, local_0_start, local_n1, local_1_start;
1239: #if !defined(PETSC_USE_COMPLEX)
1240: ptrdiff_t temp;
1241: PetscInt N1;
1242: #endif
1244: switch (ndim) {
1245: case 1:
1246: #if !defined(PETSC_USE_COMPLEX)
1247: SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
1248: #else
1249: fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
1250: fft->n = (PetscInt)local_n0;
1251: PetscCall(MatSetSizes(A, local_n1, fft->n, fft->N, fft->N));
1252: #endif
1253: break;
1254: case 2:
1255: #if defined(PETSC_USE_COMPLEX)
1256: fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
1257: fft->n = (PetscInt)local_n0 * dim[1];
1258: PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1259: #else
1260: fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1262: fft->n = 2 * (PetscInt)local_n0 * (dim[1] / 2 + 1);
1263: PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * (dim[1] / 2 + 1), 2 * dim[0] * (dim[1] / 2 + 1)));
1264: #endif
1265: break;
1266: case 3:
1267: #if defined(PETSC_USE_COMPLEX)
1268: fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
1270: fft->n = (PetscInt)local_n0 * dim[1] * dim[2];
1271: PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1272: #else
1273: fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1275: fft->n = 2 * (PetscInt)local_n0 * dim[1] * (dim[2] / 2 + 1);
1276: PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * dim[1] * (dim[2] / 2 + 1), 2 * dim[0] * dim[1] * (dim[2] / 2 + 1)));
1277: #endif
1278: break;
1279: default:
1280: #if defined(PETSC_USE_COMPLEX)
1281: fftw_mpi_local_size(ndim, pdim, comm, &local_n0, &local_0_start);
1283: fft->n = (PetscInt)local_n0 * partial_dim;
1284: PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1285: #else
1286: temp = pdim[ndim - 1];
1288: pdim[ndim - 1] = temp / 2 + 1;
1290: fftw_mpi_local_size_transposed(ndim, pdim, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1292: fft->n = 2 * (PetscInt)local_n0 * partial_dim * pdim[ndim - 1] / temp;
1293: N1 = 2 * fft->N * (PetscInt)pdim[ndim - 1] / ((PetscInt)temp);
1295: pdim[ndim - 1] = temp;
1297: PetscCall(MatSetSizes(A, fft->n, fft->n, N1, N1));
1298: #endif
1299: break;
1300: }
1301: #endif
1302: }
1303: free(pdim);
1304: PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATFFTW));
1305: PetscCall(PetscNew(&fftw));
1306: fft->data = (void *)fftw;
1308: fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is the dimension of the FFT */
1309: fftw->partial_dim = partial_dim;
1311: PetscCall(PetscMalloc1(ndim, &fftw->dim_fftw));
1312: if (size == 1) {
1313: #if defined(PETSC_USE_64BIT_INDICES)
1314: fftw->iodims = (fftw_iodim64 *)malloc(sizeof(fftw_iodim64) * ndim);
1315: #else
1316: fftw->iodims = (fftw_iodim *)malloc(sizeof(fftw_iodim) * ndim);
1317: #endif
1318: }
1320: for (ctr = 0; ctr < ndim; ctr++) (fftw->dim_fftw)[ctr] = dim[ctr];
1322: fftw->p_forward = NULL;
1323: fftw->p_backward = NULL;
1324: fftw->p_flag = FFTW_ESTIMATE;
1326: if (size == 1) {
1327: A->ops->mult = MatMult_SeqFFTW;
1328: A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1329: #if !PetscDefined(HAVE_MPIUNI)
1330: } else {
1331: A->ops->mult = MatMult_MPIFFTW;
1332: A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1333: #endif
1334: }
1335: fft->matdestroy = MatDestroy_FFTW;
1336: A->assembled = PETSC_TRUE;
1337: A->preallocated = PETSC_TRUE;
1339: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", MatCreateVecsFFTW_FFTW));
1340: PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", VecScatterPetscToFFTW_FFTW));
1341: PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", VecScatterFFTWToPetsc_FFTW));
1343: /* get runtime options */
1344: PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "FFTW Options", "Mat");
1345: PetscCall(PetscOptionsEList("-mat_fftw_plannerflags", "Planner Flags", "None", plans, 4, plans[0], &p_flag, &flg));
1346: if (flg) fftw->p_flag = iplans[p_flag];
1347: PetscOptionsEnd();
1348: PetscFunctionReturn(PETSC_SUCCESS);
1349: }
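
/* Runtime usage sketch (an editorial addition): the planner flag can be selected from the
   command line of any program that creates a MATFFTW matrix, e.g. (program name illustrative)

     ./myfft -mat_fftw_plannerflags FFTW_MEASURE

   FFTW_MEASURE and the more expensive FFTW_PATIENT/FFTW_EXHAUSTIVE plans take longer to
   create but may execute faster than the default FFTW_ESTIMATE plan. */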