Actual source code: fftw.c
1: /*
2: Provides an interface to the FFTW package.
3: Testing examples can be found in ~src/mat/tests
4: */
6: #include <../src/mat/impls/fft/fft.h>
7: EXTERN_C_BEGIN
8: #if !PetscDefined(HAVE_MPIUNI)
9: #include <fftw3-mpi.h>
10: #else
11: #include <fftw3.h>
12: #endif
13: EXTERN_C_END
15: typedef struct {
16: ptrdiff_t ndim_fftw, *dim_fftw;
17: #if defined(PETSC_USE_64BIT_INDICES)
18: fftw_iodim64 *iodims;
19: #else
20: fftw_iodim *iodims;
21: #endif
22: PetscInt partial_dim;
23: fftw_plan p_forward, p_backward;
24: unsigned p_flag; /* planner flags, FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
25: PetscScalar *finarray, *foutarray, *binarray, *boutarray; /* keep track of arrays because fftw_execute() can only be executed for the arrays with which the plan was created */
26: } Mat_FFTW;
28: extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat, Vec *, Vec *, Vec *);
30: static PetscErrorCode MatMult_SeqFFTW(Mat A, Vec x, Vec y)
31: {
32: Mat_FFT *fft = (Mat_FFT *)A->data;
33: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
34: const PetscScalar *x_array;
35: PetscScalar *y_array;
36: Vec xx;
37: #if defined(PETSC_USE_COMPLEX)
38: #if defined(PETSC_USE_64BIT_INDICES)
39: fftw_iodim64 *iodims;
40: #else
41: fftw_iodim *iodims;
42: #endif
43: PetscInt i;
44: #endif
45: PetscInt ndim = fft->ndim, *dim = fft->dim;
47: PetscFunctionBegin;
48: if (!fftw->p_forward && fftw->p_flag != FFTW_ESTIMATE) {
49: /* The data in the in/out arrays is overwritten during planning, so we need a dummy array for the computation; see the FFTW manual, Sec. 2.1 or Sec. 4 */
50: PetscCall(VecDuplicate(x, &xx));
51: PetscCall(VecGetArrayRead(xx, &x_array));
52: } else {
53: PetscCall(VecGetArrayRead(x, &x_array));
54: }
55: PetscCall(VecGetArray(y, &y_array));
56: if (!fftw->p_forward) { /* create a plan, then execute it */
57: switch (ndim) {
58: case 1:
59: #if defined(PETSC_USE_COMPLEX)
60: fftw->p_forward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
61: #else
62: fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
63: #endif
64: break;
65: case 2:
66: #if defined(PETSC_USE_COMPLEX)
67: fftw->p_forward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
68: #else
69: fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
70: #endif
71: break;
72: case 3:
73: #if defined(PETSC_USE_COMPLEX)
74: fftw->p_forward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
75: #else
76: fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
77: #endif
78: break;
79: default:
80: #if defined(PETSC_USE_COMPLEX)
81: iodims = fftw->iodims;
82: #if defined(PETSC_USE_64BIT_INDICES)
83: if (ndim) {
84: iodims[ndim - 1].n = (ptrdiff_t)dim[ndim - 1];
85: iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
86: for (i = ndim - 2; i >= 0; --i) {
87: iodims[i].n = (ptrdiff_t)dim[i];
88: iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
89: }
90: }
91: fftw->p_forward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
92: #else
93: if (ndim) {
94: iodims[ndim - 1].n = (int)dim[ndim - 1];
95: iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
96: for (i = ndim - 2; i >= 0; --i) {
97: iodims[i].n = (int)dim[i];
98: iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
99: }
100: }
101: fftw->p_forward = fftw_plan_guru_dft((int)ndim, (fftw_iodim *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
102: #endif
104: #else
105: fftw->p_forward = fftw_plan_dft_r2c(ndim, (int *)dim, (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
106: #endif
107: break;
108: }
109: if (fftw->p_flag != FFTW_ESTIMATE) {
110: /* The data in the in/out arrays is overwritten during planning, so we need a dummy array for the computation; see the FFTW manual, Sec. 2.1 or Sec. 4 */
111: PetscCall(VecRestoreArrayRead(xx, &x_array));
112: PetscCall(VecDestroy(&xx));
113: PetscCall(VecGetArrayRead(x, &x_array));
114: } else {
115: fftw->finarray = (PetscScalar *)x_array;
116: fftw->foutarray = y_array;
117: }
118: }
120: if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
121: #if defined(PETSC_USE_COMPLEX)
122: fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
123: #else
124: fftw_execute_dft_r2c(fftw->p_forward, (double *)x_array, (fftw_complex *)y_array);
125: #endif
126: } else {
127: fftw_execute(fftw->p_forward);
128: }
129: PetscCall(VecRestoreArray(y, &y_array));
130: PetscCall(VecRestoreArrayRead(x, &x_array));
131: PetscFunctionReturn(PETSC_SUCCESS);
132: }
134: static PetscErrorCode MatMultTranspose_SeqFFTW(Mat A, Vec x, Vec y)
135: {
136: Mat_FFT *fft = (Mat_FFT *)A->data;
137: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
138: const PetscScalar *x_array;
139: PetscScalar *y_array;
140: PetscInt ndim = fft->ndim, *dim = fft->dim;
141: Vec xx;
142: #if defined(PETSC_USE_COMPLEX)
143: #if defined(PETSC_USE_64BIT_INDICES)
144: fftw_iodim64 *iodims = fftw->iodims;
145: #else
146: fftw_iodim *iodims = fftw->iodims;
147: #endif
148: #endif
150: PetscFunctionBegin;
151: if (!fftw->p_backward && fftw->p_flag != FFTW_ESTIMATE) {
152: /* The data in the in/out arrays is overwritten during planning, so we need a dummy array for the computation; see the FFTW manual, Sec. 2.1 or Sec. 4 */
153: PetscCall(VecDuplicate(x, &xx));
154: PetscCall(VecGetArrayRead(xx, &x_array));
155: } else {
156: PetscCall(VecGetArrayRead(x, &x_array));
157: }
158: PetscCall(VecGetArray(y, &y_array));
159: if (!fftw->p_backward) { /* create a plan, then execute it */
160: switch (ndim) {
161: case 1:
162: #if defined(PETSC_USE_COMPLEX)
163: fftw->p_backward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
164: #else
165: fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
166: #endif
167: break;
168: case 2:
169: #if defined(PETSC_USE_COMPLEX)
170: fftw->p_backward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
171: #else
172: fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
173: #endif
174: break;
175: case 3:
176: #if defined(PETSC_USE_COMPLEX)
177: fftw->p_backward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
178: #else
179: fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
180: #endif
181: break;
182: default:
183: #if defined(PETSC_USE_COMPLEX)
184: #if defined(PETSC_USE_64BIT_INDICES)
185: fftw->p_backward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
186: #else
187: fftw->p_backward = fftw_plan_guru_dft((int)ndim, iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
188: #endif
189: #else
190: fftw->p_backward = fftw_plan_dft_c2r((int)ndim, (int *)dim, (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
191: #endif
192: break;
193: }
194: if (fftw->p_flag != FFTW_ESTIMATE) {
195: /* The data in the in/out arrays is overwritten during planning, so we need a dummy array for the computation; see the FFTW manual, Sec. 2.1 or Sec. 4 */
196: PetscCall(VecRestoreArrayRead(xx, &x_array));
197: PetscCall(VecDestroy(&xx));
198: PetscCall(VecGetArrayRead(x, &x_array));
199: } else {
200: fftw->binarray = (PetscScalar *)x_array;
201: fftw->boutarray = y_array;
202: }
203: }
204: if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
205: #if defined(PETSC_USE_COMPLEX)
206: fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
207: #else
208: fftw_execute_dft_c2r(fftw->p_backward, (fftw_complex *)x_array, (double *)y_array);
209: #endif
210: } else {
211: fftw_execute(fftw->p_backward);
212: }
213: PetscCall(VecRestoreArray(y, &y_array));
214: PetscCall(VecRestoreArrayRead(x, &x_array));
215: PetscFunctionReturn(PETSC_SUCCESS);
216: }
218: #if !PetscDefined(HAVE_MPIUNI)
219: static PetscErrorCode MatMult_MPIFFTW(Mat A, Vec x, Vec y)
220: {
221: Mat_FFT *fft = (Mat_FFT *)A->data;
222: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
223: const PetscScalar *x_array;
224: PetscScalar *y_array;
225: PetscInt ndim = fft->ndim, *dim = fft->dim;
226: MPI_Comm comm;
227: Vec xx;
229: PetscFunctionBegin;
230: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
231: if (!fftw->p_forward && fftw->p_flag != FFTW_ESTIMATE) {
232: /* The data in the in/out arrays is overwritten during planning, so we need a dummy array for the computation; see the FFTW manual, Sec. 2.1 or Sec. 4 */
233: PetscCall(VecDuplicate(x, &xx));
234: PetscCall(VecGetArrayRead(xx, &x_array));
235: } else {
236: PetscCall(VecGetArrayRead(x, &x_array));
237: }
238: PetscCall(VecGetArray(y, &y_array));
239: if (!fftw->p_forward) { /* create a plan, then execute it */
240: switch (ndim) {
241: case 1:
242: #if defined(PETSC_USE_COMPLEX)
243: fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
244: #else
245: SETERRQ(comm, PETSC_ERR_SUP, "No support for real numbers yet");
246: #endif
247: break;
248: case 2:
249: #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
250: fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
251: #else
252: fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
253: #endif
254: break;
255: case 3:
256: #if defined(PETSC_USE_COMPLEX)
257: fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
258: #else
259: fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
260: #endif
261: break;
262: default:
263: #if defined(PETSC_USE_COMPLEX)
264: fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
265: #else
266: fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw, fftw->dim_fftw, (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
267: #endif
268: break;
269: }
270: if (fftw->p_flag != FFTW_ESTIMATE) {
271: /* The data in the in/out arrays is overwritten during planning, so we need a dummy array for the computation; see the FFTW manual, Sec. 2.1 or Sec. 4 */
272: PetscCall(VecRestoreArrayRead(xx, &x_array));
273: PetscCall(VecDestroy(&xx));
274: PetscCall(VecGetArrayRead(x, &x_array));
275: } else {
276: fftw->finarray = (PetscScalar *)x_array;
277: fftw->foutarray = y_array;
278: }
279: }
280: if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
281: fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
282: } else {
283: fftw_execute(fftw->p_forward);
284: }
285: PetscCall(VecRestoreArray(y, &y_array));
286: PetscCall(VecRestoreArrayRead(x, &x_array));
287: PetscFunctionReturn(PETSC_SUCCESS);
288: }
290: static PetscErrorCode MatMultTranspose_MPIFFTW(Mat A, Vec x, Vec y)
291: {
292: Mat_FFT *fft = (Mat_FFT *)A->data;
293: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
294: const PetscScalar *x_array;
295: PetscScalar *y_array;
296: PetscInt ndim = fft->ndim, *dim = fft->dim;
297: MPI_Comm comm;
298: Vec xx;
300: PetscFunctionBegin;
301: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
302: if (!fftw->p_backward && fftw->p_flag != FFTW_ESTIMATE) {
303: /* The data in the in/out arrays is overwritten during planning, so we need a dummy array for the computation; see the FFTW manual, Sec. 2.1 or Sec. 4 */
304: PetscCall(VecDuplicate(x, &xx));
305: PetscCall(VecGetArrayRead(xx, &x_array));
306: } else {
307: PetscCall(VecGetArrayRead(x, &x_array));
308: }
309: PetscCall(VecGetArray(y, &y_array));
310: if (!fftw->p_backward) { /* create a plan, then execute it */
311: switch (ndim) {
312: case 1:
313: #if defined(PETSC_USE_COMPLEX)
314: fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
315: #else
316: SETERRQ(comm, PETSC_ERR_SUP, "No support for real numbers yet");
317: #endif
318: break;
319: case 2:
320: #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
321: fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
322: #else
323: fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
324: #endif
325: break;
326: case 3:
327: #if defined(PETSC_USE_COMPLEX)
328: fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
329: #else
330: fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
331: #endif
332: break;
333: default:
334: #if defined(PETSC_USE_COMPLEX)
335: fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
336: #else
337: fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
338: #endif
339: break;
340: }
341: if (fftw->p_flag != FFTW_ESTIMATE) {
342: /* The data in the in/out arrays is overwritten during planning, so we need a dummy array for the computation; see the FFTW manual, Sec. 2.1 or Sec. 4 */
343: PetscCall(VecRestoreArrayRead(xx, &x_array));
344: PetscCall(VecDestroy(&xx));
345: PetscCall(VecGetArrayRead(x, &x_array));
346: } else {
347: fftw->binarray = (PetscScalar *)x_array;
348: fftw->boutarray = y_array;
349: }
350: }
351: if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
352: fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
353: } else {
354: fftw_execute(fftw->p_backward);
355: }
356: PetscCall(VecRestoreArray(y, &y_array));
357: PetscCall(VecRestoreArrayRead(x, &x_array));
358: PetscFunctionReturn(PETSC_SUCCESS);
359: }
360: #endif
362: static PetscErrorCode MatDestroy_FFTW(Mat A)
363: {
364: Mat_FFT *fft = (Mat_FFT *)A->data;
365: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
367: PetscFunctionBegin;
368: fftw_destroy_plan(fftw->p_forward);
369: fftw_destroy_plan(fftw->p_backward);
370: if (fftw->iodims) free(fftw->iodims);
371: PetscCall(PetscFree(fftw->dim_fftw));
372: PetscCall(PetscFree(fft->data));
373: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", NULL));
374: PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", NULL));
375: PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", NULL));
376: #if !PetscDefined(HAVE_MPIUNI)
377: fftw_mpi_cleanup();
378: #endif
379: PetscFunctionReturn(PETSC_SUCCESS);
380: }
382: #if !PetscDefined(HAVE_MPIUNI)
383: #include <../src/vec/vec/impls/mpi/pvecimpl.h>
384: static PetscErrorCode VecDestroy_MPIFFTW(Vec v)
385: {
386: PetscScalar *array;
388: PetscFunctionBegin;
389: PetscCall(VecGetArray(v, &array));
390: fftw_free((fftw_complex *)array);
391: PetscCall(VecRestoreArray(v, &array));
392: PetscCall(VecDestroy_MPI(v));
393: PetscFunctionReturn(PETSC_SUCCESS);
394: }
395: #endif
397: #if !PetscDefined(HAVE_MPIUNI)
398: static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin, Vec *fin_new)
399: {
400: Mat A;
402: PetscFunctionBegin;
403: PetscCall(PetscObjectQuery((PetscObject)fin, "FFTmatrix", (PetscObject *)&A));
404: PetscCall(MatCreateVecsFFTW_FFTW(A, fin_new, NULL, NULL));
405: PetscFunctionReturn(PETSC_SUCCESS);
406: }
408: static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout, Vec *fout_new)
409: {
410: Mat A;
412: PetscFunctionBegin;
413: PetscCall(PetscObjectQuery((PetscObject)fout, "FFTmatrix", (PetscObject *)&A));
414: PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, fout_new, NULL));
415: PetscFunctionReturn(PETSC_SUCCESS);
416: }
418: static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new)
419: {
420: Mat A;
422: PetscFunctionBegin;
423: PetscCall(PetscObjectQuery((PetscObject)bout, "FFTmatrix", (PetscObject *)&A));
424: PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, NULL, bout_new));
425: PetscFunctionReturn(PETSC_SUCCESS);
426: }
427: #endif
429: /*@
430: MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
431: parallel layout determined by `MATFFTW`
433: Collective
435: Input Parameter:
436: . A - the matrix
438: Output Parameters:
439: + x - (optional) input vector of forward FFTW
440: . y - (optional) output vector of forward FFTW
441: - z - (optional) output vector of backward FFTW
443: Options Database Key:
444: . -mat_fftw_plannerflags - set FFTW planner flags
446: Level: advanced
448: Notes:
449: The parallel layout of the output of the forward FFTW is always the same as that of the
450: input of the backward FFTW, but the parallel layout of the input vector of the forward
451: FFTW might not be the same as that of the output of the backward FFTW.
453: For a parallel real transform one must provide enough space by padding extra zeros at the
454: end of the last dimension. For this reason one needs to invoke the
455: fftw_mpi_local_size_transposed() routine, remembering to change the last dimension from n
456: to n/2+1 when calling it. The number of zeros to pad depends on whether the last dimension
457: is even or odd: an even last dimension needs two zeros, an odd one needs only one. For
458: example, n = 4 is padded to 2*(4/2+1) = 6 entries (two zeros) and n = 5 to 2*(5/2+1) = 6 entries (one zero).
460: Lastly, one needs some scratch space at the end of the data set on each process. alloc_local
461: figures out how much space is needed, i.e. it computes the data + scratch space for
462: each process and returns that.
464: Developer Notes:
465: How come `MatCreateVecs()` doesn't produce the correctly padded vectors automatically?
467: .seealso: [](ch_matrices), `Mat`, `MATFFTW`, `MatCreateFFT()`, `MatCreateVecs()`
468: @*/
469: PetscErrorCode MatCreateVecsFFTW(Mat A, Vec *x, Vec *y, Vec *z)
470: {
471: PetscFunctionBegin;
472: PetscUseMethod(A, "MatCreateVecsFFTW_C", (Mat, Vec *, Vec *, Vec *), (A, x, y, z));
473: PetscFunctionReturn(PETSC_SUCCESS);
474: }
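/*
   A minimal usage sketch (not part of this file's build; the dimensions N0 and N1 and the
   surrounding program structure are placeholders chosen for illustration): create a 2-D FFT
   matrix with MatCreateFFT(), obtain layout-compatible vectors with MatCreateVecsFFTW(),
   and apply the forward and backward transforms.

     PetscInt dims[2] = {N0, N1};
     Mat      F;
     Vec      fin, fout, bout;

     PetscCall(MatCreateFFT(PETSC_COMM_WORLD, 2, dims, MATFFTW, &F));
     PetscCall(MatCreateVecsFFTW(F, &fin, &fout, &bout));
     // fill fin; for real transforms use VecScatterPetscToFFTW() to copy a
     // regular PETSc vector into the padded layout expected by FFTW
     PetscCall(MatMult(F, fin, fout));           // forward transform
     PetscCall(MatMultTranspose(F, fout, bout)); // backward (unnormalized) transform
     PetscCall(VecDestroy(&fin));
     PetscCall(VecDestroy(&fout));
     PetscCall(VecDestroy(&bout));
     PetscCall(MatDestroy(&F));
*/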
476: PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A, Vec *fin, Vec *fout, Vec *bout)
477: {
478: PetscMPIInt size, rank;
479: MPI_Comm comm;
480: Mat_FFT *fft = (Mat_FFT *)A->data;
482: PetscFunctionBegin;
485: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
487: PetscCallMPI(MPI_Comm_size(comm, &size));
488: PetscCallMPI(MPI_Comm_rank(comm, &rank));
489: if (size == 1) { /* sequential case */
490: #if defined(PETSC_USE_COMPLEX)
491: if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fin));
492: if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fout));
493: if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, bout));
494: #else
495: if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fin));
496: if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fout));
497: if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, bout));
498: #endif
499: #if !PetscDefined(HAVE_MPIUNI)
500: } else { /* parallel cases */
501: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
502: PetscInt ndim = fft->ndim, *dim = fft->dim;
503: ptrdiff_t alloc_local, local_n0, local_0_start;
504: ptrdiff_t local_n1;
505: fftw_complex *data_fout;
506: ptrdiff_t local_1_start;
507: #if defined(PETSC_USE_COMPLEX)
508: fftw_complex *data_fin, *data_bout;
509: #else
510: double *data_finr, *data_boutr;
511: PetscInt n1, N1;
512: ptrdiff_t temp;
513: #endif
515: switch (ndim) {
516: case 1:
517: #if !defined(PETSC_USE_COMPLEX)
518: SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not allow parallel real 1D transform");
519: #else
520: alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
521: if (fin) {
522: data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
523: PetscCall(VecCreateMPIWithArray(comm, 1, local_n0, fft->N, (const PetscScalar *)data_fin, fin));
524: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
525: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
526: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
527: }
528: if (fout) {
529: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
530: PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_fout, fout));
531: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
532: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
533: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
534: }
535: alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
536: if (bout) {
537: data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
538: PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_bout, bout));
539: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
540: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
541: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
542: }
543: break;
544: #endif
545: case 2:
546: #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
547: alloc_local = fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
548: N1 = 2 * dim[0] * (dim[1] / 2 + 1);
549: n1 = 2 * local_n0 * (dim[1] / 2 + 1);
550: if (fin) {
551: data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
552: PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
553: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
554: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
555: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
556: }
557: if (fout) {
558: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
559: PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_fout, fout));
560: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
561: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
562: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
563: }
564: if (bout) {
565: data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
566: PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
567: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
568: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
569: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
570: }
571: #else
572: /* Get local size */
573: alloc_local = fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
574: if (fin) {
575: data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
576: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
577: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
578: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
579: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
580: }
581: if (fout) {
582: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
583: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
584: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
585: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
586: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
587: }
588: if (bout) {
589: data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
590: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
591: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
592: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
593: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
594: }
595: #endif
596: break;
597: case 3:
598: #if !defined(PETSC_USE_COMPLEX)
599: alloc_local = fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
600: N1 = 2 * dim[0] * dim[1] * (dim[2] / 2 + 1);
601: n1 = 2 * local_n0 * dim[1] * (dim[2] / 2 + 1);
602: if (fin) {
603: data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
604: PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
605: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
606: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
607: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
608: }
609: if (fout) {
610: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
611: PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (PetscScalar *)data_fout, fout));
612: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
613: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
614: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
615: }
616: if (bout) {
617: data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
618: PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
619: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
620: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
621: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
622: }
623: #else
624: alloc_local = fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
625: if (fin) {
626: data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
627: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
628: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
629: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
630: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
631: }
632: if (fout) {
633: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
634: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
635: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
636: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
637: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
638: }
639: if (bout) {
640: data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
641: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
642: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
643: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
644: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
645: }
646: #endif
647: break;
648: default:
649: #if !defined(PETSC_USE_COMPLEX)
650: temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
651: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
652: alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
653: N1 = 2 * fft->N * (PetscInt)(fftw->dim_fftw[fftw->ndim_fftw - 1]) / ((PetscInt)temp);
654: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
656: if (fin) {
657: data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
658: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_finr, fin));
659: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
660: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
661: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
662: }
663: if (fout) {
664: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
665: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_fout, fout));
666: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
667: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
668: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
669: }
670: if (bout) {
671: data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
672: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_boutr, bout));
673: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
674: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
675: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
676: }
677: #else
678: alloc_local = fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
679: if (fin) {
680: data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
681: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
682: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
683: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
684: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
685: }
686: if (fout) {
687: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
688: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
689: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
690: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
691: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
692: }
693: if (bout) {
694: data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
695: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
696: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
697: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
698: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
699: }
700: #endif
701: break;
702: }
703: /* FFTW vectors have their data arrays allocated by fftw_malloc, so that v->array is set but
704: v->array_allocated = NULL. A regular replacearray call would not free that memory and would only cause
705: memory leaks. We void these pointers here to avoid accidental use.
706: */
707: if (fin) (*fin)->ops->replacearray = NULL;
708: if (fout) (*fout)->ops->replacearray = NULL;
709: if (bout) (*bout)->ops->replacearray = NULL;
710: #endif
711: }
712: PetscFunctionReturn(PETSC_SUCCESS);
713: }
715: /*@
716: VecScatterPetscToFFTW - Copies a PETSc vector to the vector that goes into `MATFFTW` calls.
718: Collective
720: Input Parameters:
721: + A - FFTW matrix
722: - x - the PETSc vector
724: Output Parameter:
725: . y - the FFTW vector
727: Level: intermediate
729: Note:
730: For a parallel real FFT, FFTW requires extra space to be inserted at the end of the last dimension. This is required even when
731: one is not doing an in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
732: zeros. This routine does that job via a scatter operation.
734: .seealso: [](ch_matrices), `Mat`, `MATFFTW`, `VecScatterFFTWToPetsc()`, `MatCreateVecsFFTW()`
735: @*/
736: PetscErrorCode VecScatterPetscToFFTW(Mat A, Vec x, Vec y)
737: {
738: PetscFunctionBegin;
739: PetscUseMethod(A, "VecScatterPetscToFFTW_C", (Mat, Vec, Vec), (A, x, y));
740: PetscFunctionReturn(PETSC_SUCCESS);
741: }
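/*
   A short sketch of the scatter before a real forward transform (assumptions: a 2-D real
   transform of logical size N0 x N1, with F, fin, fout obtained from MatCreateFFT() and
   MatCreateVecsFFTW() as in the example above; N0 and N1 are placeholders). The PETSc
   vector x holds the data in its natural, unpadded layout; VecScatterPetscToFFTW()
   inserts the padding zeros FFTW requires.

     Vec x;
     PetscCall(VecCreateMPI(PETSC_COMM_WORLD, PETSC_DECIDE, N0 * N1, &x));
     // ... set the entries of x ...
     PetscCall(VecScatterPetscToFFTW(F, x, fin)); // copy into the padded FFTW layout
     PetscCall(MatMult(F, fin, fout));            // forward real-to-complex transform
*/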
743: static PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A, Vec x, Vec y)
744: {
745: MPI_Comm comm;
746: Mat_FFT *fft = (Mat_FFT *)A->data;
747: PetscInt low;
748: PetscMPIInt rank, size;
749: PetscInt vsize, vsize1;
750: VecScatter vecscat;
751: IS list1;
753: PetscFunctionBegin;
754: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
755: PetscCallMPI(MPI_Comm_size(comm, &size));
756: PetscCallMPI(MPI_Comm_rank(comm, &rank));
757: PetscCall(VecGetOwnershipRange(y, &low, NULL));
759: if (size == 1) {
760: PetscCall(VecGetSize(x, &vsize));
761: PetscCall(VecGetSize(y, &vsize1));
762: PetscCall(ISCreateStride(PETSC_COMM_SELF, fft->N, 0, 1, &list1));
763: PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
764: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
765: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
766: PetscCall(VecScatterDestroy(&vecscat));
767: PetscCall(ISDestroy(&list1));
768: #if !PetscDefined(HAVE_MPIUNI)
769: } else {
770: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
771: PetscInt ndim = fft->ndim, *dim = fft->dim;
772: ptrdiff_t local_n0, local_0_start;
773: ptrdiff_t local_n1, local_1_start;
774: IS list2;
775: #if !defined(PETSC_USE_COMPLEX)
776: PetscInt i, j, k, partial_dim;
777: PetscInt *indx1, *indx2, tempindx, tempindx1;
778: PetscInt NM;
779: ptrdiff_t temp;
780: #endif
782: switch (ndim) {
783: case 1:
784: #if defined(PETSC_USE_COMPLEX)
785: fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
787: PetscCall(ISCreateStride(comm, local_n0, local_0_start, 1, &list1));
788: PetscCall(ISCreateStride(comm, local_n0, low, 1, &list2));
789: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
790: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
791: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
792: PetscCall(VecScatterDestroy(&vecscat));
793: PetscCall(ISDestroy(&list1));
794: PetscCall(ISDestroy(&list2));
795: #else
796: SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
797: #endif
798: break;
799: case 2:
800: #if defined(PETSC_USE_COMPLEX)
801: fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
803: PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
804: PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
805: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
806: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
807: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
808: PetscCall(VecScatterDestroy(&vecscat));
809: PetscCall(ISDestroy(&list1));
810: PetscCall(ISDestroy(&list2));
811: #else
812: fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
814: PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx1));
815: PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx2));
817: if (dim[1] % 2 == 0) {
818: NM = dim[1] + 2;
819: } else {
820: NM = dim[1] + 1;
821: }
822: for (i = 0; i < local_n0; i++) {
823: for (j = 0; j < dim[1]; j++) {
824: tempindx = i * dim[1] + j;
825: tempindx1 = i * NM + j;
827: indx1[tempindx] = local_0_start * dim[1] + tempindx;
828: indx2[tempindx] = low + tempindx1;
829: }
830: }
832: PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
833: PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));
835: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
836: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
837: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
838: PetscCall(VecScatterDestroy(&vecscat));
839: PetscCall(ISDestroy(&list1));
840: PetscCall(ISDestroy(&list2));
841: PetscCall(PetscFree(indx1));
842: PetscCall(PetscFree(indx2));
843: #endif
844: break;
846: case 3:
847: #if defined(PETSC_USE_COMPLEX)
848: fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
850: PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
851: PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
852: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
853: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
854: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
855: PetscCall(VecScatterDestroy(&vecscat));
856: PetscCall(ISDestroy(&list1));
857: PetscCall(ISDestroy(&list2));
858: #else
859: /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
860: SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 3D real transform");
861: fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
863: PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx1));
864: PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx2));
866: if (dim[2] % 2 == 0) NM = dim[2] + 2;
867: else NM = dim[2] + 1;
869: for (i = 0; i < local_n0; i++) {
870: for (j = 0; j < dim[1]; j++) {
871: for (k = 0; k < dim[2]; k++) {
872: tempindx = i * dim[1] * dim[2] + j * dim[2] + k;
873: tempindx1 = i * dim[1] * NM + j * NM + k;
875: indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
876: indx2[tempindx] = low + tempindx1;
877: }
878: }
879: }
881: PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
882: PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));
883: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
884: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
885: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
886: PetscCall(VecScatterDestroy(&vecscat));
887: PetscCall(ISDestroy(&list1));
888: PetscCall(ISDestroy(&list2));
889: PetscCall(PetscFree(indx1));
890: PetscCall(PetscFree(indx2));
891: #endif
892: break;
894: default:
895: #if defined(PETSC_USE_COMPLEX)
896: fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
898: PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
899: PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
900: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
901: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
902: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
903: PetscCall(VecScatterDestroy(&vecscat));
904: PetscCall(ISDestroy(&list1));
905: PetscCall(ISDestroy(&list2));
906: #else
907: /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
908: SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel DIM>3 real transform");
909: temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
911: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
913: fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
915: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
917: partial_dim = fftw->partial_dim;
919: PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx1));
920: PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx2));
922: if (dim[ndim - 1] % 2 == 0) NM = 2;
923: else NM = 1;
925: j = low;
926: for (i = 0, k = 1; i < (PetscInt)local_n0 * partial_dim; i++, k++) {
927: indx1[i] = local_0_start * partial_dim + i;
928: indx2[i] = j;
929: if (k % dim[ndim - 1] == 0) j += NM;
930: j++;
931: }
932: PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
933: PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));
934: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
935: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
936: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
937: PetscCall(VecScatterDestroy(&vecscat));
938: PetscCall(ISDestroy(&list1));
939: PetscCall(ISDestroy(&list2));
940: PetscCall(PetscFree(indx1));
941: PetscCall(PetscFree(indx2));
942: #endif
943: break;
944: }
945: #endif
946: }
947: PetscFunctionReturn(PETSC_SUCCESS);
948: }
950: /*@
951: VecScatterFFTWToPetsc - Converts `MATFFTW` output vector to a PETSc vector.
953: Collective
955: Input Parameters:
956: + A - `MATFFTW` matrix
957: - x - FFTW vector
959: Output Parameter:
960: . y - PETSc vector
962: Level: intermediate
964: Note:
965: For real transforms, the FFTW output of the backward DFT contains extra zeros at the end of the last dimension.
966: `VecScatterFFTWToPetsc()` removes those extra zeros.
968: .seealso: [](ch_matrices), `Mat`, `VecScatterPetscToFFTW()`, `MATFFTW`, `MatCreateVecsFFTW()`
969: @*/
970: PetscErrorCode VecScatterFFTWToPetsc(Mat A, Vec x, Vec y)
971: {
972: PetscFunctionBegin;
973: PetscUseMethod(A, "VecScatterFFTWToPetsc_C", (Mat, Vec, Vec), (A, x, y));
974: PetscFunctionReturn(PETSC_SUCCESS);
975: }
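/*
   A short sketch of recovering data after a real backward transform (assumptions: F, fout,
   bout as in the examples above, and a user-created PETSc vector z of global size N0*N1).
   The backward output still carries FFTW's padding zeros and is unnormalized;
   VecScatterFFTWToPetsc() strips the padding, and scaling by 1/(N0*N1) removes the factor
   FFTW introduces over a forward/backward round trip.

     PetscCall(MatMultTranspose(F, fout, bout));   // backward transform
     PetscCall(VecScatterFFTWToPetsc(F, bout, z)); // drop the padding zeros
     PetscCall(VecScale(z, 1.0 / (N0 * N1)));      // remove FFTW's scaling
*/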
977: static PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A, Vec x, Vec y)
978: {
979: MPI_Comm comm;
980: Mat_FFT *fft = (Mat_FFT *)A->data;
981: PetscInt low;
982: PetscMPIInt rank, size;
983: VecScatter vecscat;
984: IS list1;
986: PetscFunctionBegin;
987: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
988: PetscCallMPI(MPI_Comm_size(comm, &size));
989: PetscCallMPI(MPI_Comm_rank(comm, &rank));
990: PetscCall(VecGetOwnershipRange(x, &low, NULL));
992: if (size == 1) {
993: PetscCall(ISCreateStride(comm, fft->N, 0, 1, &list1));
994: PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
995: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
996: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
997: PetscCall(VecScatterDestroy(&vecscat));
998: PetscCall(ISDestroy(&list1));
1000: #if !PetscDefined(HAVE_MPIUNI)
1001: } else {
1002: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
1003: PetscInt ndim = fft->ndim, *dim = fft->dim;
1004: ptrdiff_t local_n0, local_0_start;
1005: ptrdiff_t local_n1, local_1_start;
1006: IS list2;
1007: #if !defined(PETSC_USE_COMPLEX)
1008: PetscInt i, j, k, partial_dim;
1009: PetscInt *indx1, *indx2, tempindx, tempindx1;
1010: PetscInt NM;
1011: ptrdiff_t temp;
1012: #endif
1013: switch (ndim) {
1014: case 1:
1015: #if defined(PETSC_USE_COMPLEX)
1016: fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
1018: PetscCall(ISCreateStride(comm, local_n1, local_1_start, 1, &list1));
1019: PetscCall(ISCreateStride(comm, local_n1, low, 1, &list2));
1020: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
1021: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1022: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1023: PetscCall(VecScatterDestroy(&vecscat));
1024: PetscCall(ISDestroy(&list1));
1025: PetscCall(ISDestroy(&list2));
1026: #else
1027: SETERRQ(comm, PETSC_ERR_SUP, "No support for real parallel 1D FFT");
1028: #endif
1029: break;
1030: case 2:
1031: #if defined(PETSC_USE_COMPLEX)
1032: fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
1034: PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
1035: PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
1036: PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1037: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1038: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1039: PetscCall(VecScatterDestroy(&vecscat));
1040: PetscCall(ISDestroy(&list1));
1041: PetscCall(ISDestroy(&list2));
1042: #else
1043: fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1045: PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx1));
1046: PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx2));
1048: if (dim[1] % 2 == 0) NM = dim[1] + 2;
1049: else NM = dim[1] + 1;
1051: for (i = 0; i < local_n0; i++) {
1052: for (j = 0; j < dim[1]; j++) {
1053: tempindx = i * dim[1] + j;
1054: tempindx1 = i * NM + j;
1056: indx1[tempindx] = local_0_start * dim[1] + tempindx;
1057: indx2[tempindx] = low + tempindx1;
1058: }
1059: }
1061: PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
1062: PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));
1064: PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1065: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1066: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1067: PetscCall(VecScatterDestroy(&vecscat));
1068: PetscCall(ISDestroy(&list1));
1069: PetscCall(ISDestroy(&list2));
1070: PetscCall(PetscFree(indx1));
1071: PetscCall(PetscFree(indx2));
1072: #endif
1073: break;
1074: case 3:
1075: #if defined(PETSC_USE_COMPLEX)
1076: fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
1078: PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
1079: PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
1080: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
1081: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1082: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1083: PetscCall(VecScatterDestroy(&vecscat));
1084: PetscCall(ISDestroy(&list1));
1085: PetscCall(ISDestroy(&list2));
1086: #else
1087: fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1089: PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx1));
1090: PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx2));
1092: if (dim[2] % 2 == 0) NM = dim[2] + 2;
1093: else NM = dim[2] + 1;
1095: for (i = 0; i < local_n0; i++) {
1096: for (j = 0; j < dim[1]; j++) {
1097: for (k = 0; k < dim[2]; k++) {
1098: tempindx = i * dim[1] * dim[2] + j * dim[2] + k;
1099: tempindx1 = i * dim[1] * NM + j * NM + k;
1101: indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
1102: indx2[tempindx] = low + tempindx1;
1103: }
1104: }
1105: }
1107: PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
1108: PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));
1110: PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1111: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1112: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1113: PetscCall(VecScatterDestroy(&vecscat));
1114: PetscCall(ISDestroy(&list1));
1115: PetscCall(ISDestroy(&list2));
1116: PetscCall(PetscFree(indx1));
1117: PetscCall(PetscFree(indx2));
1118: #endif
1119: break;
1120: default:
1121: #if defined(PETSC_USE_COMPLEX)
1122: fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
1124: PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
1125: PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
1126: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
1127: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1128: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1129: PetscCall(VecScatterDestroy(&vecscat));
1130: PetscCall(ISDestroy(&list1));
1131: PetscCall(ISDestroy(&list2));
1132: #else
1133: temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
1135: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
1137: fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1139: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
1141: partial_dim = fftw->partial_dim;
1143: PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx1));
1144: PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx2));
1146: if (dim[ndim - 1] % 2 == 0) NM = 2;
1147: else NM = 1;
1149: j = low;
1150: for (i = 0, k = 1; i < (PetscInt)local_n0 * partial_dim; i++, k++) {
1151: indx1[i] = local_0_start * partial_dim + i;
1152: indx2[i] = j;
1153: if (k % dim[ndim - 1] == 0) j += NM;
1154: j++;
1155: }
1156: PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
1157: PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));
1159: PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1160: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1161: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1162: PetscCall(VecScatterDestroy(&vecscat));
1163: PetscCall(ISDestroy(&list1));
1164: PetscCall(ISDestroy(&list2));
1165: PetscCall(PetscFree(indx1));
1166: PetscCall(PetscFree(indx2));
1167: #endif
1168: break;
1169: }
1170: #endif
1171: }
1172: PetscFunctionReturn(PETSC_SUCCESS);
1173: }
1175: /*MC
1176: MATFFTW - "fftw" - Matrix type that provides FFT via the FFTW external package.
1178: Level: intermediate
1180: .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatType`, `MatCreateFFT()`
1181: M*/
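/*
   A brief sketch of selecting the planner flag at runtime (the program name is a
   placeholder). With FFTW_MEASURE, FFTW_PATIENT, or FFTW_EXHAUSTIVE the first
   MatMult()/MatMultTranspose() plans on a scratch copy of the input vector, so the extra
   planning cost is paid once and the caller's data is not overwritten during planning:

     ./myprog -mat_fftw_plannerflags FFTW_MEASURE
*/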
1182: PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1183: {
1184: MPI_Comm comm;
1185: Mat_FFT *fft = (Mat_FFT *)A->data;
1186: Mat_FFTW *fftw;
1187: PetscInt ndim = fft->ndim, *dim = fft->dim;
1188: const char *plans[] = {"FFTW_ESTIMATE", "FFTW_MEASURE", "FFTW_PATIENT", "FFTW_EXHAUSTIVE"};
1189: unsigned iplans[] = {FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE};
1190: PetscBool flg;
1191: PetscInt p_flag, partial_dim = 1, ctr;
1192: PetscMPIInt size, rank;
1193: ptrdiff_t *pdim;
1194: #if !defined(PETSC_USE_COMPLEX)
1195: PetscInt tot_dim;
1196: #endif
1198: PetscFunctionBegin;
1199: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1200: PetscCallMPI(MPI_Comm_size(comm, &size));
1201: PetscCallMPI(MPI_Comm_rank(comm, &rank));
1203: #if !PetscDefined(HAVE_MPIUNI)
1204: fftw_mpi_init();
1205: #endif
1206: pdim = (ptrdiff_t *)calloc(ndim, sizeof(ptrdiff_t));
1207: pdim[0] = dim[0];
1208: #if !defined(PETSC_USE_COMPLEX)
1209: tot_dim = 2 * dim[0];
1210: #endif
1211: for (ctr = 1; ctr < ndim; ctr++) {
1212: partial_dim *= dim[ctr];
1213: pdim[ctr] = dim[ctr];
1214: #if !defined(PETSC_USE_COMPLEX)
1215: if (ctr == ndim - 1) tot_dim *= (dim[ctr] / 2 + 1);
1216: else tot_dim *= dim[ctr];
1217: #endif
1218: }
1220: if (size == 1) {
1221: #if defined(PETSC_USE_COMPLEX)
1222: PetscCall(MatSetSizes(A, fft->N, fft->N, fft->N, fft->N));
1223: fft->n = fft->N;
1224: #else
1225: PetscCall(MatSetSizes(A, tot_dim, tot_dim, tot_dim, tot_dim));
1226: fft->n = tot_dim;
1227: #endif
1228: #if !PetscDefined(HAVE_MPIUNI)
1229: } else {
1230: ptrdiff_t local_n0, local_0_start, local_n1, local_1_start;
1231: #if !defined(PETSC_USE_COMPLEX)
1232: ptrdiff_t temp;
1233: PetscInt N1;
1234: #endif
1236: switch (ndim) {
1237: case 1:
1238: #if !defined(PETSC_USE_COMPLEX)
1239: SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
1240: #else
1241: fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
1242: fft->n = (PetscInt)local_n0;
1243: PetscCall(MatSetSizes(A, local_n1, fft->n, fft->N, fft->N));
1244: #endif
1245: break;
1246: case 2:
1247: #if defined(PETSC_USE_COMPLEX)
1248: fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
1249: fft->n = (PetscInt)local_n0 * dim[1];
1250: PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1251: #else
1252: fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1254: fft->n = 2 * (PetscInt)local_n0 * (dim[1] / 2 + 1);
1255: PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * (dim[1] / 2 + 1), 2 * dim[0] * (dim[1] / 2 + 1)));
1256: #endif
1257: break;
1258: case 3:
1259: #if defined(PETSC_USE_COMPLEX)
1260: fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
1262: fft->n = (PetscInt)local_n0 * dim[1] * dim[2];
1263: PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1264: #else
1265: fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1267: fft->n = 2 * (PetscInt)local_n0 * dim[1] * (dim[2] / 2 + 1);
1268: PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * dim[1] * (dim[2] / 2 + 1), 2 * dim[0] * dim[1] * (dim[2] / 2 + 1)));
1269: #endif
1270: break;
1271: default:
1272: #if defined(PETSC_USE_COMPLEX)
1273: fftw_mpi_local_size(ndim, pdim, comm, &local_n0, &local_0_start);
1275: fft->n = (PetscInt)local_n0 * partial_dim;
1276: PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1277: #else
1278: temp = pdim[ndim - 1];
1280: pdim[ndim - 1] = temp / 2 + 1;
1282: fftw_mpi_local_size_transposed(ndim, pdim, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1284: fft->n = 2 * (PetscInt)local_n0 * partial_dim * pdim[ndim - 1] / temp;
1285: N1 = 2 * fft->N * (PetscInt)pdim[ndim - 1] / ((PetscInt)temp);
1287: pdim[ndim - 1] = temp;
1289: PetscCall(MatSetSizes(A, fft->n, fft->n, N1, N1));
1290: #endif
1291: break;
1292: }
1293: #endif
1294: }
1295: free(pdim);
1296: PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATFFTW));
1297: PetscCall(PetscNew(&fftw));
1298: fft->data = (void *)fftw;
1300: fftw->ndim_fftw = (ptrdiff_t)ndim; /* this is the dimension of the FFT */
1301: fftw->partial_dim = partial_dim;
1303: PetscCall(PetscMalloc1(ndim, &fftw->dim_fftw));
1304: if (size == 1) {
1305: #if defined(PETSC_USE_64BIT_INDICES)
1306: fftw->iodims = (fftw_iodim64 *)malloc(sizeof(fftw_iodim64) * ndim);
1307: #else
1308: fftw->iodims = (fftw_iodim *)malloc(sizeof(fftw_iodim) * ndim);
1309: #endif
1310: }
1312: for (ctr = 0; ctr < ndim; ctr++) (fftw->dim_fftw)[ctr] = dim[ctr];
1314: fftw->p_forward = NULL;
1315: fftw->p_backward = NULL;
1316: fftw->p_flag = FFTW_ESTIMATE;
1318: if (size == 1) {
1319: A->ops->mult = MatMult_SeqFFTW;
1320: A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1321: #if !PetscDefined(HAVE_MPIUNI)
1322: } else {
1323: A->ops->mult = MatMult_MPIFFTW;
1324: A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1325: #endif
1326: }
1327: fft->matdestroy = MatDestroy_FFTW;
1328: A->assembled = PETSC_TRUE;
1329: A->preallocated = PETSC_TRUE;
1331: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", MatCreateVecsFFTW_FFTW));
1332: PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", VecScatterPetscToFFTW_FFTW));
1333: PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", VecScatterFFTWToPetsc_FFTW));
1335: /* get runtime options */
1336: PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "FFTW Options", "Mat");
1337: PetscCall(PetscOptionsEList("-mat_fftw_plannerflags", "Planner Flags", "None", plans, 4, plans[0], &p_flag, &flg));
1338: if (flg) fftw->p_flag = iplans[p_flag];
1339: PetscOptionsEnd();
1340: PetscFunctionReturn(PETSC_SUCCESS);
1341: }