Actual source code: fftw.c
1: /*
2: Provides an interface to the FFTW package.
3: Testing examples can be found in ~src/mat/tests
4: */
6: #include <../src/mat/impls/fft/fft.h>
7: EXTERN_C_BEGIN
8: #if !PetscDefined(HAVE_MPIUNI)
9: #include <fftw3-mpi.h>
10: #else
11: #include <fftw3.h>
12: #endif
13: EXTERN_C_END
15: typedef struct {
16: PetscInt ndim_fftw;
17: ptrdiff_t *dim_fftw;
18: #if defined(PETSC_USE_64BIT_INDICES)
19: fftw_iodim64 *iodims;
20: #else
21: fftw_iodim *iodims;
22: #endif
23: PetscInt partial_dim;
24: fftw_plan p_forward, p_backward;
25: unsigned p_flag; /* planner flags, FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
26: PetscScalar *finarray, *foutarray, *binarray, *boutarray; /* keep track of arrays because fftw_execute() can only be executed for the arrays with which the plan was created */
27: } Mat_FFTW;
29: extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat, Vec *, Vec *, Vec *);
31: static PetscErrorCode MatMult_SeqFFTW(Mat A, Vec x, Vec y)
32: {
33: Mat_FFT *fft = (Mat_FFT *)A->data;
34: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
35: const PetscScalar *x_array;
36: PetscScalar *y_array;
37: Vec xx;
38: #if defined(PETSC_USE_COMPLEX)
39: #if defined(PETSC_USE_64BIT_INDICES)
40: fftw_iodim64 *iodims;
41: #else
42: fftw_iodim *iodims;
43: #endif
44: #endif
45: PetscInt ndim = fft->ndim, *dim = fft->dim;
47: PetscFunctionBegin;
48: if (!fftw->p_forward && fftw->p_flag != FFTW_ESTIMATE) {
49: /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
50: PetscCall(VecDuplicate(x, &xx));
51: PetscCall(VecGetArrayRead(xx, &x_array));
52: } else {
53: PetscCall(VecGetArrayRead(x, &x_array));
54: }
55: PetscCall(VecGetArray(y, &y_array));
56: if (!fftw->p_forward) { /* create a plan, then execute it */
57: switch (ndim) {
58: case 1:
59: #if defined(PETSC_USE_COMPLEX)
60: fftw->p_forward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
61: #else
62: fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
63: #endif
64: break;
65: case 2:
66: #if defined(PETSC_USE_COMPLEX)
67: fftw->p_forward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
68: #else
69: fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
70: #endif
71: break;
72: case 3:
73: #if defined(PETSC_USE_COMPLEX)
74: fftw->p_forward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
75: #else
76: fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
77: #endif
78: break;
79: default:
80: #if defined(PETSC_USE_COMPLEX)
81: iodims = fftw->iodims;
82: #if defined(PETSC_USE_64BIT_INDICES)
83: if (ndim) {
84: iodims[ndim - 1].n = (ptrdiff_t)dim[ndim - 1];
85: iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
86: for (PetscInt i = ndim - 2; i >= 0; --i) {
87: iodims[i].n = (ptrdiff_t)dim[i];
88: iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
89: }
90: }
91: fftw->p_forward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
92: #else
93: if (ndim) {
94: iodims[ndim - 1].n = (int)dim[ndim - 1];
95: iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
96: for (PetscInt i = ndim - 2; i >= 0; --i) {
97: iodims[i].n = (int)dim[i];
98: iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
99: }
100: }
101: fftw->p_forward = fftw_plan_guru_dft((int)ndim, (fftw_iodim *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
102: #endif
104: #else
105: fftw->p_forward = fftw_plan_dft_r2c(ndim, (int *)dim, (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
106: #endif
107: break;
108: }
109: if (fftw->p_flag != FFTW_ESTIMATE) {
110: /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
111: PetscCall(VecRestoreArrayRead(xx, &x_array));
112: PetscCall(VecDestroy(&xx));
113: PetscCall(VecGetArrayRead(x, &x_array));
114: } else {
115: fftw->finarray = (PetscScalar *)x_array;
116: fftw->foutarray = y_array;
117: }
118: }
120: if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
121: #if defined(PETSC_USE_COMPLEX)
122: fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
123: #else
124: fftw_execute_dft_r2c(fftw->p_forward, (double *)x_array, (fftw_complex *)y_array);
125: #endif
126: } else {
127: fftw_execute(fftw->p_forward);
128: }
129: PetscCall(VecRestoreArray(y, &y_array));
130: PetscCall(VecRestoreArrayRead(x, &x_array));
131: PetscFunctionReturn(PETSC_SUCCESS);
132: }
134: static PetscErrorCode MatMultTranspose_SeqFFTW(Mat A, Vec x, Vec y)
135: {
136: Mat_FFT *fft = (Mat_FFT *)A->data;
137: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
138: const PetscScalar *x_array;
139: PetscScalar *y_array;
140: PetscInt ndim = fft->ndim, *dim = fft->dim;
141: Vec xx;
142: #if defined(PETSC_USE_COMPLEX)
143: #if defined(PETSC_USE_64BIT_INDICES)
144: fftw_iodim64 *iodims = fftw->iodims;
145: #else
146: fftw_iodim *iodims = fftw->iodims;
147: #endif
148: #endif
150: PetscFunctionBegin;
151: if (!fftw->p_backward && fftw->p_flag != FFTW_ESTIMATE) {
152: /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
153: PetscCall(VecDuplicate(x, &xx));
154: PetscCall(VecGetArrayRead(xx, &x_array));
155: } else {
156: PetscCall(VecGetArrayRead(x, &x_array));
157: }
158: PetscCall(VecGetArray(y, &y_array));
159: if (!fftw->p_backward) { /* create a plan, then execute it */
160: switch (ndim) {
161: case 1:
162: #if defined(PETSC_USE_COMPLEX)
163: fftw->p_backward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
164: #else
165: fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
166: #endif
167: break;
168: case 2:
169: #if defined(PETSC_USE_COMPLEX)
170: fftw->p_backward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
171: #else
172: fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
173: #endif
174: break;
175: case 3:
176: #if defined(PETSC_USE_COMPLEX)
177: fftw->p_backward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
178: #else
179: fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
180: #endif
181: break;
182: default:
183: #if defined(PETSC_USE_COMPLEX)
184: #if defined(PETSC_USE_64BIT_INDICES)
185: fftw->p_backward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
186: #else
187: fftw->p_backward = fftw_plan_guru_dft((int)ndim, iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
188: #endif
189: #else
190: fftw->p_backward = fftw_plan_dft_c2r((int)ndim, (int *)dim, (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
191: #endif
192: break;
193: }
194: if (fftw->p_flag != FFTW_ESTIMATE) {
195: /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
196: PetscCall(VecRestoreArrayRead(xx, &x_array));
197: PetscCall(VecDestroy(&xx));
198: PetscCall(VecGetArrayRead(x, &x_array));
199: } else {
200: fftw->binarray = (PetscScalar *)x_array;
201: fftw->boutarray = y_array;
202: }
203: }
204: if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
205: #if defined(PETSC_USE_COMPLEX)
206: fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
207: #else
208: fftw_execute_dft_c2r(fftw->p_backward, (fftw_complex *)x_array, (double *)y_array);
209: #endif
210: } else {
211: fftw_execute(fftw->p_backward);
212: }
213: PetscCall(VecRestoreArray(y, &y_array));
214: PetscCall(VecRestoreArrayRead(x, &x_array));
215: PetscFunctionReturn(PETSC_SUCCESS);
216: }
218: #if !PetscDefined(HAVE_MPIUNI)
219: static PetscErrorCode MatMult_MPIFFTW(Mat A, Vec x, Vec y)
220: {
221: Mat_FFT *fft = (Mat_FFT *)A->data;
222: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
223: const PetscScalar *x_array;
224: PetscScalar *y_array;
225: PetscInt ndim = fft->ndim, *dim = fft->dim;
226: MPI_Comm comm;
227: Vec xx;
229: PetscFunctionBegin;
230: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
231: if (!fftw->p_forward && fftw->p_flag != FFTW_ESTIMATE) {
232: /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
233: PetscCall(VecDuplicate(x, &xx));
234: PetscCall(VecGetArrayRead(xx, &x_array));
235: } else {
236: PetscCall(VecGetArrayRead(x, &x_array));
237: }
238: PetscCall(VecGetArray(y, &y_array));
239: if (!fftw->p_forward) { /* create a plan, then execute it */
240: switch (ndim) {
241: case 1:
242: #if defined(PETSC_USE_COMPLEX)
243: fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
244: #else
245: SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet");
246: #endif
247: break;
248: case 2:
249: #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
250: fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
251: #else
252: fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
253: #endif
254: break;
255: case 3:
256: #if defined(PETSC_USE_COMPLEX)
257: fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
258: #else
259: fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
260: #endif
261: break;
262: default:
263: #if defined(PETSC_USE_COMPLEX)
264: fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
265: #else
266: fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw, fftw->dim_fftw, (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
267: #endif
268: break;
269: }
270: if (fftw->p_flag != FFTW_ESTIMATE) {
271: /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
272: PetscCall(VecRestoreArrayRead(xx, &x_array));
273: PetscCall(VecDestroy(&xx));
274: PetscCall(VecGetArrayRead(x, &x_array));
275: } else {
276: fftw->finarray = (PetscScalar *)x_array;
277: fftw->foutarray = y_array;
278: }
279: }
280: if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
281: fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
282: } else {
283: fftw_execute(fftw->p_forward);
284: }
285: PetscCall(VecRestoreArray(y, &y_array));
286: PetscCall(VecRestoreArrayRead(x, &x_array));
287: PetscFunctionReturn(PETSC_SUCCESS);
288: }
290: static PetscErrorCode MatMultTranspose_MPIFFTW(Mat A, Vec x, Vec y)
291: {
292: Mat_FFT *fft = (Mat_FFT *)A->data;
293: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
294: const PetscScalar *x_array;
295: PetscScalar *y_array;
296: PetscInt ndim = fft->ndim, *dim = fft->dim;
297: MPI_Comm comm;
298: Vec xx;
300: PetscFunctionBegin;
301: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
302: if (!fftw->p_backward && fftw->p_flag != FFTW_ESTIMATE) {
303: /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
304: PetscCall(VecDuplicate(x, &xx));
305: PetscCall(VecGetArrayRead(xx, &x_array));
306: } else {
307: PetscCall(VecGetArrayRead(x, &x_array));
308: }
309: PetscCall(VecGetArray(y, &y_array));
310: if (!fftw->p_backward) { /* create a plan, then execute it */
311: switch (ndim) {
312: case 1:
313: #if defined(PETSC_USE_COMPLEX)
314: fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
315: #else
316: SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet");
317: #endif
318: break;
319: case 2:
320: #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
321: fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
322: #else
323: fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
324: #endif
325: break;
326: case 3:
327: #if defined(PETSC_USE_COMPLEX)
328: fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
329: #else
330: fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
331: #endif
332: break;
333: default:
334: #if defined(PETSC_USE_COMPLEX)
335: fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
336: #else
337: fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
338: #endif
339: break;
340: }
341: if (fftw->p_flag != FFTW_ESTIMATE) {
342: /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
343: PetscCall(VecRestoreArrayRead(xx, &x_array));
344: PetscCall(VecDestroy(&xx));
345: PetscCall(VecGetArrayRead(x, &x_array));
346: } else {
347: fftw->binarray = (PetscScalar *)x_array;
348: fftw->boutarray = y_array;
349: }
350: }
351: if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
352: fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
353: } else {
354: fftw_execute(fftw->p_backward);
355: }
356: PetscCall(VecRestoreArray(y, &y_array));
357: PetscCall(VecRestoreArrayRead(x, &x_array));
358: PetscFunctionReturn(PETSC_SUCCESS);
359: }
360: #endif
362: static PetscErrorCode MatDestroy_FFTW(Mat A)
363: {
364: Mat_FFT *fft = (Mat_FFT *)A->data;
365: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
367: PetscFunctionBegin;
368: fftw_destroy_plan(fftw->p_forward);
369: fftw_destroy_plan(fftw->p_backward);
370: if (fftw->iodims) free(fftw->iodims);
371: PetscCall(PetscFree(fftw->dim_fftw));
372: PetscCall(PetscFree(fft->data));
373: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", NULL));
374: PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", NULL));
375: PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", NULL));
376: #if !PetscDefined(HAVE_MPIUNI)
377: fftw_mpi_cleanup();
378: #endif
379: PetscFunctionReturn(PETSC_SUCCESS);
380: }
382: #if !PetscDefined(HAVE_MPIUNI)
383: #include <../src/vec/vec/impls/mpi/pvecimpl.h>
384: static PetscErrorCode VecDestroy_MPIFFTW(Vec v)
385: {
386: PetscScalar *array;
388: PetscFunctionBegin;
389: PetscCall(VecGetArray(v, &array));
390: fftw_free((fftw_complex *)array);
391: PetscCall(VecRestoreArray(v, &array));
392: PetscCall(VecDestroy_MPI(v));
393: PetscFunctionReturn(PETSC_SUCCESS);
394: }
395: #endif
397: #if !PetscDefined(HAVE_MPIUNI)
398: static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin, Vec *fin_new)
399: {
400: Mat A;
402: PetscFunctionBegin;
403: PetscCall(PetscObjectQuery((PetscObject)fin, "FFTmatrix", (PetscObject *)&A));
404: PetscCall(MatCreateVecsFFTW_FFTW(A, fin_new, NULL, NULL));
405: PetscFunctionReturn(PETSC_SUCCESS);
406: }
408: static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout, Vec *fout_new)
409: {
410: Mat A;
412: PetscFunctionBegin;
413: PetscCall(PetscObjectQuery((PetscObject)fout, "FFTmatrix", (PetscObject *)&A));
414: PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, fout_new, NULL));
415: PetscFunctionReturn(PETSC_SUCCESS);
416: }
418: static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new)
419: {
420: Mat A;
422: PetscFunctionBegin;
423: PetscCall(PetscObjectQuery((PetscObject)bout, "FFTmatrix", (PetscObject *)&A));
424: PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, NULL, bout_new));
425: PetscFunctionReturn(PETSC_SUCCESS);
426: }
427: #endif
429: /*@
430: MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
431: parallel layout determined by `MATFFTW`
433: Collective
435: Input Parameter:
436: . A - the matrix
438: Output Parameters:
439: + x - (optional) input vector of forward FFTW
440: . y - (optional) output vector of forward FFTW
441: - z - (optional) output vector of backward FFTW
443: Options Database Key:
444: . -mat_fftw_plannerflags - set FFTW planner flags
446: Level: advanced
448: Notes:
449: The parallel layout of output of forward FFTW is always same as the input
450: of the backward FFTW. But parallel layout of the input vector of forward
451: FFTW might not be same as the output of backward FFTW.
453: We need to provide enough space while doing parallel real transform.
454: We need to pad extra zeros at the end of last dimension. For this reason the one needs to
455: invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the
456: last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded
457: depends on if the last dimension is even or odd. If the last dimension is even need to pad two
458: zeros if it is odd only one zero is needed.
460: Lastly one needs some scratch space at the end of data set in each process. alloc_local
461: figures out how much space is needed, i.e. it figures out the data+scratch space for
462: each processor and returns that.
464: Developer Notes:
465: How come `MatCreateVecs()` doesn't produce the correctly padded vectors automatically?
467: .seealso: [](ch_matrices), `Mat`, `MATFFTW`, `MatCreateFFT()`, `MatCreateVecs()`
468: @*/
469: PetscErrorCode MatCreateVecsFFTW(Mat A, Vec *x, Vec *y, Vec *z)
470: {
471: PetscFunctionBegin;
472: PetscUseMethod(A, "MatCreateVecsFFTW_C", (Mat, Vec *, Vec *, Vec *), (A, x, y, z));
473: PetscFunctionReturn(PETSC_SUCCESS);
474: }
476: PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A, Vec *fin, Vec *fout, Vec *bout)
477: {
478: PetscMPIInt size, rank;
479: MPI_Comm comm;
480: Mat_FFT *fft = (Mat_FFT *)A->data;
482: PetscFunctionBegin;
485: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
487: PetscCallMPI(MPI_Comm_size(comm, &size));
488: PetscCallMPI(MPI_Comm_rank(comm, &rank));
489: if (size == 1) { /* sequential case */
490: #if defined(PETSC_USE_COMPLEX)
491: if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fin));
492: if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fout));
493: if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, bout));
494: #else
495: if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fin));
496: if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fout));
497: if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, bout));
498: #endif
499: #if !PetscDefined(HAVE_MPIUNI)
500: } else { /* parallel cases */
501: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
502: PetscInt ndim = fft->ndim, *dim = fft->dim;
503: ptrdiff_t alloc_local, local_n0, local_0_start;
504: ptrdiff_t local_n1;
505: fftw_complex *data_fout;
506: ptrdiff_t local_1_start;
507: PetscInt n1, N1;
508: #if defined(PETSC_USE_COMPLEX)
509: fftw_complex *data_fin, *data_bout;
510: #else
511: double *data_finr, *data_boutr;
512: ptrdiff_t temp;
513: #endif
515: switch (ndim) {
516: case 1:
517: #if !defined(PETSC_USE_COMPLEX)
518: SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not allow parallel real 1D transform");
519: #else
520: alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
521: if (fin) {
522: data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
523: PetscCall(PetscIntCast(local_n0, &n1));
524: PetscCall(PetscIntCast(fft->N, &N1));
525: PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (const PetscScalar *)data_fin, fin));
526: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
527: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
528: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
529: }
530: if (fout) {
531: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
532: PetscCall(PetscIntCast(local_n1, &n1));
533: PetscCall(PetscIntCast(fft->N, &N1));
534: PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (const PetscScalar *)data_fout, fout));
535: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
536: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
537: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
538: }
539: alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
540: if (bout) {
541: data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
542: PetscCall(PetscIntCast(local_n1, &n1));
543: PetscCall(PetscIntCast(fft->N, &N1));
544: PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (const PetscScalar *)data_bout, bout));
545: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
546: (*bout)->ops->duplicate = VecDuplicate_FFTW_fout;
547: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
548: }
549: break;
550: #endif
551: case 2:
552: #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
553: alloc_local = fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
554: PetscCall(PetscIntCast(2 * dim[0] * (dim[1] / 2 + 1), &N1));
555: PetscCall(PetscIntCast(2 * local_n0 * (dim[1] / 2 + 1), &n1));
556: if (fin) {
557: data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
558: PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
559: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
560: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
561: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
562: }
563: if (fout) {
564: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
565: PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_fout, fout));
566: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
567: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
568: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
569: }
570: if (bout) {
571: data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
572: PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
573: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
574: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
575: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
576: }
577: #else
578: /* Get local size */
579: alloc_local = fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
580: if (fin) {
581: data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
582: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
583: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
584: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
585: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
586: }
587: if (fout) {
588: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
589: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
590: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
591: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
592: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
593: }
594: if (bout) {
595: data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
596: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
597: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
598: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
599: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
600: }
601: #endif
602: break;
603: case 3:
604: #if !defined(PETSC_USE_COMPLEX)
605: alloc_local = fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
606: PetscCall(PetscIntCast(2 * dim[0] * dim[1] * (dim[2] / 2 + 1), &N1));
607: PetscCall(PetscIntCast(2 * local_n0 * dim[1] * (dim[2] / 2 + 1), &n1));
608: if (fin) {
609: data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
610: PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
611: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
612: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
613: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
614: }
615: if (fout) {
616: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
617: PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (PetscScalar *)data_fout, fout));
618: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
619: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
620: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
621: }
622: if (bout) {
623: data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
624: PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
625: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
626: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
627: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
628: }
629: #else
630: alloc_local = fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
631: if (fin) {
632: data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
633: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
634: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
635: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
636: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
637: }
638: if (fout) {
639: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
640: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
641: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
642: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
643: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
644: }
645: if (bout) {
646: data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
647: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
648: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
649: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
650: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
651: }
652: #endif
653: break;
654: default:
655: #if !defined(PETSC_USE_COMPLEX)
656: temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
657: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
658: alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
659: N1 = 2 * fft->N * (PetscInt)(fftw->dim_fftw[fftw->ndim_fftw - 1]) / ((PetscInt)temp);
660: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
662: if (fin) {
663: data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
664: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_finr, fin));
665: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
666: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
667: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
668: }
669: if (fout) {
670: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
671: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_fout, fout));
672: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
673: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
674: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
675: }
676: if (bout) {
677: data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
678: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_boutr, bout));
679: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
680: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
681: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
682: }
683: #else
684: alloc_local = fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
685: if (fin) {
686: data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
687: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
688: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
689: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
690: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
691: }
692: if (fout) {
693: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
694: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
695: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
696: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
697: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
698: }
699: if (bout) {
700: data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
701: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
702: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
703: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
704: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
705: }
706: #endif
707: break;
708: }
709: /* fftw vectors have their data array allocated by fftw_malloc, such that v->array=xxx but
710: v->array_allocated=NULL. A regular replacearray call won't free the memory and only causes
711: memory leaks. We void these pointers here to avoid accident uses.
712: */
713: if (fin) (*fin)->ops->replacearray = NULL;
714: if (fout) (*fout)->ops->replacearray = NULL;
715: if (bout) (*bout)->ops->replacearray = NULL;
716: #endif
717: }
718: PetscFunctionReturn(PETSC_SUCCESS);
719: }
721: /*@
722: VecScatterPetscToFFTW - Copies a PETSc vector to the vector that goes into `MATFFTW` calls.
724: Collective
726: Input Parameters:
727: + A - FFTW matrix
728: - x - the PETSc vector
730: Output Parameter:
731: . y - the FFTW vector
733: Level: intermediate
735: Note:
736: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when
737: one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
738: zeros. This routine does that job by scattering operation.
740: .seealso: [](ch_matrices), `Mat`, `MATFFTW`, `VecScatterFFTWToPetsc()`, `MatCreateVecsFFTW()`
741: @*/
742: PetscErrorCode VecScatterPetscToFFTW(Mat A, Vec x, Vec y)
743: {
744: PetscFunctionBegin;
745: PetscUseMethod(A, "VecScatterPetscToFFTW_C", (Mat, Vec, Vec), (A, x, y));
746: PetscFunctionReturn(PETSC_SUCCESS);
747: }
749: static PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A, Vec x, Vec y)
750: {
751: MPI_Comm comm;
752: Mat_FFT *fft = (Mat_FFT *)A->data;
753: PetscInt low;
754: PetscMPIInt rank, size;
755: PetscInt vsize, vsize1;
756: VecScatter vecscat;
757: IS list1;
759: PetscFunctionBegin;
760: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
761: PetscCallMPI(MPI_Comm_size(comm, &size));
762: PetscCallMPI(MPI_Comm_rank(comm, &rank));
763: PetscCall(VecGetOwnershipRange(y, &low, NULL));
765: if (size == 1) {
766: PetscCall(VecGetSize(x, &vsize));
767: PetscCall(VecGetSize(y, &vsize1));
768: PetscCall(ISCreateStride(PETSC_COMM_SELF, fft->N, 0, 1, &list1));
769: PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
770: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
771: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
772: PetscCall(VecScatterDestroy(&vecscat));
773: PetscCall(ISDestroy(&list1));
774: #if !PetscDefined(HAVE_MPIUNI)
775: } else {
776: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
777: PetscInt ndim = fft->ndim, *dim = fft->dim, n1;
778: ptrdiff_t local_n0, local_0_start;
779: ptrdiff_t local_n1, local_1_start;
780: IS list2;
781: #if !defined(PETSC_USE_COMPLEX)
782: PetscInt i, j, k, partial_dim;
783: PetscInt *indx1, *indx2, tempindx, tempindx1;
784: PetscInt NM;
785: ptrdiff_t temp;
786: #else
787: PetscInt nstart, nlow;
788: #endif
790: switch (ndim) {
791: case 1:
792: #if defined(PETSC_USE_COMPLEX)
793: fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
795: PetscCall(PetscIntCast(local_n0, &n1));
796: PetscCall(PetscIntCast(local_0_start, &nstart));
797: PetscCall(PetscIntCast(low, &nlow));
798: PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
799: PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
800: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
801: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
802: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
803: PetscCall(VecScatterDestroy(&vecscat));
804: PetscCall(ISDestroy(&list1));
805: PetscCall(ISDestroy(&list2));
806: #else
807: SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
808: #endif
809: break;
810: case 2:
811: #if defined(PETSC_USE_COMPLEX)
812: fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
814: PetscCall(PetscIntCast(local_n0 * dim[1], &n1));
815: PetscCall(PetscIntCast(local_0_start * dim[1], &nstart));
816: PetscCall(PetscIntCast(low, &nlow));
817: PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
818: PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
819: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
820: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
821: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
822: PetscCall(VecScatterDestroy(&vecscat));
823: PetscCall(ISDestroy(&list1));
824: PetscCall(ISDestroy(&list2));
825: #else
826: fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
828: PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx1));
829: PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx2));
831: if (dim[1] % 2 == 0) {
832: NM = dim[1] + 2;
833: } else {
834: NM = dim[1] + 1;
835: }
836: for (i = 0; i < local_n0; i++) {
837: for (j = 0; j < dim[1]; j++) {
838: tempindx = i * dim[1] + j;
839: tempindx1 = i * NM + j;
841: PetscCall(PetscIntCast(local_0_start * dim[1] + tempindx, &indx1[tempindx]));
842: indx2[tempindx] = low + tempindx1;
843: }
844: }
846: PetscCall(PetscIntCast(local_n0 * dim[1], &n1));
847: PetscCall(ISCreateGeneral(comm, n1, indx1, PETSC_COPY_VALUES, &list1));
848: PetscCall(ISCreateGeneral(comm, n1, indx2, PETSC_COPY_VALUES, &list2));
850: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
851: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
852: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
853: PetscCall(VecScatterDestroy(&vecscat));
854: PetscCall(ISDestroy(&list1));
855: PetscCall(ISDestroy(&list2));
856: PetscCall(PetscFree(indx1));
857: PetscCall(PetscFree(indx2));
858: #endif
859: break;
861: case 3:
862: #if defined(PETSC_USE_COMPLEX)
863: fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
865: PetscCall(PetscIntCast(local_n0 * dim[1] * dim[2], &n1));
866: PetscCall(PetscIntCast(local_0_start * dim[1] * dim[2], &nstart));
867: PetscCall(PetscIntCast(low, &nlow));
868: PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
869: PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
870: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
871: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
872: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
873: PetscCall(VecScatterDestroy(&vecscat));
874: PetscCall(ISDestroy(&list1));
875: PetscCall(ISDestroy(&list2));
876: #else
877: /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
878: SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 3D real transform");
879: fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
881: PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx1));
882: PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx2));
884: if (dim[2] % 2 == 0) NM = dim[2] + 2;
885: else NM = dim[2] + 1;
887: for (i = 0; i < local_n0; i++) {
888: for (j = 0; j < dim[1]; j++) {
889: for (k = 0; k < dim[2]; k++) {
890: tempindx = i * dim[1] * dim[2] + j * dim[2] + k;
891: tempindx1 = i * dim[1] * NM + j * NM + k;
893: PetscCall(PetscIntCast(local_0_start * dim[1] * dim[2] + tempindx, &indx1[tempindx]));
894: PetscCall(PetscIntCast(low + tempindx1, &indx2[tempindx]));
895: }
896: }
897: }
899: PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
900: PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));
901: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
902: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
903: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
904: PetscCall(VecScatterDestroy(&vecscat));
905: PetscCall(ISDestroy(&list1));
906: PetscCall(ISDestroy(&list2));
907: PetscCall(PetscFree(indx1));
908: PetscCall(PetscFree(indx2));
909: #endif
910: break;
912: default:
913: #if defined(PETSC_USE_COMPLEX)
914: fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
916: PetscCall(PetscIntCast(local_n0 * (fftw->partial_dim), &n1));
917: PetscCall(PetscIntCast(local_0_start * (fftw->partial_dim), &nstart));
918: PetscCall(PetscIntCast(low, &nlow));
919: PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
920: PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
921: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
922: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
923: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
924: PetscCall(VecScatterDestroy(&vecscat));
925: PetscCall(ISDestroy(&list1));
926: PetscCall(ISDestroy(&list2));
927: #else
928: /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
929: SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel DIM>3 real transform");
930: temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
932: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
934: fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
936: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
938: partial_dim = fftw->partial_dim;
940: PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx1));
941: PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx2));
943: if (dim[ndim - 1] % 2 == 0) NM = 2;
944: else NM = 1;
946: j = low;
947: for (i = 0, k = 1; i < (PetscInt)local_n0 * partial_dim; i++, k++) {
948: indx1[i] = local_0_start * partial_dim + i;
949: indx2[i] = j;
950: if (k % dim[ndim - 1] == 0) j += NM;
951: j++;
952: }
953: PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
954: PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));
955: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
956: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
957: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
958: PetscCall(VecScatterDestroy(&vecscat));
959: PetscCall(ISDestroy(&list1));
960: PetscCall(ISDestroy(&list2));
961: PetscCall(PetscFree(indx1));
962: PetscCall(PetscFree(indx2));
963: #endif
964: break;
965: }
966: #endif
967: }
968: PetscFunctionReturn(PETSC_SUCCESS);
969: }
971: /*@
972: VecScatterFFTWToPetsc - Converts `MATFFTW` output vector to a PETSc vector.
974: Collective
976: Input Parameters:
977: + A - `MATFFTW` matrix
978: - x - FFTW vector
980: Output Parameter:
981: . y - PETSc vector
983: Level: intermediate
985: Note:
986: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension.
987: `VecScatterFFTWToPetsc()` removes those extra zeros.
989: .seealso: [](ch_matrices), `Mat`, `VecScatterPetscToFFTW()`, `MATFFTW`, `MatCreateVecsFFTW()`
990: @*/
991: PetscErrorCode VecScatterFFTWToPetsc(Mat A, Vec x, Vec y)
992: {
993: PetscFunctionBegin;
994: PetscUseMethod(A, "VecScatterFFTWToPetsc_C", (Mat, Vec, Vec), (A, x, y));
995: PetscFunctionReturn(PETSC_SUCCESS);
996: }
998: static PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A, Vec x, Vec y)
999: {
1000: MPI_Comm comm;
1001: Mat_FFT *fft = (Mat_FFT *)A->data;
1002: PetscInt low;
1003: PetscMPIInt rank, size;
1004: VecScatter vecscat;
1005: IS list1;
1007: PetscFunctionBegin;
1008: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1009: PetscCallMPI(MPI_Comm_size(comm, &size));
1010: PetscCallMPI(MPI_Comm_rank(comm, &rank));
1011: PetscCall(VecGetOwnershipRange(x, &low, NULL));
1013: if (size == 1) {
1014: PetscCall(ISCreateStride(comm, fft->N, 0, 1, &list1));
1015: PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
1016: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1017: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1018: PetscCall(VecScatterDestroy(&vecscat));
1019: PetscCall(ISDestroy(&list1));
1021: #if !PetscDefined(HAVE_MPIUNI)
1022: } else {
1023: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
1024: PetscInt ndim = fft->ndim, *dim = fft->dim;
1025: ptrdiff_t local_n0, local_0_start;
1026: ptrdiff_t local_n1, local_1_start;
1027: IS list2;
1028: #if !defined(PETSC_USE_COMPLEX)
1029: PetscInt i, j, k, partial_dim;
1030: PetscInt *indx1, *indx2, tempindx, tempindx1;
1031: PetscInt NM;
1032: ptrdiff_t temp;
1033: #else
1034: PetscInt nlow, nstart;
1035: #endif
1036: PetscInt n1;
1037: switch (ndim) {
1038: case 1:
1039: #if defined(PETSC_USE_COMPLEX)
1040: fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
1042: PetscCall(PetscIntCast(local_n1, &n1));
1043: PetscCall(PetscIntCast(local_1_start, &nstart));
1044: PetscCall(PetscIntCast(low, &nlow));
1045: PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
1046: PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
1047: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
1048: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1049: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1050: PetscCall(VecScatterDestroy(&vecscat));
1051: PetscCall(ISDestroy(&list1));
1052: PetscCall(ISDestroy(&list2));
1053: #else
1054: SETERRQ(comm, PETSC_ERR_SUP, "No support for real parallel 1D FFT");
1055: #endif
1056: break;
1057: case 2:
1058: #if defined(PETSC_USE_COMPLEX)
1059: fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
1061: PetscCall(PetscIntCast(local_n0 * dim[1], &n1));
1062: PetscCall(PetscIntCast(local_0_start * dim[1], &nstart));
1063: PetscCall(PetscIntCast(low, &nlow));
1064: PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
1065: PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
1066: PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1067: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1068: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1069: PetscCall(VecScatterDestroy(&vecscat));
1070: PetscCall(ISDestroy(&list1));
1071: PetscCall(ISDestroy(&list2));
1072: #else
1073: fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1075: PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx1));
1076: PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx2));
1078: if (dim[1] % 2 == 0) NM = dim[1] + 2;
1079: else NM = dim[1] + 1;
1081: for (i = 0; i < local_n0; i++) {
1082: for (j = 0; j < dim[1]; j++) {
1083: tempindx = i * dim[1] + j;
1084: tempindx1 = i * NM + j;
1086: PetscCall(PetscIntCast(local_0_start * dim[1] + tempindx, &indx1[tempindx]));
1087: PetscCall(PetscIntCast(low + tempindx1, &indx2[tempindx]));
1088: }
1089: }
1091: PetscCall(PetscIntCast(local_n0 * dim[1], &n1));
1092: PetscCall(ISCreateGeneral(comm, n1, indx1, PETSC_COPY_VALUES, &list1));
1093: PetscCall(ISCreateGeneral(comm, n1, indx2, PETSC_COPY_VALUES, &list2));
1095: PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1096: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1097: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1098: PetscCall(VecScatterDestroy(&vecscat));
1099: PetscCall(ISDestroy(&list1));
1100: PetscCall(ISDestroy(&list2));
1101: PetscCall(PetscFree(indx1));
1102: PetscCall(PetscFree(indx2));
1103: #endif
1104: break;
1105: case 3:
1106: #if defined(PETSC_USE_COMPLEX)
1107: fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
1109: PetscCall(PetscIntCast(local_n0 * dim[1] * dim[2], &n1));
1110: PetscCall(PetscIntCast(local_0_start * dim[1] * dim[2], &nstart));
1111: PetscCall(PetscIntCast(low, &nlow));
1112: PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
1113: PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
1114: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
1115: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1116: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1117: PetscCall(VecScatterDestroy(&vecscat));
1118: PetscCall(ISDestroy(&list1));
1119: PetscCall(ISDestroy(&list2));
1120: #else
1121: fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1123: PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx1));
1124: PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx2));
1126: if (dim[2] % 2 == 0) NM = dim[2] + 2;
1127: else NM = dim[2] + 1;
1129: for (i = 0; i < local_n0; i++) {
1130: for (j = 0; j < dim[1]; j++) {
1131: for (k = 0; k < dim[2]; k++) {
1132: tempindx = i * dim[1] * dim[2] + j * dim[2] + k;
1133: tempindx1 = i * dim[1] * NM + j * NM + k;
1135: PetscCall(PetscIntCast(local_0_start * dim[1] * dim[2] + tempindx, &indx1[tempindx]));
1136: PetscCall(PetscIntCast(low + tempindx1, &indx2[tempindx]));
1137: }
1138: }
1139: }
1141: PetscCall(PetscIntCast(local_n0 * dim[1] * dim[2], &n1));
1142: PetscCall(ISCreateGeneral(comm, n1, indx1, PETSC_COPY_VALUES, &list1));
1143: PetscCall(ISCreateGeneral(comm, n1, indx2, PETSC_COPY_VALUES, &list2));
1145: PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1146: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1147: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1148: PetscCall(VecScatterDestroy(&vecscat));
1149: PetscCall(ISDestroy(&list1));
1150: PetscCall(ISDestroy(&list2));
1151: PetscCall(PetscFree(indx1));
1152: PetscCall(PetscFree(indx2));
1153: #endif
1154: break;
1155: default:
1156: #if defined(PETSC_USE_COMPLEX)
1157: fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
1159: PetscCall(PetscIntCast(local_n0 * (fftw->partial_dim), &n1));
1160: PetscCall(PetscIntCast(local_0_start * (fftw->partial_dim), &nstart));
1161: PetscCall(PetscIntCast(low, &nlow));
1162: PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
1163: PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
1164: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
1165: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1166: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1167: PetscCall(VecScatterDestroy(&vecscat));
1168: PetscCall(ISDestroy(&list1));
1169: PetscCall(ISDestroy(&list2));
1170: #else
1171: temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
1173: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
1175: fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1177: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
1179: partial_dim = fftw->partial_dim;
1181: PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx1));
1182: PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx2));
1184: if (dim[ndim - 1] % 2 == 0) NM = 2;
1185: else NM = 1;
1187: j = low;
1188: for (i = 0, k = 1; i < (PetscInt)local_n0 * partial_dim; i++, k++) {
1189: PetscCall(PetscIntCast(local_0_start * partial_dim + i, &indx1[i]));
1190: indx2[i] = j;
1191: if (k % dim[ndim - 1] == 0) j += NM;
1192: j++;
1193: }
1194: PetscCall(PetscIntCast(local_n0 * partial_dim, &n1));
1195: PetscCall(ISCreateGeneral(comm, n1, indx1, PETSC_COPY_VALUES, &list1));
1196: PetscCall(ISCreateGeneral(comm, n1, indx2, PETSC_COPY_VALUES, &list2));
1198: PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1199: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1200: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1201: PetscCall(VecScatterDestroy(&vecscat));
1202: PetscCall(ISDestroy(&list1));
1203: PetscCall(ISDestroy(&list2));
1204: PetscCall(PetscFree(indx1));
1205: PetscCall(PetscFree(indx2));
1206: #endif
1207: break;
1208: }
1209: #endif
1210: }
1211: PetscFunctionReturn(PETSC_SUCCESS);
1212: }
1214: /*MC
1215: MATFFTW - "fftw" - Matrix type that provides FFT via the FFTW external package.
1217: Level: intermediate
1219: .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatType`, `MatCreateFFT()`
1220: M*/
1221: PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1222: {
1223: MPI_Comm comm;
1224: Mat_FFT *fft = (Mat_FFT *)A->data;
1225: Mat_FFTW *fftw;
1226: PetscInt ndim = fft->ndim, *dim = fft->dim;
1227: const char *plans[] = {"FFTW_ESTIMATE", "FFTW_MEASURE", "FFTW_PATIENT", "FFTW_EXHAUSTIVE"};
1228: unsigned iplans[] = {FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE};
1229: PetscBool flg;
1230: PetscInt p_flag, partial_dim = 1, ctr;
1231: PetscMPIInt size, rank;
1232: ptrdiff_t *pdim;
1233: #if !defined(PETSC_USE_COMPLEX)
1234: PetscInt tot_dim;
1235: #endif
1237: PetscFunctionBegin;
1238: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1239: PetscCallMPI(MPI_Comm_size(comm, &size));
1240: PetscCallMPI(MPI_Comm_rank(comm, &rank));
1242: #if !PetscDefined(HAVE_MPIUNI)
1243: fftw_mpi_init();
1244: #endif
1245: pdim = (ptrdiff_t *)calloc(ndim, sizeof(ptrdiff_t));
1246: pdim[0] = dim[0];
1247: #if !defined(PETSC_USE_COMPLEX)
1248: tot_dim = 2 * dim[0];
1249: #endif
1250: for (ctr = 1; ctr < ndim; ctr++) {
1251: partial_dim *= dim[ctr];
1252: pdim[ctr] = dim[ctr];
1253: #if !defined(PETSC_USE_COMPLEX)
1254: if (ctr == ndim - 1) tot_dim *= (dim[ctr] / 2 + 1);
1255: else tot_dim *= dim[ctr];
1256: #endif
1257: }
1259: if (size == 1) {
1260: #if defined(PETSC_USE_COMPLEX)
1261: PetscCall(MatSetSizes(A, fft->N, fft->N, fft->N, fft->N));
1262: fft->n = fft->N;
1263: #else
1264: PetscCall(MatSetSizes(A, tot_dim, tot_dim, tot_dim, tot_dim));
1265: fft->n = tot_dim;
1266: #endif
1267: #if !PetscDefined(HAVE_MPIUNI)
1268: } else {
1269: ptrdiff_t local_n0, local_0_start, local_n1, local_1_start;
1270: #if !defined(PETSC_USE_COMPLEX)
1271: ptrdiff_t temp;
1272: PetscInt N1;
1273: #else
1274: PetscInt n1;
1275: #endif
1277: switch (ndim) {
1278: case 1:
1279: #if !defined(PETSC_USE_COMPLEX)
1280: SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
1281: #else
1282: fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
1283: PetscCall(PetscIntCast(local_n0, &fft->n));
1284: PetscCall(PetscIntCast(local_n1, &n1));
1285: PetscCall(MatSetSizes(A, n1, fft->n, fft->N, fft->N));
1286: #endif
1287: break;
1288: case 2:
1289: #if defined(PETSC_USE_COMPLEX)
1290: fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
1291: fft->n = (PetscInt)local_n0 * dim[1];
1292: PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1293: #else
1294: fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1296: fft->n = 2 * (PetscInt)local_n0 * (dim[1] / 2 + 1);
1297: PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * (dim[1] / 2 + 1), 2 * dim[0] * (dim[1] / 2 + 1)));
1298: #endif
1299: break;
1300: case 3:
1301: #if defined(PETSC_USE_COMPLEX)
1302: fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
1304: fft->n = (PetscInt)local_n0 * dim[1] * dim[2];
1305: PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1306: #else
1307: fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1309: fft->n = 2 * (PetscInt)local_n0 * dim[1] * (dim[2] / 2 + 1);
1310: PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * dim[1] * (dim[2] / 2 + 1), 2 * dim[0] * dim[1] * (dim[2] / 2 + 1)));
1311: #endif
1312: break;
1313: default:
1314: #if defined(PETSC_USE_COMPLEX)
1315: fftw_mpi_local_size(ndim, pdim, comm, &local_n0, &local_0_start);
1317: PetscCall(PetscIntCast(local_n0 * partial_dim, &fft->n));
1318: PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1319: #else
1320: temp = pdim[ndim - 1];
1322: pdim[ndim - 1] = temp / 2 + 1;
1324: fftw_mpi_local_size_transposed(ndim, pdim, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1326: PetscCall(PetscIntCast(2 * (PetscInt)local_n0 * partial_dim * pdim[ndim - 1] / temp, &fft->n));
1327: N1 = 2 * fft->N * (PetscInt)pdim[ndim - 1] / ((PetscInt)temp);
1329: pdim[ndim - 1] = temp;
1331: PetscCall(MatSetSizes(A, fft->n, fft->n, N1, N1));
1332: #endif
1333: break;
1334: }
1335: #endif
1336: }
1337: free(pdim);
1338: PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATFFTW));
1339: PetscCall(PetscNew(&fftw));
1340: fft->data = (void *)fftw;
1342: fftw->ndim_fftw = ndim; /* This is dimension of fft */
1343: fftw->partial_dim = partial_dim;
1345: PetscCall(PetscMalloc1(ndim, &fftw->dim_fftw));
1346: if (size == 1) {
1347: #if defined(PETSC_USE_64BIT_INDICES)
1348: fftw->iodims = (fftw_iodim64 *)malloc(sizeof(fftw_iodim64) * ndim);
1349: #else
1350: fftw->iodims = (fftw_iodim *)malloc(sizeof(fftw_iodim) * ndim);
1351: #endif
1352: }
1354: for (ctr = 0; ctr < ndim; ctr++) (fftw->dim_fftw)[ctr] = dim[ctr];
1356: fftw->p_forward = NULL;
1357: fftw->p_backward = NULL;
1358: fftw->p_flag = FFTW_ESTIMATE;
1360: if (size == 1) {
1361: A->ops->mult = MatMult_SeqFFTW;
1362: A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1363: #if !PetscDefined(HAVE_MPIUNI)
1364: } else {
1365: A->ops->mult = MatMult_MPIFFTW;
1366: A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1367: #endif
1368: }
1369: fft->matdestroy = MatDestroy_FFTW;
1370: A->assembled = PETSC_TRUE;
1371: A->preallocated = PETSC_TRUE;
1373: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", MatCreateVecsFFTW_FFTW));
1374: PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", VecScatterPetscToFFTW_FFTW));
1375: PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", VecScatterFFTWToPetsc_FFTW));
1377: /* get runtime options */
1378: PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "FFTW Options", "Mat");
1379: PetscCall(PetscOptionsEList("-mat_fftw_plannerflags", "Planner Flags", "None", plans, 4, plans[0], &p_flag, &flg));
1380: if (flg) fftw->p_flag = iplans[p_flag];
1381: PetscOptionsEnd();
1382: PetscFunctionReturn(PETSC_SUCCESS);
1383: }