/* fftw.c */
1: /*
2: Provides an interface to the FFTW package.
3: Testing examples can be found in ~src/mat/tests
4: */
6: #include <../src/mat/impls/fft/fft.h>
7: EXTERN_C_BEGIN
8: #if !PetscDefined(HAVE_MPIUNI)
9: #include <fftw3-mpi.h>
10: #else
11: #include <fftw3.h>
12: #endif
13: EXTERN_C_END
/* Private data for MATFFTW: lazily-created FFTW plans plus the dimension
   descriptors the various FFTW planner interfaces require. */
typedef struct {
  PetscInt ndim_fftw;   /* number of transform dimensions, passed to the fftw_mpi_* planners */
  ptrdiff_t *dim_fftw;  /* transform dimensions in FFTW's MPI-interface type (ptrdiff_t) */
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64 *iodims; /* guru-interface dimension/stride descriptors (64-bit index build) */
#else
  fftw_iodim *iodims;   /* guru-interface dimension/stride descriptors (32-bit index build) */
#endif
  PetscInt partial_dim; /* NOTE(review): presumably the product of the non-leading dimensions, used for parallel layouts — confirm against the setup routine (not in view) */
  fftw_plan p_forward, p_backward; /* created on first MatMult()/MatMultTranspose(); NULL until then */
  unsigned p_flag; /* planner flags, FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
  PetscScalar *finarray, *foutarray, *binarray, *boutarray; /* keep track of arrays because fftw_execute() can only be executed for the arrays with which the plan was created */
} Mat_FFTW;
29: extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat, Vec *, Vec *, Vec *);
31: static PetscErrorCode MatMult_SeqFFTW(Mat A, Vec x, Vec y)
32: {
33: Mat_FFT *fft = (Mat_FFT *)A->data;
34: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
35: const PetscScalar *x_array;
36: PetscScalar *y_array;
37: Vec xx;
38: #if defined(PETSC_USE_COMPLEX)
39: #if defined(PETSC_USE_64BIT_INDICES)
40: fftw_iodim64 *iodims;
41: #else
42: fftw_iodim *iodims;
43: #endif
44: PetscInt i;
45: #endif
46: PetscInt ndim = fft->ndim, *dim = fft->dim;
48: PetscFunctionBegin;
49: if (!fftw->p_forward && fftw->p_flag != FFTW_ESTIMATE) {
50: /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
51: PetscCall(VecDuplicate(x, &xx));
52: PetscCall(VecGetArrayRead(xx, &x_array));
53: } else {
54: PetscCall(VecGetArrayRead(x, &x_array));
55: }
56: PetscCall(VecGetArray(y, &y_array));
57: if (!fftw->p_forward) { /* create a plan, then execute it */
58: switch (ndim) {
59: case 1:
60: #if defined(PETSC_USE_COMPLEX)
61: fftw->p_forward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
62: #else
63: fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
64: #endif
65: break;
66: case 2:
67: #if defined(PETSC_USE_COMPLEX)
68: fftw->p_forward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
69: #else
70: fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
71: #endif
72: break;
73: case 3:
74: #if defined(PETSC_USE_COMPLEX)
75: fftw->p_forward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
76: #else
77: fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
78: #endif
79: break;
80: default:
81: #if defined(PETSC_USE_COMPLEX)
82: iodims = fftw->iodims;
83: #if defined(PETSC_USE_64BIT_INDICES)
84: if (ndim) {
85: iodims[ndim - 1].n = (ptrdiff_t)dim[ndim - 1];
86: iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
87: for (i = ndim - 2; i >= 0; --i) {
88: iodims[i].n = (ptrdiff_t)dim[i];
89: iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
90: }
91: }
92: fftw->p_forward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
93: #else
94: if (ndim) {
95: iodims[ndim - 1].n = (int)dim[ndim - 1];
96: iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
97: for (i = ndim - 2; i >= 0; --i) {
98: iodims[i].n = (int)dim[i];
99: iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
100: }
101: }
102: fftw->p_forward = fftw_plan_guru_dft((int)ndim, (fftw_iodim *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
103: #endif
105: #else
106: fftw->p_forward = fftw_plan_dft_r2c(ndim, (int *)dim, (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
107: #endif
108: break;
109: }
110: if (fftw->p_flag != FFTW_ESTIMATE) {
111: /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
112: PetscCall(VecRestoreArrayRead(xx, &x_array));
113: PetscCall(VecDestroy(&xx));
114: PetscCall(VecGetArrayRead(x, &x_array));
115: } else {
116: fftw->finarray = (PetscScalar *)x_array;
117: fftw->foutarray = y_array;
118: }
119: }
121: if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
122: #if defined(PETSC_USE_COMPLEX)
123: fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
124: #else
125: fftw_execute_dft_r2c(fftw->p_forward, (double *)x_array, (fftw_complex *)y_array);
126: #endif
127: } else {
128: fftw_execute(fftw->p_forward);
129: }
130: PetscCall(VecRestoreArray(y, &y_array));
131: PetscCall(VecRestoreArrayRead(x, &x_array));
132: PetscFunctionReturn(PETSC_SUCCESS);
133: }
135: static PetscErrorCode MatMultTranspose_SeqFFTW(Mat A, Vec x, Vec y)
136: {
137: Mat_FFT *fft = (Mat_FFT *)A->data;
138: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
139: const PetscScalar *x_array;
140: PetscScalar *y_array;
141: PetscInt ndim = fft->ndim, *dim = fft->dim;
142: Vec xx;
143: #if defined(PETSC_USE_COMPLEX)
144: #if defined(PETSC_USE_64BIT_INDICES)
145: fftw_iodim64 *iodims = fftw->iodims;
146: #else
147: fftw_iodim *iodims = fftw->iodims;
148: #endif
149: #endif
151: PetscFunctionBegin;
152: if (!fftw->p_backward && fftw->p_flag != FFTW_ESTIMATE) {
153: /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
154: PetscCall(VecDuplicate(x, &xx));
155: PetscCall(VecGetArrayRead(xx, &x_array));
156: } else {
157: PetscCall(VecGetArrayRead(x, &x_array));
158: }
159: PetscCall(VecGetArray(y, &y_array));
160: if (!fftw->p_backward) { /* create a plan, then execute it */
161: switch (ndim) {
162: case 1:
163: #if defined(PETSC_USE_COMPLEX)
164: fftw->p_backward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
165: #else
166: fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
167: #endif
168: break;
169: case 2:
170: #if defined(PETSC_USE_COMPLEX)
171: fftw->p_backward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
172: #else
173: fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
174: #endif
175: break;
176: case 3:
177: #if defined(PETSC_USE_COMPLEX)
178: fftw->p_backward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
179: #else
180: fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
181: #endif
182: break;
183: default:
184: #if defined(PETSC_USE_COMPLEX)
185: #if defined(PETSC_USE_64BIT_INDICES)
186: fftw->p_backward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
187: #else
188: fftw->p_backward = fftw_plan_guru_dft((int)ndim, iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
189: #endif
190: #else
191: fftw->p_backward = fftw_plan_dft_c2r((int)ndim, (int *)dim, (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
192: #endif
193: break;
194: }
195: if (fftw->p_flag != FFTW_ESTIMATE) {
196: /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
197: PetscCall(VecRestoreArrayRead(xx, &x_array));
198: PetscCall(VecDestroy(&xx));
199: PetscCall(VecGetArrayRead(x, &x_array));
200: } else {
201: fftw->binarray = (PetscScalar *)x_array;
202: fftw->boutarray = y_array;
203: }
204: }
205: if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
206: #if defined(PETSC_USE_COMPLEX)
207: fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
208: #else
209: fftw_execute_dft_c2r(fftw->p_backward, (fftw_complex *)x_array, (double *)y_array);
210: #endif
211: } else {
212: fftw_execute(fftw->p_backward);
213: }
214: PetscCall(VecRestoreArray(y, &y_array));
215: PetscCall(VecRestoreArrayRead(x, &x_array));
216: PetscFunctionReturn(PETSC_SUCCESS);
217: }
219: #if !PetscDefined(HAVE_MPIUNI)
220: static PetscErrorCode MatMult_MPIFFTW(Mat A, Vec x, Vec y)
221: {
222: Mat_FFT *fft = (Mat_FFT *)A->data;
223: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
224: const PetscScalar *x_array;
225: PetscScalar *y_array;
226: PetscInt ndim = fft->ndim, *dim = fft->dim;
227: MPI_Comm comm;
228: Vec xx;
230: PetscFunctionBegin;
231: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
232: if (!fftw->p_forward && fftw->p_flag != FFTW_ESTIMATE) {
233: /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
234: PetscCall(VecDuplicate(x, &xx));
235: PetscCall(VecGetArrayRead(xx, &x_array));
236: } else {
237: PetscCall(VecGetArrayRead(x, &x_array));
238: }
239: PetscCall(VecGetArray(y, &y_array));
240: if (!fftw->p_forward) { /* create a plan, then execute it */
241: switch (ndim) {
242: case 1:
243: #if defined(PETSC_USE_COMPLEX)
244: fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
245: #else
246: SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet");
247: #endif
248: break;
249: case 2:
250: #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
251: fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
252: #else
253: fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
254: #endif
255: break;
256: case 3:
257: #if defined(PETSC_USE_COMPLEX)
258: fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
259: #else
260: fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
261: #endif
262: break;
263: default:
264: #if defined(PETSC_USE_COMPLEX)
265: fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
266: #else
267: fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw, fftw->dim_fftw, (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
268: #endif
269: break;
270: }
271: if (fftw->p_flag != FFTW_ESTIMATE) {
272: /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
273: PetscCall(VecRestoreArrayRead(xx, &x_array));
274: PetscCall(VecDestroy(&xx));
275: PetscCall(VecGetArrayRead(x, &x_array));
276: } else {
277: fftw->finarray = (PetscScalar *)x_array;
278: fftw->foutarray = y_array;
279: }
280: }
281: if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
282: fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
283: } else {
284: fftw_execute(fftw->p_forward);
285: }
286: PetscCall(VecRestoreArray(y, &y_array));
287: PetscCall(VecRestoreArrayRead(x, &x_array));
288: PetscFunctionReturn(PETSC_SUCCESS);
289: }
291: static PetscErrorCode MatMultTranspose_MPIFFTW(Mat A, Vec x, Vec y)
292: {
293: Mat_FFT *fft = (Mat_FFT *)A->data;
294: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
295: const PetscScalar *x_array;
296: PetscScalar *y_array;
297: PetscInt ndim = fft->ndim, *dim = fft->dim;
298: MPI_Comm comm;
299: Vec xx;
301: PetscFunctionBegin;
302: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
303: if (!fftw->p_backward && fftw->p_flag != FFTW_ESTIMATE) {
304: /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
305: PetscCall(VecDuplicate(x, &xx));
306: PetscCall(VecGetArrayRead(xx, &x_array));
307: } else {
308: PetscCall(VecGetArrayRead(x, &x_array));
309: }
310: PetscCall(VecGetArray(y, &y_array));
311: if (!fftw->p_backward) { /* create a plan, then execute it */
312: switch (ndim) {
313: case 1:
314: #if defined(PETSC_USE_COMPLEX)
315: fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
316: #else
317: SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet");
318: #endif
319: break;
320: case 2:
321: #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
322: fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
323: #else
324: fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
325: #endif
326: break;
327: case 3:
328: #if defined(PETSC_USE_COMPLEX)
329: fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
330: #else
331: fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
332: #endif
333: break;
334: default:
335: #if defined(PETSC_USE_COMPLEX)
336: fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
337: #else
338: fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
339: #endif
340: break;
341: }
342: if (fftw->p_flag != FFTW_ESTIMATE) {
343: /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
344: PetscCall(VecRestoreArrayRead(xx, &x_array));
345: PetscCall(VecDestroy(&xx));
346: PetscCall(VecGetArrayRead(x, &x_array));
347: } else {
348: fftw->binarray = (PetscScalar *)x_array;
349: fftw->boutarray = y_array;
350: }
351: }
352: if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
353: fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
354: } else {
355: fftw_execute(fftw->p_backward);
356: }
357: PetscCall(VecRestoreArray(y, &y_array));
358: PetscCall(VecRestoreArrayRead(x, &x_array));
359: PetscFunctionReturn(PETSC_SUCCESS);
360: }
361: #endif
363: static PetscErrorCode MatDestroy_FFTW(Mat A)
364: {
365: Mat_FFT *fft = (Mat_FFT *)A->data;
366: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
368: PetscFunctionBegin;
369: fftw_destroy_plan(fftw->p_forward);
370: fftw_destroy_plan(fftw->p_backward);
371: if (fftw->iodims) free(fftw->iodims);
372: PetscCall(PetscFree(fftw->dim_fftw));
373: PetscCall(PetscFree(fft->data));
374: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", NULL));
375: PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", NULL));
376: PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", NULL));
377: #if !PetscDefined(HAVE_MPIUNI)
378: fftw_mpi_cleanup();
379: #endif
380: PetscFunctionReturn(PETSC_SUCCESS);
381: }
383: #if !PetscDefined(HAVE_MPIUNI)
384: #include <../src/vec/vec/impls/mpi/pvecimpl.h>
385: static PetscErrorCode VecDestroy_MPIFFTW(Vec v)
386: {
387: PetscScalar *array;
389: PetscFunctionBegin;
390: PetscCall(VecGetArray(v, &array));
391: fftw_free((fftw_complex *)array);
392: PetscCall(VecRestoreArray(v, &array));
393: PetscCall(VecDestroy_MPI(v));
394: PetscFunctionReturn(PETSC_SUCCESS);
395: }
396: #endif
398: #if !PetscDefined(HAVE_MPIUNI)
399: static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin, Vec *fin_new)
400: {
401: Mat A;
403: PetscFunctionBegin;
404: PetscCall(PetscObjectQuery((PetscObject)fin, "FFTmatrix", (PetscObject *)&A));
405: PetscCall(MatCreateVecsFFTW_FFTW(A, fin_new, NULL, NULL));
406: PetscFunctionReturn(PETSC_SUCCESS);
407: }
409: static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout, Vec *fout_new)
410: {
411: Mat A;
413: PetscFunctionBegin;
414: PetscCall(PetscObjectQuery((PetscObject)fout, "FFTmatrix", (PetscObject *)&A));
415: PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, fout_new, NULL));
416: PetscFunctionReturn(PETSC_SUCCESS);
417: }
419: static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new)
420: {
421: Mat A;
423: PetscFunctionBegin;
424: PetscCall(PetscObjectQuery((PetscObject)bout, "FFTmatrix", (PetscObject *)&A));
425: PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, NULL, bout_new));
426: PetscFunctionReturn(PETSC_SUCCESS);
427: }
428: #endif
430: /*@
431: MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
432: parallel layout determined by `MATFFTW`
434: Collective
436: Input Parameter:
437: . A - the matrix
439: Output Parameters:
440: + x - (optional) input vector of forward FFTW
441: . y - (optional) output vector of forward FFTW
442: - z - (optional) output vector of backward FFTW
444: Options Database Key:
445: . -mat_fftw_plannerflags - set FFTW planner flags
447: Level: advanced
449: Notes:
450: The parallel layout of output of forward FFTW is always same as the input
451: of the backward FFTW. But parallel layout of the input vector of forward
452: FFTW might not be same as the output of backward FFTW.
  We need to provide enough space while doing a parallel real transform.
  We need to pad extra zeros at the end of the last dimension. For this reason one needs to
  invoke the routine fftw_mpi_local_size_transposed. Remember one has to change the
  last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded
  depends on whether the last dimension is even or odd. If the last dimension is even, two
  zeros must be padded; if it is odd, only one zero is needed.
461: Lastly one needs some scratch space at the end of data set in each process. alloc_local
462: figures out how much space is needed, i.e. it figures out the data+scratch space for
463: each processor and returns that.
465: Developer Notes:
466: How come `MatCreateVecs()` doesn't produce the correctly padded vectors automatically?
468: .seealso: [](ch_matrices), `Mat`, `MATFFTW`, `MatCreateFFT()`, `MatCreateVecs()`
469: @*/
PetscErrorCode MatCreateVecsFFTW(Mat A, Vec *x, Vec *y, Vec *z)
{
  PetscFunctionBegin;
  /* Dispatch to the type-specific implementation composed on the matrix
     (MatCreateVecsFFTW_FFTW for MATFFTW); errors if A is not of a type that
     composed "MatCreateVecsFFTW_C". */
  PetscUseMethod(A, "MatCreateVecsFFTW_C", (Mat, Vec *, Vec *, Vec *), (A, x, y, z));
  PetscFunctionReturn(PETSC_SUCCESS);
}
477: PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A, Vec *fin, Vec *fout, Vec *bout)
478: {
479: PetscMPIInt size, rank;
480: MPI_Comm comm;
481: Mat_FFT *fft = (Mat_FFT *)A->data;
483: PetscFunctionBegin;
486: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
488: PetscCallMPI(MPI_Comm_size(comm, &size));
489: PetscCallMPI(MPI_Comm_rank(comm, &rank));
490: if (size == 1) { /* sequential case */
491: #if defined(PETSC_USE_COMPLEX)
492: if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fin));
493: if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fout));
494: if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, bout));
495: #else
496: if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fin));
497: if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fout));
498: if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, bout));
499: #endif
500: #if !PetscDefined(HAVE_MPIUNI)
501: } else { /* parallel cases */
502: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
503: PetscInt ndim = fft->ndim, *dim = fft->dim;
504: ptrdiff_t alloc_local, local_n0, local_0_start;
505: ptrdiff_t local_n1;
506: fftw_complex *data_fout;
507: ptrdiff_t local_1_start;
508: PetscInt n1, N1;
509: #if defined(PETSC_USE_COMPLEX)
510: fftw_complex *data_fin, *data_bout;
511: #else
512: double *data_finr, *data_boutr;
513: ptrdiff_t temp;
514: #endif
516: switch (ndim) {
517: case 1:
518: #if !defined(PETSC_USE_COMPLEX)
519: SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not allow parallel real 1D transform");
520: #else
521: alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
522: if (fin) {
523: data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
524: PetscCall(PetscIntCast(local_n0, &n1));
525: PetscCall(PetscIntCast(fft->N, &N1));
526: PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (const PetscScalar *)data_fin, fin));
527: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
528: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
529: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
530: }
531: if (fout) {
532: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
533: PetscCall(PetscIntCast(local_n1, &n1));
534: PetscCall(PetscIntCast(fft->N, &N1));
535: PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (const PetscScalar *)data_fout, fout));
536: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
537: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
538: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
539: }
540: alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
541: if (bout) {
542: data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
543: PetscCall(PetscIntCast(local_n1, &n1));
544: PetscCall(PetscIntCast(fft->N, &N1));
545: PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (const PetscScalar *)data_bout, bout));
546: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
547: (*bout)->ops->duplicate = VecDuplicate_FFTW_fout;
548: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
549: }
550: break;
551: #endif
552: case 2:
553: #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
554: alloc_local = fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
555: PetscCall(PetscIntCast(2 * dim[0] * (dim[1] / 2 + 1), &N1));
556: PetscCall(PetscIntCast(2 * local_n0 * (dim[1] / 2 + 1), &n1));
557: if (fin) {
558: data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
559: PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
560: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
561: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
562: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
563: }
564: if (fout) {
565: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
566: PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_fout, fout));
567: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
568: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
569: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
570: }
571: if (bout) {
572: data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
573: PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
574: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
575: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
576: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
577: }
578: #else
579: /* Get local size */
580: alloc_local = fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
581: if (fin) {
582: data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
583: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
584: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
585: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
586: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
587: }
588: if (fout) {
589: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
590: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
591: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
592: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
593: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
594: }
595: if (bout) {
596: data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
597: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
598: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
599: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
600: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
601: }
602: #endif
603: break;
604: case 3:
605: #if !defined(PETSC_USE_COMPLEX)
606: alloc_local = fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
607: PetscCall(PetscIntCast(2 * dim[0] * dim[1] * (dim[2] / 2 + 1), &N1));
608: PetscCall(PetscIntCast(2 * local_n0 * dim[1] * (dim[2] / 2 + 1), &n1));
609: if (fin) {
610: data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
611: PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
612: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
613: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
614: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
615: }
616: if (fout) {
617: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
618: PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (PetscScalar *)data_fout, fout));
619: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
620: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
621: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
622: }
623: if (bout) {
624: data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
625: PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
626: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
627: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
628: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
629: }
630: #else
631: alloc_local = fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
632: if (fin) {
633: data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
634: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
635: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
636: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
637: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
638: }
639: if (fout) {
640: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
641: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
642: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
643: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
644: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
645: }
646: if (bout) {
647: data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
648: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
649: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
650: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
651: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
652: }
653: #endif
654: break;
655: default:
656: #if !defined(PETSC_USE_COMPLEX)
657: temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
658: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
659: alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
660: N1 = 2 * fft->N * (PetscInt)(fftw->dim_fftw[fftw->ndim_fftw - 1]) / ((PetscInt)temp);
661: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
663: if (fin) {
664: data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
665: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_finr, fin));
666: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
667: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
668: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
669: }
670: if (fout) {
671: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
672: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_fout, fout));
673: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
674: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
675: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
676: }
677: if (bout) {
678: data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
679: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_boutr, bout));
680: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
681: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
682: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
683: }
684: #else
685: alloc_local = fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
686: if (fin) {
687: data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
688: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
689: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
690: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
691: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
692: }
693: if (fout) {
694: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
695: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
696: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
697: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
698: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
699: }
700: if (bout) {
701: data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
702: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
703: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
704: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
705: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
706: }
707: #endif
708: break;
709: }
710: /* fftw vectors have their data array allocated by fftw_malloc, such that v->array=xxx but
711: v->array_allocated=NULL. A regular replacearray call won't free the memory and only causes
712: memory leaks. We void these pointers here to avoid accident uses.
713: */
714: if (fin) (*fin)->ops->replacearray = NULL;
715: if (fout) (*fout)->ops->replacearray = NULL;
716: if (bout) (*bout)->ops->replacearray = NULL;
717: #endif
718: }
719: PetscFunctionReturn(PETSC_SUCCESS);
720: }
722: /*@
723: VecScatterPetscToFFTW - Copies a PETSc vector to the vector that goes into `MATFFTW` calls.
725: Collective
727: Input Parameters:
728: + A - FFTW matrix
729: - x - the PETSc vector
731: Output Parameter:
732: . y - the FFTW vector
734: Level: intermediate
  Note:
  For a real parallel FFT, FFTW requires insertion of extra space at the end of the last dimension. This is required even when
  one is not doing an in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
  zeros. This routine does that job by a scattering operation.
741: .seealso: [](ch_matrices), `Mat`, `MATFFTW`, `VecScatterFFTWToPetsc()`, `MatCreateVecsFFTW()`
742: @*/
/* Public entry point: forwards to the type-specific implementation composed on A
   under the name "VecScatterPetscToFFTW_C" (registered in MatCreate_FFTW()). */
PetscErrorCode VecScatterPetscToFFTW(Mat A, Vec x, Vec y)
{
  PetscFunctionBegin;
  PetscUseMethod(A, "VecScatterPetscToFFTW_C", (Mat, Vec, Vec), (A, x, y));
  PetscFunctionReturn(PETSC_SUCCESS);
}
750: static PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A, Vec x, Vec y)
751: {
752: MPI_Comm comm;
753: Mat_FFT *fft = (Mat_FFT *)A->data;
754: PetscInt low;
755: PetscMPIInt rank, size;
756: PetscInt vsize, vsize1;
757: VecScatter vecscat;
758: IS list1;
760: PetscFunctionBegin;
761: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
762: PetscCallMPI(MPI_Comm_size(comm, &size));
763: PetscCallMPI(MPI_Comm_rank(comm, &rank));
764: PetscCall(VecGetOwnershipRange(y, &low, NULL));
766: if (size == 1) {
767: PetscCall(VecGetSize(x, &vsize));
768: PetscCall(VecGetSize(y, &vsize1));
769: PetscCall(ISCreateStride(PETSC_COMM_SELF, fft->N, 0, 1, &list1));
770: PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
771: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
772: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
773: PetscCall(VecScatterDestroy(&vecscat));
774: PetscCall(ISDestroy(&list1));
775: #if !PetscDefined(HAVE_MPIUNI)
776: } else {
777: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
778: PetscInt ndim = fft->ndim, *dim = fft->dim, n1;
779: ptrdiff_t local_n0, local_0_start;
780: ptrdiff_t local_n1, local_1_start;
781: IS list2;
782: #if !defined(PETSC_USE_COMPLEX)
783: PetscInt i, j, k, partial_dim;
784: PetscInt *indx1, *indx2, tempindx, tempindx1;
785: PetscInt NM;
786: ptrdiff_t temp;
787: #else
788: PetscInt nstart, nlow;
789: #endif
791: switch (ndim) {
792: case 1:
793: #if defined(PETSC_USE_COMPLEX)
794: fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
796: PetscCall(PetscIntCast(local_n0, &n1));
797: PetscCall(PetscIntCast(local_0_start, &nstart));
798: PetscCall(PetscIntCast(low, &nlow));
799: PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
800: PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
801: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
802: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
803: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
804: PetscCall(VecScatterDestroy(&vecscat));
805: PetscCall(ISDestroy(&list1));
806: PetscCall(ISDestroy(&list2));
807: #else
808: SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
809: #endif
810: break;
811: case 2:
812: #if defined(PETSC_USE_COMPLEX)
813: fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
815: PetscCall(PetscIntCast(local_n0 * dim[1], &n1));
816: PetscCall(PetscIntCast(local_0_start * dim[1], &nstart));
817: PetscCall(PetscIntCast(low, &nlow));
818: PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
819: PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
820: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
821: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
822: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
823: PetscCall(VecScatterDestroy(&vecscat));
824: PetscCall(ISDestroy(&list1));
825: PetscCall(ISDestroy(&list2));
826: #else
827: fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
829: PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx1));
830: PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx2));
832: if (dim[1] % 2 == 0) {
833: NM = dim[1] + 2;
834: } else {
835: NM = dim[1] + 1;
836: }
837: for (i = 0; i < local_n0; i++) {
838: for (j = 0; j < dim[1]; j++) {
839: tempindx = i * dim[1] + j;
840: tempindx1 = i * NM + j;
842: PetscCall(PetscIntCast(local_0_start * dim[1] + tempindx, &indx1[tempindx]));
843: indx2[tempindx] = low + tempindx1;
844: }
845: }
847: PetscCall(PetscIntCast(local_n0 * dim[1], &n1));
848: PetscCall(ISCreateGeneral(comm, n1, indx1, PETSC_COPY_VALUES, &list1));
849: PetscCall(ISCreateGeneral(comm, n1, indx2, PETSC_COPY_VALUES, &list2));
851: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
852: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
853: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
854: PetscCall(VecScatterDestroy(&vecscat));
855: PetscCall(ISDestroy(&list1));
856: PetscCall(ISDestroy(&list2));
857: PetscCall(PetscFree(indx1));
858: PetscCall(PetscFree(indx2));
859: #endif
860: break;
862: case 3:
863: #if defined(PETSC_USE_COMPLEX)
864: fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
866: PetscCall(PetscIntCast(local_n0 * dim[1] * dim[2], &n1));
867: PetscCall(PetscIntCast(local_0_start * dim[1] * dim[2], &nstart));
868: PetscCall(PetscIntCast(low, &nlow));
869: PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
870: PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
871: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
872: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
873: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
874: PetscCall(VecScatterDestroy(&vecscat));
875: PetscCall(ISDestroy(&list1));
876: PetscCall(ISDestroy(&list2));
877: #else
878: /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
879: SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 3D real transform");
880: fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
882: PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx1));
883: PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx2));
885: if (dim[2] % 2 == 0) NM = dim[2] + 2;
886: else NM = dim[2] + 1;
888: for (i = 0; i < local_n0; i++) {
889: for (j = 0; j < dim[1]; j++) {
890: for (k = 0; k < dim[2]; k++) {
891: tempindx = i * dim[1] * dim[2] + j * dim[2] + k;
892: tempindx1 = i * dim[1] * NM + j * NM + k;
894: PetscCall(PetscIntCast(local_0_start * dim[1] * dim[2] + tempindx, &indx1[tempindx]));
895: PetscCall(PetscIntCast(low + tempindx1, &indx2[tempindx]));
896: }
897: }
898: }
900: PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
901: PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));
902: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
903: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
904: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
905: PetscCall(VecScatterDestroy(&vecscat));
906: PetscCall(ISDestroy(&list1));
907: PetscCall(ISDestroy(&list2));
908: PetscCall(PetscFree(indx1));
909: PetscCall(PetscFree(indx2));
910: #endif
911: break;
913: default:
914: #if defined(PETSC_USE_COMPLEX)
915: fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
917: PetscCall(PetscIntCast(local_n0 * (fftw->partial_dim), &n1));
918: PetscCall(PetscIntCast(local_0_start * (fftw->partial_dim), &nstart));
919: PetscCall(PetscIntCast(low, &nlow));
920: PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
921: PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
922: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
923: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
924: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
925: PetscCall(VecScatterDestroy(&vecscat));
926: PetscCall(ISDestroy(&list1));
927: PetscCall(ISDestroy(&list2));
928: #else
929: /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
930: SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel DIM>3 real transform");
931: temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
933: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
935: fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
937: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
939: partial_dim = fftw->partial_dim;
941: PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx1));
942: PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx2));
944: if (dim[ndim - 1] % 2 == 0) NM = 2;
945: else NM = 1;
947: j = low;
948: for (i = 0, k = 1; i < (PetscInt)local_n0 * partial_dim; i++, k++) {
949: indx1[i] = local_0_start * partial_dim + i;
950: indx2[i] = j;
951: if (k % dim[ndim - 1] == 0) j += NM;
952: j++;
953: }
954: PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
955: PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));
956: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
957: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
958: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
959: PetscCall(VecScatterDestroy(&vecscat));
960: PetscCall(ISDestroy(&list1));
961: PetscCall(ISDestroy(&list2));
962: PetscCall(PetscFree(indx1));
963: PetscCall(PetscFree(indx2));
964: #endif
965: break;
966: }
967: #endif
968: }
969: PetscFunctionReturn(PETSC_SUCCESS);
970: }
972: /*@
973: VecScatterFFTWToPetsc - Converts `MATFFTW` output vector to a PETSc vector.
975: Collective
977: Input Parameters:
978: + A - `MATFFTW` matrix
979: - x - FFTW vector
981: Output Parameter:
982: . y - PETSc vector
984: Level: intermediate
986: Note:
When doing a real transform, the FFTW output of the backward DFT contains extra zeros at the end of the last dimension.
`VecScatterFFTWToPetsc()` removes those extra zeros.
990: .seealso: [](ch_matrices), `Mat`, `VecScatterPetscToFFTW()`, `MATFFTW`, `MatCreateVecsFFTW()`
991: @*/
/* Public entry point: forwards to the type-specific implementation composed on A
   under the name "VecScatterFFTWToPetsc_C" (registered in MatCreate_FFTW()). */
PetscErrorCode VecScatterFFTWToPetsc(Mat A, Vec x, Vec y)
{
  PetscFunctionBegin;
  PetscUseMethod(A, "VecScatterFFTWToPetsc_C", (Mat, Vec, Vec), (A, x, y));
  PetscFunctionReturn(PETSC_SUCCESS);
}
999: static PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A, Vec x, Vec y)
1000: {
1001: MPI_Comm comm;
1002: Mat_FFT *fft = (Mat_FFT *)A->data;
1003: PetscInt low;
1004: PetscMPIInt rank, size;
1005: VecScatter vecscat;
1006: IS list1;
1008: PetscFunctionBegin;
1009: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1010: PetscCallMPI(MPI_Comm_size(comm, &size));
1011: PetscCallMPI(MPI_Comm_rank(comm, &rank));
1012: PetscCall(VecGetOwnershipRange(x, &low, NULL));
1014: if (size == 1) {
1015: PetscCall(ISCreateStride(comm, fft->N, 0, 1, &list1));
1016: PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
1017: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1018: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1019: PetscCall(VecScatterDestroy(&vecscat));
1020: PetscCall(ISDestroy(&list1));
1022: #if !PetscDefined(HAVE_MPIUNI)
1023: } else {
1024: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
1025: PetscInt ndim = fft->ndim, *dim = fft->dim;
1026: ptrdiff_t local_n0, local_0_start;
1027: ptrdiff_t local_n1, local_1_start;
1028: IS list2;
1029: #if !defined(PETSC_USE_COMPLEX)
1030: PetscInt i, j, k, partial_dim;
1031: PetscInt *indx1, *indx2, tempindx, tempindx1;
1032: PetscInt NM;
1033: ptrdiff_t temp;
1034: #else
1035: PetscInt nlow, nstart;
1036: #endif
1037: PetscInt n1;
1038: switch (ndim) {
1039: case 1:
1040: #if defined(PETSC_USE_COMPLEX)
1041: fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
1043: PetscCall(PetscIntCast(local_n1, &n1));
1044: PetscCall(PetscIntCast(local_1_start, &nstart));
1045: PetscCall(PetscIntCast(low, &nlow));
1046: PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
1047: PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
1048: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
1049: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1050: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1051: PetscCall(VecScatterDestroy(&vecscat));
1052: PetscCall(ISDestroy(&list1));
1053: PetscCall(ISDestroy(&list2));
1054: #else
1055: SETERRQ(comm, PETSC_ERR_SUP, "No support for real parallel 1D FFT");
1056: #endif
1057: break;
1058: case 2:
1059: #if defined(PETSC_USE_COMPLEX)
1060: fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
1062: PetscCall(PetscIntCast(local_n0 * dim[1], &n1));
1063: PetscCall(PetscIntCast(local_0_start * dim[1], &nstart));
1064: PetscCall(PetscIntCast(low, &nlow));
1065: PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
1066: PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
1067: PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1068: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1069: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1070: PetscCall(VecScatterDestroy(&vecscat));
1071: PetscCall(ISDestroy(&list1));
1072: PetscCall(ISDestroy(&list2));
1073: #else
1074: fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1076: PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx1));
1077: PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx2));
1079: if (dim[1] % 2 == 0) NM = dim[1] + 2;
1080: else NM = dim[1] + 1;
1082: for (i = 0; i < local_n0; i++) {
1083: for (j = 0; j < dim[1]; j++) {
1084: tempindx = i * dim[1] + j;
1085: tempindx1 = i * NM + j;
1087: PetscCall(PetscIntCast(local_0_start * dim[1] + tempindx, &indx1[tempindx]));
1088: PetscCall(PetscIntCast(low + tempindx1, &indx2[tempindx]));
1089: }
1090: }
1092: PetscCall(PetscIntCast(local_n0 * dim[1], &n1));
1093: PetscCall(ISCreateGeneral(comm, n1, indx1, PETSC_COPY_VALUES, &list1));
1094: PetscCall(ISCreateGeneral(comm, n1, indx2, PETSC_COPY_VALUES, &list2));
1096: PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1097: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1098: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1099: PetscCall(VecScatterDestroy(&vecscat));
1100: PetscCall(ISDestroy(&list1));
1101: PetscCall(ISDestroy(&list2));
1102: PetscCall(PetscFree(indx1));
1103: PetscCall(PetscFree(indx2));
1104: #endif
1105: break;
1106: case 3:
1107: #if defined(PETSC_USE_COMPLEX)
1108: fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
1110: PetscCall(PetscIntCast(local_n0 * dim[1] * dim[2], &n1));
1111: PetscCall(PetscIntCast(local_0_start * dim[1] * dim[2], &nstart));
1112: PetscCall(PetscIntCast(low, &nlow));
1113: PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
1114: PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
1115: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
1116: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1117: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1118: PetscCall(VecScatterDestroy(&vecscat));
1119: PetscCall(ISDestroy(&list1));
1120: PetscCall(ISDestroy(&list2));
1121: #else
1122: fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1124: PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx1));
1125: PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx2));
1127: if (dim[2] % 2 == 0) NM = dim[2] + 2;
1128: else NM = dim[2] + 1;
1130: for (i = 0; i < local_n0; i++) {
1131: for (j = 0; j < dim[1]; j++) {
1132: for (k = 0; k < dim[2]; k++) {
1133: tempindx = i * dim[1] * dim[2] + j * dim[2] + k;
1134: tempindx1 = i * dim[1] * NM + j * NM + k;
1136: PetscCall(PetscIntCast(local_0_start * dim[1] * dim[2] + tempindx, &indx1[tempindx]));
1137: PetscCall(PetscIntCast(low + tempindx1, &indx2[tempindx]));
1138: }
1139: }
1140: }
1142: PetscCall(PetscIntCast(local_n0 * dim[1] * dim[2], &n1));
1143: PetscCall(ISCreateGeneral(comm, n1, indx1, PETSC_COPY_VALUES, &list1));
1144: PetscCall(ISCreateGeneral(comm, n1, indx2, PETSC_COPY_VALUES, &list2));
1146: PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1147: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1148: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1149: PetscCall(VecScatterDestroy(&vecscat));
1150: PetscCall(ISDestroy(&list1));
1151: PetscCall(ISDestroy(&list2));
1152: PetscCall(PetscFree(indx1));
1153: PetscCall(PetscFree(indx2));
1154: #endif
1155: break;
1156: default:
1157: #if defined(PETSC_USE_COMPLEX)
1158: fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
1160: PetscCall(PetscIntCast(local_n0 * (fftw->partial_dim), &n1));
1161: PetscCall(PetscIntCast(local_0_start * (fftw->partial_dim), &nstart));
1162: PetscCall(PetscIntCast(low, &nlow));
1163: PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
1164: PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
1165: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
1166: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1167: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1168: PetscCall(VecScatterDestroy(&vecscat));
1169: PetscCall(ISDestroy(&list1));
1170: PetscCall(ISDestroy(&list2));
1171: #else
1172: temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
1174: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
1176: fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1178: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
1180: partial_dim = fftw->partial_dim;
1182: PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx1));
1183: PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx2));
1185: if (dim[ndim - 1] % 2 == 0) NM = 2;
1186: else NM = 1;
1188: j = low;
1189: for (i = 0, k = 1; i < (PetscInt)local_n0 * partial_dim; i++, k++) {
1190: PetscCall(PetscIntCast(local_0_start * partial_dim + i, &indx1[i]));
1191: indx2[i] = j;
1192: if (k % dim[ndim - 1] == 0) j += NM;
1193: j++;
1194: }
1195: PetscCall(PetscIntCast(local_n0 * partial_dim, &n1));
1196: PetscCall(ISCreateGeneral(comm, n1, indx1, PETSC_COPY_VALUES, &list1));
1197: PetscCall(ISCreateGeneral(comm, n1, indx2, PETSC_COPY_VALUES, &list2));
1199: PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1200: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1201: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1202: PetscCall(VecScatterDestroy(&vecscat));
1203: PetscCall(ISDestroy(&list1));
1204: PetscCall(ISDestroy(&list2));
1205: PetscCall(PetscFree(indx1));
1206: PetscCall(PetscFree(indx2));
1207: #endif
1208: break;
1209: }
1210: #endif
1211: }
1212: PetscFunctionReturn(PETSC_SUCCESS);
1213: }
1215: /*MC
1216: MATFFTW - "fftw" - Matrix type that provides FFT via the FFTW external package.
1218: Level: intermediate
1220: .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatType`, `MatCreateFFT()`
1221: M*/
1222: PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1223: {
1224: MPI_Comm comm;
1225: Mat_FFT *fft = (Mat_FFT *)A->data;
1226: Mat_FFTW *fftw;
1227: PetscInt ndim = fft->ndim, *dim = fft->dim;
1228: const char *plans[] = {"FFTW_ESTIMATE", "FFTW_MEASURE", "FFTW_PATIENT", "FFTW_EXHAUSTIVE"};
1229: unsigned iplans[] = {FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE};
1230: PetscBool flg;
1231: PetscInt p_flag, partial_dim = 1, ctr;
1232: PetscMPIInt size, rank;
1233: ptrdiff_t *pdim;
1234: #if !defined(PETSC_USE_COMPLEX)
1235: PetscInt tot_dim;
1236: #endif
1238: PetscFunctionBegin;
1239: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1240: PetscCallMPI(MPI_Comm_size(comm, &size));
1241: PetscCallMPI(MPI_Comm_rank(comm, &rank));
1243: #if !PetscDefined(HAVE_MPIUNI)
1244: fftw_mpi_init();
1245: #endif
1246: pdim = (ptrdiff_t *)calloc(ndim, sizeof(ptrdiff_t));
1247: pdim[0] = dim[0];
1248: #if !defined(PETSC_USE_COMPLEX)
1249: tot_dim = 2 * dim[0];
1250: #endif
1251: for (ctr = 1; ctr < ndim; ctr++) {
1252: partial_dim *= dim[ctr];
1253: pdim[ctr] = dim[ctr];
1254: #if !defined(PETSC_USE_COMPLEX)
1255: if (ctr == ndim - 1) tot_dim *= (dim[ctr] / 2 + 1);
1256: else tot_dim *= dim[ctr];
1257: #endif
1258: }
1260: if (size == 1) {
1261: #if defined(PETSC_USE_COMPLEX)
1262: PetscCall(MatSetSizes(A, fft->N, fft->N, fft->N, fft->N));
1263: fft->n = fft->N;
1264: #else
1265: PetscCall(MatSetSizes(A, tot_dim, tot_dim, tot_dim, tot_dim));
1266: fft->n = tot_dim;
1267: #endif
1268: #if !PetscDefined(HAVE_MPIUNI)
1269: } else {
1270: ptrdiff_t local_n0, local_0_start, local_n1, local_1_start;
1271: #if !defined(PETSC_USE_COMPLEX)
1272: ptrdiff_t temp;
1273: PetscInt N1;
1274: #else
1275: PetscInt n1;
1276: #endif
1278: switch (ndim) {
1279: case 1:
1280: #if !defined(PETSC_USE_COMPLEX)
1281: SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
1282: #else
1283: fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
1284: PetscCall(PetscIntCast(local_n0, &fft->n));
1285: PetscCall(PetscIntCast(local_n1, &n1));
1286: PetscCall(MatSetSizes(A, n1, fft->n, fft->N, fft->N));
1287: #endif
1288: break;
1289: case 2:
1290: #if defined(PETSC_USE_COMPLEX)
1291: fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
1292: fft->n = (PetscInt)local_n0 * dim[1];
1293: PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1294: #else
1295: fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1297: fft->n = 2 * (PetscInt)local_n0 * (dim[1] / 2 + 1);
1298: PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * (dim[1] / 2 + 1), 2 * dim[0] * (dim[1] / 2 + 1)));
1299: #endif
1300: break;
1301: case 3:
1302: #if defined(PETSC_USE_COMPLEX)
1303: fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
1305: fft->n = (PetscInt)local_n0 * dim[1] * dim[2];
1306: PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1307: #else
1308: fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1310: fft->n = 2 * (PetscInt)local_n0 * dim[1] * (dim[2] / 2 + 1);
1311: PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * dim[1] * (dim[2] / 2 + 1), 2 * dim[0] * dim[1] * (dim[2] / 2 + 1)));
1312: #endif
1313: break;
1314: default:
1315: #if defined(PETSC_USE_COMPLEX)
1316: fftw_mpi_local_size(ndim, pdim, comm, &local_n0, &local_0_start);
1318: PetscCall(PetscIntCast(local_n0 * partial_dim, &fft->n));
1319: PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1320: #else
1321: temp = pdim[ndim - 1];
1323: pdim[ndim - 1] = temp / 2 + 1;
1325: fftw_mpi_local_size_transposed(ndim, pdim, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1327: PetscCall(PetscIntCast(2 * (PetscInt)local_n0 * partial_dim * pdim[ndim - 1] / temp, &fft->n));
1328: N1 = 2 * fft->N * (PetscInt)pdim[ndim - 1] / ((PetscInt)temp);
1330: pdim[ndim - 1] = temp;
1332: PetscCall(MatSetSizes(A, fft->n, fft->n, N1, N1));
1333: #endif
1334: break;
1335: }
1336: #endif
1337: }
1338: free(pdim);
1339: PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATFFTW));
1340: PetscCall(PetscNew(&fftw));
1341: fft->data = (void *)fftw;
1343: fftw->ndim_fftw = ndim; /* This is dimension of fft */
1344: fftw->partial_dim = partial_dim;
1346: PetscCall(PetscMalloc1(ndim, &fftw->dim_fftw));
1347: if (size == 1) {
1348: #if defined(PETSC_USE_64BIT_INDICES)
1349: fftw->iodims = (fftw_iodim64 *)malloc(sizeof(fftw_iodim64) * ndim);
1350: #else
1351: fftw->iodims = (fftw_iodim *)malloc(sizeof(fftw_iodim) * ndim);
1352: #endif
1353: }
1355: for (ctr = 0; ctr < ndim; ctr++) (fftw->dim_fftw)[ctr] = dim[ctr];
1357: fftw->p_forward = NULL;
1358: fftw->p_backward = NULL;
1359: fftw->p_flag = FFTW_ESTIMATE;
1361: if (size == 1) {
1362: A->ops->mult = MatMult_SeqFFTW;
1363: A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1364: #if !PetscDefined(HAVE_MPIUNI)
1365: } else {
1366: A->ops->mult = MatMult_MPIFFTW;
1367: A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1368: #endif
1369: }
1370: fft->matdestroy = MatDestroy_FFTW;
1371: A->assembled = PETSC_TRUE;
1372: A->preallocated = PETSC_TRUE;
1374: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", MatCreateVecsFFTW_FFTW));
1375: PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", VecScatterPetscToFFTW_FFTW));
1376: PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", VecScatterFFTWToPetsc_FFTW));
1378: /* get runtime options */
1379: PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "FFTW Options", "Mat");
1380: PetscCall(PetscOptionsEList("-mat_fftw_plannerflags", "Planner Flags", "None", plans, 4, plans[0], &p_flag, &flg));
1381: if (flg) fftw->p_flag = iplans[p_flag];
1382: PetscOptionsEnd();
1383: PetscFunctionReturn(PETSC_SUCCESS);
1384: }