Actual source code: fftw.c
2: /*
3: Provides an interface to the FFTW package.
4: Testing examples can be found in ~src/mat/tests
5: */
7: #include <../src/mat/impls/fft/fft.h>
8: EXTERN_C_BEGIN
9: #if !PetscDefined(HAVE_MPIUNI)
10: #include <fftw3-mpi.h>
11: #else
12: #include <fftw3.h>
13: #endif
14: EXTERN_C_END
16: typedef struct {
17: ptrdiff_t ndim_fftw, *dim_fftw;
18: #if defined(PETSC_USE_64BIT_INDICES)
19: fftw_iodim64 *iodims;
20: #else
21: fftw_iodim *iodims;
22: #endif
23: PetscInt partial_dim;
24: fftw_plan p_forward, p_backward;
25: unsigned p_flag; /* planner flags: FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
26: PetscScalar *finarray, *foutarray, *binarray, *boutarray; /* keep track of the arrays because an FFTW plan should be
27: executed with the arrays for which the plan was created */
28: } Mat_FFTW;
30: extern PetscErrorCode MatMult_SeqFFTW(Mat, Vec, Vec);
31: extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat, Vec, Vec);
32: extern PetscErrorCode MatDestroy_FFTW(Mat);
33: extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat, Vec *, Vec *, Vec *);
34: #if !PetscDefined(HAVE_MPIUNI)
35: extern PetscErrorCode MatMult_MPIFFTW(Mat, Vec, Vec);
36: extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat, Vec, Vec);
37: extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
38: #endif
40: /*
41: MatMult_SeqFFTW performs the forward DFT
42: Input parameters:
43: A - the matrix
44: x - the vector on which the forward DFT (FDFT) will be performed
46: Output parameter:
47: y - the vector that stores the result of the FDFT
48: */
49: PetscErrorCode MatMult_SeqFFTW(Mat A, Vec x, Vec y)
50: {
51: Mat_FFT *fft = (Mat_FFT *)A->data;
52: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
53: const PetscScalar *x_array;
54: PetscScalar *y_array;
55: #if defined(PETSC_USE_COMPLEX)
56: #if defined(PETSC_USE_64BIT_INDICES)
57: fftw_iodim64 *iodims;
58: #else
59: fftw_iodim *iodims;
60: #endif
61: PetscInt i;
62: #endif
63: PetscInt ndim = fft->ndim, *dim = fft->dim;
65: VecGetArrayRead(x, &x_array);
66: VecGetArray(y, &y_array);
67: if (!fftw->p_forward) { /* create a plan, then execute it */
68: switch (ndim) {
69: case 1:
70: #if defined(PETSC_USE_COMPLEX)
71: fftw->p_forward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
72: #else
73: fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
74: #endif
75: break;
76: case 2:
77: #if defined(PETSC_USE_COMPLEX)
78: fftw->p_forward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
79: #else
80: fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
81: #endif
82: break;
83: case 3:
84: #if defined(PETSC_USE_COMPLEX)
85: fftw->p_forward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
86: #else
87: fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
88: #endif
89: break;
90: default:
91: #if defined(PETSC_USE_COMPLEX)
92: iodims = fftw->iodims;
93: #if defined(PETSC_USE_64BIT_INDICES)
94: if (ndim) {
95: iodims[ndim - 1].n = (ptrdiff_t)dim[ndim - 1];
96: iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
97: for (i = ndim - 2; i >= 0; --i) {
98: iodims[i].n = (ptrdiff_t)dim[i];
99: iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
100: }
101: }
102: fftw->p_forward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
103: #else
104: if (ndim) {
105: iodims[ndim - 1].n = (int)dim[ndim - 1];
106: iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
107: for (i = ndim - 2; i >= 0; --i) {
108: iodims[i].n = (int)dim[i];
109: iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
110: }
111: }
112: fftw->p_forward = fftw_plan_guru_dft((int)ndim, (fftw_iodim *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
113: #endif
115: #else
116: fftw->p_forward = fftw_plan_dft_r2c(ndim, (int *)dim, (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
117: #endif
118: break;
119: }
120: fftw->finarray = (PetscScalar *)x_array;
121: fftw->foutarray = y_array;
122: /* Warning: if fftw->p_flag != FFTW_ESTIMATE, the data in the in/out arrays is overwritten during planning,
123: so planning should be done before x is initialized! See FFTW manual sec. 2.1 or sec. 4 */
124: fftw_execute(fftw->p_forward);
125: } else { /* use existing plan */
126: if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
127: #if defined(PETSC_USE_COMPLEX)
128: fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
129: #else
130: fftw_execute_dft_r2c(fftw->p_forward, (double *)x_array, (fftw_complex *)y_array);
131: #endif
132: } else {
133: fftw_execute(fftw->p_forward);
134: }
135: }
136: VecRestoreArray(y, &y_array);
137: VecRestoreArrayRead(x, &x_array);
138: return 0;
139: }
141: /* MatMultTranspose_SeqFFTW performs the serial backward DFT
142: Input parameters:
143: A - the matrix
144: x - the vector on which the backward DFT (BDFT) will be performed
146: Output parameter:
147: y - the vector that stores the result of the BDFT
148: */
150: PetscErrorCode MatMultTranspose_SeqFFTW(Mat A, Vec x, Vec y)
151: {
152: Mat_FFT *fft = (Mat_FFT *)A->data;
153: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
154: const PetscScalar *x_array;
155: PetscScalar *y_array;
156: PetscInt ndim = fft->ndim, *dim = fft->dim;
157: #if defined(PETSC_USE_COMPLEX)
158: #if defined(PETSC_USE_64BIT_INDICES)
159: fftw_iodim64 *iodims = fftw->iodims;
160: #else
161: fftw_iodim *iodims = fftw->iodims;
162: #endif
163: #endif
165: VecGetArrayRead(x, &x_array);
166: VecGetArray(y, &y_array);
167: if (!fftw->p_backward) { /* create a plan, then execute it */
168: switch (ndim) {
169: case 1:
170: #if defined(PETSC_USE_COMPLEX)
171: fftw->p_backward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
172: #else
173: fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
174: #endif
175: break;
176: case 2:
177: #if defined(PETSC_USE_COMPLEX)
178: fftw->p_backward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
179: #else
180: fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
181: #endif
182: break;
183: case 3:
184: #if defined(PETSC_USE_COMPLEX)
185: fftw->p_backward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
186: #else
187: fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
188: #endif
189: break;
190: default:
191: #if defined(PETSC_USE_COMPLEX)
192: #if defined(PETSC_USE_64BIT_INDICES)
193: fftw->p_backward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
194: #else
195: fftw->p_backward = fftw_plan_guru_dft((int)ndim, iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
196: #endif
197: #else
198: fftw->p_backward = fftw_plan_dft_c2r((int)ndim, (int *)dim, (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
199: #endif
200: break;
201: }
202: fftw->binarray = (PetscScalar *)x_array;
203: fftw->boutarray = y_array;
204: fftw_execute(fftw->p_backward);
205: } else { /* use existing plan */
206: if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
207: #if defined(PETSC_USE_COMPLEX)
208: fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
209: #else
210: fftw_execute_dft_c2r(fftw->p_backward, (fftw_complex *)x_array, (double *)y_array);
211: #endif
212: } else {
213: fftw_execute(fftw->p_backward);
214: }
215: }
216: VecRestoreArray(y, &y_array);
217: VecRestoreArrayRead(x, &x_array);
218: return 0;
219: }
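
/*
   A minimal sequential usage sketch (assumes a complex-valued PETSc build; error checking omitted):
   create a 1D MATFFTW matrix, apply the forward DFT with MatMult() and the backward DFT with
   MatMultTranspose(). FFTW transforms are unnormalized, so the backward transform of the forward
   transform returns N*x, and the result is rescaled by 1/N here.

     Mat      A;
     Vec      x, y, z;
     PetscInt dim[1] = {64};

     MatCreateFFT(PETSC_COMM_SELF, 1, dim, MATFFTW, &A);
     MatCreateVecsFFTW(A, &x, &y, &z);
     VecSetRandom(x, NULL);
     MatMult(A, x, y);                  // forward DFT: y = FFT(x)
     MatMultTranspose(A, y, z);         // backward DFT: z = N * x
     VecScale(z, 1.0 / 64.0);           // undo the factor N = dim[0]
     VecDestroy(&x);
     VecDestroy(&y);
     VecDestroy(&z);
     MatDestroy(&A);
*/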
221: #if !PetscDefined(HAVE_MPIUNI)
222: /* MatMult_MPIFFTW performs the forward DFT in parallel
223: Input parameters:
224: A - the matrix
225: x - the vector on which the forward DFT (FDFT) will be performed
227: Output parameter:
228: y - the vector that stores the result of the FDFT
229: */
230: PetscErrorCode MatMult_MPIFFTW(Mat A, Vec x, Vec y)
231: {
232: Mat_FFT *fft = (Mat_FFT *)A->data;
233: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
234: const PetscScalar *x_array;
235: PetscScalar *y_array;
236: PetscInt ndim = fft->ndim, *dim = fft->dim;
237: MPI_Comm comm;
239: PetscObjectGetComm((PetscObject)A, &comm);
240: VecGetArrayRead(x, &x_array);
241: VecGetArray(y, &y_array);
242: if (!fftw->p_forward) { /* create a plan, then execute it */
243: switch (ndim) {
244: case 1:
245: #if defined(PETSC_USE_COMPLEX)
246: fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
247: #else
248: SETERRQ(comm, PETSC_ERR_SUP, "No support for real numbers yet");
249: #endif
250: break;
251: case 2:
252: #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
253: fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
254: #else
255: fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
256: #endif
257: break;
258: case 3:
259: #if defined(PETSC_USE_COMPLEX)
260: fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
261: #else
262: fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
263: #endif
264: break;
265: default:
266: #if defined(PETSC_USE_COMPLEX)
267: fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
268: #else
269: fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw, fftw->dim_fftw, (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
270: #endif
271: break;
272: }
273: fftw->finarray = (PetscScalar *)x_array;
274: fftw->foutarray = y_array;
275: /* Warning: if fftw->p_flag != FFTW_ESTIMATE, the data in the in/out arrays is overwritten during planning,
276: so planning should be done before x is initialized! See FFTW manual sec. 2.1 or sec. 4 */
277: fftw_execute(fftw->p_forward);
278: } else { /* use existing plan */
279: if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
280: fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
281: } else {
282: fftw_execute(fftw->p_forward);
283: }
284: }
285: VecRestoreArray(y, &y_array);
286: VecRestoreArrayRead(x, &x_array);
287: return 0;
288: }
290: /*
291: MatMultTranspose_MPIFFTW performs the parallel backward DFT
292: Input parameters:
293: A - the matrix
294: x - the vector on which the backward DFT (BDFT) will be performed
296: Output parameter:
297: y - the vector that stores the result of the BDFT
298: */
299: PetscErrorCode MatMultTranspose_MPIFFTW(Mat A, Vec x, Vec y)
300: {
301: Mat_FFT *fft = (Mat_FFT *)A->data;
302: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
303: const PetscScalar *x_array;
304: PetscScalar *y_array;
305: PetscInt ndim = fft->ndim, *dim = fft->dim;
306: MPI_Comm comm;
308: PetscObjectGetComm((PetscObject)A, &comm);
309: VecGetArrayRead(x, &x_array);
310: VecGetArray(y, &y_array);
311: if (!fftw->p_backward) { /* create a plan, then execute it */
312: switch (ndim) {
313: case 1:
314: #if defined(PETSC_USE_COMPLEX)
315: fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
316: #else
317: SETERRQ(comm, PETSC_ERR_SUP, "No support for real numbers yet");
318: #endif
319: break;
320: case 2:
321: #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
322: fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
323: #else
324: fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
325: #endif
326: break;
327: case 3:
328: #if defined(PETSC_USE_COMPLEX)
329: fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
330: #else
331: fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
332: #endif
333: break;
334: default:
335: #if defined(PETSC_USE_COMPLEX)
336: fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
337: #else
338: fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
339: #endif
340: break;
341: }
342: fftw->binarray = (PetscScalar *)x_array;
343: fftw->boutarray = y_array;
344: fftw_execute(fftw->p_backward);
345: } else { /* use existing plan */
346: if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
347: fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
348: } else {
349: fftw_execute(fftw->p_backward);
350: }
351: }
352: VecRestoreArray(y, &y_array);
353: VecRestoreArrayRead(x, &x_array);
354: return 0;
355: }
356: #endif
358: PetscErrorCode MatDestroy_FFTW(Mat A)
359: {
360: Mat_FFT *fft = (Mat_FFT *)A->data;
361: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
363: fftw_destroy_plan(fftw->p_forward);
364: fftw_destroy_plan(fftw->p_backward);
365: if (fftw->iodims) free(fftw->iodims);
366: PetscFree(fftw->dim_fftw);
367: PetscFree(fft->data);
368: PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", NULL);
369: PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", NULL);
370: PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", NULL);
371: #if !PetscDefined(HAVE_MPIUNI)
372: fftw_mpi_cleanup();
373: #endif
374: return 0;
375: }
377: #if !PetscDefined(HAVE_MPIUNI)
378: #include <../src/vec/vec/impls/mpi/pvecimpl.h>
379: PetscErrorCode VecDestroy_MPIFFTW(Vec v)
380: {
381: PetscScalar *array;
383: VecGetArray(v, &array);
384: fftw_free((fftw_complex *)array);
385: VecRestoreArray(v, &array);
386: VecDestroy_MPI(v);
387: return 0;
388: }
389: #endif
391: #if !PetscDefined(HAVE_MPIUNI)
392: static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin, Vec *fin_new)
393: {
394: Mat A;
396: PetscObjectQuery((PetscObject)fin, "FFTmatrix", (PetscObject *)&A);
397: MatCreateVecsFFTW_FFTW(A, fin_new, NULL, NULL);
398: return 0;
399: }
401: static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout, Vec *fout_new)
402: {
403: Mat A;
405: PetscObjectQuery((PetscObject)fout, "FFTmatrix", (PetscObject *)&A);
406: MatCreateVecsFFTW_FFTW(A, NULL, fout_new, NULL);
407: return 0;
408: }
410: static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new)
411: {
412: Mat A;
414: PetscObjectQuery((PetscObject)bout, "FFTmatrix", (PetscObject *)&A);
415: MatCreateVecsFFTW_FFTW(A, NULL, NULL, bout_new);
416: return 0;
417: }
418: #endif
420: /*@
421: MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
422: parallel layout determined by `MATFFTW`
424: Collective
426: Input Parameter:
427: . A - the matrix
429: Output Parameters:
430: + x - (optional) input vector of forward FFTW
431: . y - (optional) output vector of forward FFTW
432: - z - (optional) output vector of backward FFTW
434: Level: advanced
436: Notes:
437: The parallel layout of the output of the forward FFTW transform is always the same as the input
438: of the backward FFTW transform. However, the parallel layout of the input vector of the forward
439: transform might not be the same as the output of the backward transform.
441: Enough space must be provided when doing a parallel real transform: extra zeros have to be padded
442: at the end of the last dimension. For this reason one needs to invoke the
443: fftw_mpi_local_size_transposed() routine, remembering to change the last dimension from n to
444: n/2+1 when calling it. The number of zeros to be padded depends on whether the last dimension is
445: even or odd: two zeros are needed if it is even, one if it is odd. For example, with dim[] = {4, 5}
446: the (odd) last dimension is padded by one zero, so each row of the real array holds 2*(5/2+1) = 6 doubles.
448: Lastly, one needs some scratch space at the end of the data set on each process; the alloc_local
449: value returned by the local-size routine gives the data plus scratch space needed on
450: each process.
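
  Example Usage:
  A minimal sketch for a complex 2D transform (error checking with `PetscCall()` omitted):
.vb
  Mat      A;
  Vec      x, y, z;
  PetscInt dim[2] = {32, 32};

  MatCreateFFT(PETSC_COMM_WORLD, 2, dim, MATFFTW, &A);
  MatCreateVecsFFTW(A, &x, &y, &z);   /* x: forward input, y: forward output, z: backward output */
.ve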
452: Developer Note:
453: How come `MatCreateVecs()` doesn't produce the correctly padded vectors automatically?
455: .seealso: `MATFFTW`, `MatCreateFFT()`, `MatCreateVecs()`
456: @*/
457: PetscErrorCode MatCreateVecsFFTW(Mat A, Vec *x, Vec *y, Vec *z)
458: {
459: PetscUseMethod(A, "MatCreateVecsFFTW_C", (Mat, Vec *, Vec *, Vec *), (A, x, y, z));
460: return 0;
461: }
463: PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A, Vec *fin, Vec *fout, Vec *bout)
464: {
465: PetscMPIInt size, rank;
466: MPI_Comm comm;
467: Mat_FFT *fft = (Mat_FFT *)A->data;
471: PetscObjectGetComm((PetscObject)A, &comm);
473: MPI_Comm_size(comm, &size);
474: MPI_Comm_rank(comm, &rank);
475: if (size == 1) { /* sequential case */
476: #if defined(PETSC_USE_COMPLEX)
477: if (fin) VecCreateSeq(PETSC_COMM_SELF, fft->N, fin);
478: if (fout) VecCreateSeq(PETSC_COMM_SELF, fft->N, fout);
479: if (bout) VecCreateSeq(PETSC_COMM_SELF, fft->N, bout);
480: #else
481: if (fin) VecCreateSeq(PETSC_COMM_SELF, fft->n, fin);
482: if (fout) VecCreateSeq(PETSC_COMM_SELF, fft->n, fout);
483: if (bout) VecCreateSeq(PETSC_COMM_SELF, fft->n, bout);
484: #endif
485: #if !PetscDefined(HAVE_MPIUNI)
486: } else { /* parallel cases */
487: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
488: PetscInt ndim = fft->ndim, *dim = fft->dim;
489: ptrdiff_t alloc_local, local_n0, local_0_start;
490: ptrdiff_t local_n1;
491: fftw_complex *data_fout;
492: ptrdiff_t local_1_start;
493: #if defined(PETSC_USE_COMPLEX)
494: fftw_complex *data_fin, *data_bout;
495: #else
496: double *data_finr, *data_boutr;
497: PetscInt n1, N1;
498: ptrdiff_t temp;
499: #endif
501: switch (ndim) {
502: case 1:
503: #if !defined(PETSC_USE_COMPLEX)
504: SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not allow parallel real 1D transform");
505: #else
506: alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
507: if (fin) {
508: data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
509: VecCreateMPIWithArray(comm, 1, local_n0, fft->N, (const PetscScalar *)data_fin, fin);
510: PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A);
511: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
512: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
513: }
514: if (fout) {
515: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
516: VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_fout, fout);
517: PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A);
518: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
519: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
520: }
521: alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
522: if (bout) {
523: data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
524: VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_bout, bout);
525: PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A);
526: (*bout)->ops->duplicate = VecDuplicate_FFTW_fout;
527: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
528: }
529: break;
530: #endif
531: case 2:
532: #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
533: alloc_local = fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
534: N1 = 2 * dim[0] * (dim[1] / 2 + 1);
535: n1 = 2 * local_n0 * (dim[1] / 2 + 1);
536: if (fin) {
537: data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
538: VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin);
539: PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A);
540: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
541: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
542: }
543: if (fout) {
544: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
545: VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_fout, fout);
546: PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A);
547: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
548: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
549: }
550: if (bout) {
551: data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
552: VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout);
553: PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A);
554: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
555: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
556: }
557: #else
558: /* Get local size */
559: alloc_local = fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
560: if (fin) {
561: data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
562: VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin);
563: PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A);
564: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
565: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
566: }
567: if (fout) {
568: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
569: VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout);
570: PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A);
571: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
572: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
573: }
574: if (bout) {
575: data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
576: VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout);
577: PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A);
578: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
579: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
580: }
581: #endif
582: break;
583: case 3:
584: #if !defined(PETSC_USE_COMPLEX)
585: alloc_local = fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
586: N1 = 2 * dim[0] * dim[1] * (dim[2] / 2 + 1);
587: n1 = 2 * local_n0 * dim[1] * (dim[2] / 2 + 1);
588: if (fin) {
589: data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
590: VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin);
591: PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A);
592: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
593: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
594: }
595: if (fout) {
596: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
597: VecCreateMPIWithArray(comm, 1, n1, N1, (PetscScalar *)data_fout, fout);
598: PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A);
599: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
600: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
601: }
602: if (bout) {
603: data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
604: VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout);
605: PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A);
606: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
607: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
608: }
609: #else
610: alloc_local = fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
611: if (fin) {
612: data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
613: VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin);
614: PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A);
615: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
616: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
617: }
618: if (fout) {
619: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
620: VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout);
621: PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A);
622: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
623: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
624: }
625: if (bout) {
626: data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
627: VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout);
628: PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A);
629: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
630: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
631: }
632: #endif
633: break;
634: default:
635: #if !defined(PETSC_USE_COMPLEX)
636: temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
637: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
638: alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
639: N1 = 2 * fft->N * (PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw - 1]) / ((PetscInt)temp);
640: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
642: if (fin) {
643: data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
644: VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_finr, fin);
645: PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A);
646: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
647: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
648: }
649: if (fout) {
650: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
651: VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_fout, fout);
652: PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A);
653: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
654: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
655: }
656: if (bout) {
657: data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
658: VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_boutr, bout);
659: PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A);
660: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
661: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
662: }
663: #else
664: alloc_local = fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
665: if (fin) {
666: data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
667: VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin);
668: PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A);
669: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
670: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
671: }
672: if (fout) {
673: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
674: VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout);
675: PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A);
676: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
677: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
678: }
679: if (bout) {
680: data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
681: VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout);
682: PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A);
683: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
684: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
685: }
686: #endif
687: break;
688: }
689: /* FFTW vectors have their data array allocated by fftw_malloc(), so v->array is set but
690: v->array_allocated is NULL. A regular replacearray call would not free that memory and would only cause
691: memory leaks. We void these pointers here to avoid accidental use.
692: */
693: if (fin) (*fin)->ops->replacearray = NULL;
694: if (fout) (*fout)->ops->replacearray = NULL;
695: if (bout) (*bout)->ops->replacearray = NULL;
696: #endif
697: }
698: return 0;
699: }
701: /*@
702: VecScatterPetscToFFTW - Copies a PETSc vector to the vector that goes into `MATFFTW` calls.
704: Collective
706: Input Parameters:
707: + A - FFTW matrix
708: - x - the PETSc vector
710: Output Parameter:
711: . y - the FFTW vector
713: Level: intermediate
715: Note:
716: For a parallel real FFT, FFTW requires extra space to be inserted at the end of the last dimension. This is required even when
717: one is not doing an in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
718: zeros. This routine does that job via a scatter operation.
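
  Example Usage:
  A minimal sketch of a parallel forward transform of a PETSc vector u (assumes A was created with
  `MatCreateFFT()` and u has global size dim[0]*...*dim[ndim-1]; error checking omitted):
.vb
  Vec x, y, z;
  MatCreateVecsFFTW(A, &x, &y, &z);
  VecScatterPetscToFFTW(A, u, x);   /* copy u into the (padded) FFTW input vector */
  MatMult(A, x, y);                 /* forward DFT */
.ve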
720: .seealso: `MATFFTW`, `VecScatterFFTWToPetsc()`, `MatCreateVecsFFTW()`
721: @*/
722: PetscErrorCode VecScatterPetscToFFTW(Mat A, Vec x, Vec y)
723: {
724: PetscUseMethod(A, "VecScatterPetscToFFTW_C", (Mat, Vec, Vec), (A, x, y));
725: return 0;
726: }
728: PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A, Vec x, Vec y)
729: {
730: MPI_Comm comm;
731: Mat_FFT *fft = (Mat_FFT *)A->data;
732: PetscInt low;
733: PetscMPIInt rank, size;
734: PetscInt vsize, vsize1;
735: VecScatter vecscat;
736: IS list1;
738: PetscObjectGetComm((PetscObject)A, &comm);
739: MPI_Comm_size(comm, &size);
740: MPI_Comm_rank(comm, &rank);
741: VecGetOwnershipRange(y, &low, NULL);
743: if (size == 1) {
744: VecGetSize(x, &vsize);
745: VecGetSize(y, &vsize1);
746: ISCreateStride(PETSC_COMM_SELF, fft->N, 0, 1, &list1);
747: VecScatterCreate(x, list1, y, list1, &vecscat);
748: VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD);
749: VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD);
750: VecScatterDestroy(&vecscat);
751: ISDestroy(&list1);
752: #if !PetscDefined(HAVE_MPIUNI)
753: } else {
754: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
755: PetscInt ndim = fft->ndim, *dim = fft->dim;
756: ptrdiff_t local_n0, local_0_start;
757: ptrdiff_t local_n1, local_1_start;
758: IS list2;
759: #if !defined(PETSC_USE_COMPLEX)
760: PetscInt i, j, k, partial_dim;
761: PetscInt *indx1, *indx2, tempindx, tempindx1;
762: PetscInt NM;
763: ptrdiff_t temp;
764: #endif
766: switch (ndim) {
767: case 1:
768: #if defined(PETSC_USE_COMPLEX)
769: fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
771: ISCreateStride(comm, local_n0, local_0_start, 1, &list1);
772: ISCreateStride(comm, local_n0, low, 1, &list2);
773: VecScatterCreate(x, list1, y, list2, &vecscat);
774: VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD);
775: VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD);
776: VecScatterDestroy(&vecscat);
777: ISDestroy(&list1);
778: ISDestroy(&list2);
779: #else
780: SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
781: #endif
782: break;
783: case 2:
784: #if defined(PETSC_USE_COMPLEX)
785: fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
787: ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1);
788: ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2);
789: VecScatterCreate(x, list1, y, list2, &vecscat);
790: VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD);
791: VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD);
792: VecScatterDestroy(&vecscat);
793: ISDestroy(&list1);
794: ISDestroy(&list2);
795: #else
796: fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
798: PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx1);
799: PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx2);
801: if (dim[1] % 2 == 0) {
802: NM = dim[1] + 2;
803: } else {
804: NM = dim[1] + 1;
805: }
806: for (i = 0; i < local_n0; i++) {
807: for (j = 0; j < dim[1]; j++) {
808: tempindx = i * dim[1] + j;
809: tempindx1 = i * NM + j;
811: indx1[tempindx] = local_0_start * dim[1] + tempindx;
812: indx2[tempindx] = low + tempindx1;
813: }
814: }
816: ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1);
817: ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2);
819: VecScatterCreate(x, list1, y, list2, &vecscat);
820: VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD);
821: VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD);
822: VecScatterDestroy(&vecscat);
823: ISDestroy(&list1);
824: ISDestroy(&list2);
825: PetscFree(indx1);
826: PetscFree(indx2);
827: #endif
828: break;
830: case 3:
831: #if defined(PETSC_USE_COMPLEX)
832: fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
834: ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1);
835: ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2);
836: VecScatterCreate(x, list1, y, list2, &vecscat);
837: VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD);
838: VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD);
839: VecScatterDestroy(&vecscat);
840: ISDestroy(&list1);
841: ISDestroy(&list2);
842: #else
843: /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
844: SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 3D real transform");
845: fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
847: PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx1);
848: PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx2);
850: if (dim[2] % 2 == 0) NM = dim[2] + 2;
851: else NM = dim[2] + 1;
853: for (i = 0; i < local_n0; i++) {
854: for (j = 0; j < dim[1]; j++) {
855: for (k = 0; k < dim[2]; k++) {
856: tempindx = i * dim[1] * dim[2] + j * dim[2] + k;
857: tempindx1 = i * dim[1] * NM + j * NM + k;
859: indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
860: indx2[tempindx] = low + tempindx1;
861: }
862: }
863: }
865: ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1);
866: ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2);
867: VecScatterCreate(x, list1, y, list2, &vecscat);
868: VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD);
869: VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD);
870: VecScatterDestroy(&vecscat);
871: ISDestroy(&list1);
872: ISDestroy(&list2);
873: PetscFree(indx1);
874: PetscFree(indx2);
875: #endif
876: break;
878: default:
879: #if defined(PETSC_USE_COMPLEX)
880: fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
882: ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1);
883: ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2);
884: VecScatterCreate(x, list1, y, list2, &vecscat);
885: VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD);
886: VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD);
887: VecScatterDestroy(&vecscat);
888: ISDestroy(&list1);
889: ISDestroy(&list2);
890: #else
891: /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
892: SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel DIM>3 real transform");
893: temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
895: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
897: fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
899: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
901: partial_dim = fftw->partial_dim;
903: PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx1);
904: PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx2);
906: if (dim[ndim - 1] % 2 == 0) NM = 2;
907: else NM = 1;
909: j = low;
910: for (i = 0, k = 1; i < ((PetscInt)local_n0) * partial_dim; i++, k++) {
911: indx1[i] = local_0_start * partial_dim + i;
912: indx2[i] = j;
913: if (k % dim[ndim - 1] == 0) j += NM;
914: j++;
915: }
916: ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1);
917: ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2);
918: VecScatterCreate(x, list1, y, list2, &vecscat);
919: VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD);
920: VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD);
921: VecScatterDestroy(&vecscat);
922: ISDestroy(&list1);
923: ISDestroy(&list2);
924: PetscFree(indx1);
925: PetscFree(indx2);
926: #endif
927: break;
928: }
929: #endif
930: }
931: return 0;
932: }
934: /*@
935: VecScatterFFTWToPetsc - Converts `MATFFTW` output vector to a PETSc vector.
937: Collective
939: Input Parameters:
940: + A - `MATFFTW` matrix
941: - x - FFTW vector
943: Output Parameter:
944: . y - PETSc vector
946: Level: intermediate
948: Note:
949: While doing a real transform, the FFTW output of the backward DFT contains extra zeros at the end of the last dimension.
950: `VecScatterFFTWToPetsc()` removes those extra zeros.
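
  Example Usage:
  A minimal round-trip sketch (assumes A, the PETSc vectors u and v, and the total size N =
  dim[0]*...*dim[ndim-1] are set up by the caller; error checking omitted). FFTW transforms are
  unnormalized, hence the final scaling by 1/N:
.vb
  Vec x, y, z;
  MatCreateVecsFFTW(A, &x, &y, &z);
  VecScatterPetscToFFTW(A, u, x);    /* u -> padded FFTW input vector */
  MatMult(A, x, y);                  /* forward DFT */
  MatMultTranspose(A, y, z);         /* backward DFT (unnormalized) */
  VecScatterFFTWToPetsc(A, z, v);    /* strip the padding, copy into the PETSc vector v */
  VecScale(v, 1.0 / (PetscReal)N);   /* now v equals u */
.ve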
952: .seealso: `VecScatterPetscToFFTW()`, `MATFFTW`, `MatCreateVecsFFTW()`
953: @*/
954: PetscErrorCode VecScatterFFTWToPetsc(Mat A, Vec x, Vec y)
955: {
956: PetscUseMethod(A, "VecScatterFFTWToPetsc_C", (Mat, Vec, Vec), (A, x, y));
957: return 0;
958: }
960: PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A, Vec x, Vec y)
961: {
962: MPI_Comm comm;
963: Mat_FFT *fft = (Mat_FFT *)A->data;
964: PetscInt low;
965: PetscMPIInt rank, size;
966: VecScatter vecscat;
967: IS list1;
969: PetscObjectGetComm((PetscObject)A, &comm);
970: MPI_Comm_size(comm, &size);
971: MPI_Comm_rank(comm, &rank);
972: VecGetOwnershipRange(x, &low, NULL);
974: if (size == 1) {
975: ISCreateStride(comm, fft->N, 0, 1, &list1);
976: VecScatterCreate(x, list1, y, list1, &vecscat);
977: VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD);
978: VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD);
979: VecScatterDestroy(&vecscat);
980: ISDestroy(&list1);
982: #if !PetscDefined(HAVE_MPIUNI)
983: } else {
984: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
985: PetscInt ndim = fft->ndim, *dim = fft->dim;
986: ptrdiff_t local_n0, local_0_start;
987: ptrdiff_t local_n1, local_1_start;
988: IS list2;
989: #if !defined(PETSC_USE_COMPLEX)
990: PetscInt i, j, k, partial_dim;
991: PetscInt *indx1, *indx2, tempindx, tempindx1;
992: PetscInt NM;
993: ptrdiff_t temp;
994: #endif
995: switch (ndim) {
996: case 1:
997: #if defined(PETSC_USE_COMPLEX)
998: fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
1000: ISCreateStride(comm, local_n1, local_1_start, 1, &list1);
1001: ISCreateStride(comm, local_n1, low, 1, &list2);
1002: VecScatterCreate(x, list1, y, list2, &vecscat);
1003: VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD);
1004: VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD);
1005: VecScatterDestroy(&vecscat);
1006: ISDestroy(&list1);
1007: ISDestroy(&list2);
1008: #else
1009: SETERRQ(comm, PETSC_ERR_SUP, "No support for real parallel 1D FFT");
1010: #endif
1011: break;
1012: case 2:
1013: #if defined(PETSC_USE_COMPLEX)
1014: fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
1016: ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1);
1017: ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2);
1018: VecScatterCreate(x, list2, y, list1, &vecscat);
1019: VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD);
1020: VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD);
1021: VecScatterDestroy(&vecscat);
1022: ISDestroy(&list1);
1023: ISDestroy(&list2);
1024: #else
1025: fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1027: PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx1);
1028: PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx2);
1030: if (dim[1] % 2 == 0) NM = dim[1] + 2;
1031: else NM = dim[1] + 1;
1033: for (i = 0; i < local_n0; i++) {
1034: for (j = 0; j < dim[1]; j++) {
1035: tempindx = i * dim[1] + j;
1036: tempindx1 = i * NM + j;
1038: indx1[tempindx] = local_0_start * dim[1] + tempindx;
1039: indx2[tempindx] = low + tempindx1;
1040: }
1041: }
1043: ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1);
1044: ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2);
1046: VecScatterCreate(x, list2, y, list1, &vecscat);
1047: VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD);
1048: VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD);
1049: VecScatterDestroy(&vecscat);
1050: ISDestroy(&list1);
1051: ISDestroy(&list2);
1052: PetscFree(indx1);
1053: PetscFree(indx2);
1054: #endif
1055: break;
1056: case 3:
1057: #if defined(PETSC_USE_COMPLEX)
1058: fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
1060: ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1);
1061: ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2);
1062: VecScatterCreate(x, list1, y, list2, &vecscat);
1063: VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD);
1064: VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD);
1065: VecScatterDestroy(&vecscat);
1066: ISDestroy(&list1);
1067: ISDestroy(&list2);
1068: #else
1069: fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1071: PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx1);
1072: PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx2);
1074: if (dim[2] % 2 == 0) NM = dim[2] + 2;
1075: else NM = dim[2] + 1;
1077: for (i = 0; i < local_n0; i++) {
1078: for (j = 0; j < dim[1]; j++) {
1079: for (k = 0; k < dim[2]; k++) {
1080: tempindx = i * dim[1] * dim[2] + j * dim[2] + k;
1081: tempindx1 = i * dim[1] * NM + j * NM + k;
1083: indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
1084: indx2[tempindx] = low + tempindx1;
1085: }
1086: }
1087: }
1089: ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1);
1090: ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2);
1092: VecScatterCreate(x, list2, y, list1, &vecscat);
1093: VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD);
1094: VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD);
1095: VecScatterDestroy(&vecscat);
1096: ISDestroy(&list1);
1097: ISDestroy(&list2);
1098: PetscFree(indx1);
1099: PetscFree(indx2);
1100: #endif
1101: break;
1102: default:
1103: #if defined(PETSC_USE_COMPLEX)
1104: fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
1106: ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1);
1107: ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2);
1108: VecScatterCreate(x, list1, y, list2, &vecscat);
1109: VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD);
1110: VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD);
1111: VecScatterDestroy(&vecscat);
1112: ISDestroy(&list1);
1113: ISDestroy(&list2);
1114: #else
1115: temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
1117: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
1119: fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1121: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
1123: partial_dim = fftw->partial_dim;
1125: PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx1);
1126: PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx2);
1128: if (dim[ndim - 1] % 2 == 0) NM = 2;
1129: else NM = 1;
1131: j = low;
1132: for (i = 0, k = 1; i < ((PetscInt)local_n0) * partial_dim; i++, k++) {
1133: indx1[i] = local_0_start * partial_dim + i;
1134: indx2[i] = j;
1135: if (k % dim[ndim - 1] == 0) j += NM;
1136: j++;
1137: }
1138: ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1);
1139: ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2);
1141: VecScatterCreate(x, list2, y, list1, &vecscat);
1142: VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD);
1143: VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD);
1144: VecScatterDestroy(&vecscat);
1145: ISDestroy(&list1);
1146: ISDestroy(&list2);
1147: PetscFree(indx1);
1148: PetscFree(indx2);
1149: #endif
1150: break;
1151: }
1152: #endif
1153: }
1154: return 0;
1155: }
1157: /*
1158: MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW
1160: Options Database Key:
1161: . -mat_fftw_plannerflags <FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE> - set the FFTW planner flag
1163: Level: intermediate
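
  Example Usage:
  A minimal creation sketch (assumes a 2D transform of size 128 x 128; error checking omitted). The
  planner flag defaults to FFTW_ESTIMATE and can be changed at run time, e.g. with
  -mat_fftw_plannerflags FFTW_MEASURE:

    Mat      A;
    PetscInt dim[2] = {128, 128};

    MatCreateFFT(PETSC_COMM_WORLD, 2, dim, MATFFTW, &A);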
1165: */
1166: PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1167: {
1168: MPI_Comm comm;
1169: Mat_FFT *fft = (Mat_FFT *)A->data;
1170: Mat_FFTW *fftw;
1171: PetscInt ndim = fft->ndim, *dim = fft->dim;
1172: const char *plans[] = {"FFTW_ESTIMATE", "FFTW_MEASURE", "FFTW_PATIENT", "FFTW_EXHAUSTIVE"};
1173: unsigned iplans[] = {FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE};
1174: PetscBool flg;
1175: PetscInt p_flag, partial_dim = 1, ctr;
1176: PetscMPIInt size, rank;
1177: ptrdiff_t *pdim;
1178: #if !defined(PETSC_USE_COMPLEX)
1179: PetscInt tot_dim;
1180: #endif
1182: PetscObjectGetComm((PetscObject)A, &comm);
1183: MPI_Comm_size(comm, &size);
1184: MPI_Comm_rank(comm, &rank);
1186: #if !PetscDefined(HAVE_MPIUNI)
1187: fftw_mpi_init();
1188: #endif
1189: pdim = (ptrdiff_t *)calloc(ndim, sizeof(ptrdiff_t));
1190: pdim[0] = dim[0];
1191: #if !defined(PETSC_USE_COMPLEX)
1192: tot_dim = 2 * dim[0];
1193: #endif
1194: for (ctr = 1; ctr < ndim; ctr++) {
1195: partial_dim *= dim[ctr];
1196: pdim[ctr] = dim[ctr];
1197: #if !defined(PETSC_USE_COMPLEX)
1198: if (ctr == ndim - 1) tot_dim *= (dim[ctr] / 2 + 1);
1199: else tot_dim *= dim[ctr];
1200: #endif
1201: }
1203: if (size == 1) {
1204: #if defined(PETSC_USE_COMPLEX)
1205: MatSetSizes(A, fft->N, fft->N, fft->N, fft->N);
1206: fft->n = fft->N;
1207: #else
1208: MatSetSizes(A, tot_dim, tot_dim, tot_dim, tot_dim);
1209: fft->n = tot_dim;
1210: #endif
1211: #if !PetscDefined(HAVE_MPIUNI)
1212: } else {
1213: ptrdiff_t local_n0, local_0_start, local_n1, local_1_start;
1214: #if !defined(PETSC_USE_COMPLEX)
1215: ptrdiff_t temp;
1216: PetscInt N1;
1217: #endif
1219: switch (ndim) {
1220: case 1:
1221: #if !defined(PETSC_USE_COMPLEX)
1222: SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
1223: #else
1224: fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
1225: fft->n = (PetscInt)local_n0;
1226: MatSetSizes(A, local_n1, fft->n, fft->N, fft->N);
1227: #endif
1228: break;
1229: case 2:
1230: #if defined(PETSC_USE_COMPLEX)
1231: fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
1232: fft->n = (PetscInt)local_n0 * dim[1];
1233: MatSetSizes(A, fft->n, fft->n, fft->N, fft->N);
1234: #else
1235: fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1237: fft->n = 2 * (PetscInt)local_n0 * (dim[1] / 2 + 1);
1238: MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * (dim[1] / 2 + 1), 2 * dim[0] * (dim[1] / 2 + 1));
1239: #endif
1240: break;
1241: case 3:
1242: #if defined(PETSC_USE_COMPLEX)
1243: fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
1245: fft->n = (PetscInt)local_n0 * dim[1] * dim[2];
1246: MatSetSizes(A, fft->n, fft->n, fft->N, fft->N);
1247: #else
1248: fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1250: fft->n = 2 * (PetscInt)local_n0 * dim[1] * (dim[2] / 2 + 1);
1251: MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * dim[1] * (dim[2] / 2 + 1), 2 * dim[0] * dim[1] * (dim[2] / 2 + 1));
1252: #endif
1253: break;
1254: default:
1255: #if defined(PETSC_USE_COMPLEX)
1256: fftw_mpi_local_size(ndim, pdim, comm, &local_n0, &local_0_start);
1258: fft->n = (PetscInt)local_n0 * partial_dim;
1259: MatSetSizes(A, fft->n, fft->n, fft->N, fft->N);
1260: #else
1261: temp = pdim[ndim - 1];
1263: pdim[ndim - 1] = temp / 2 + 1;
1265: fftw_mpi_local_size_transposed(ndim, pdim, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1267: fft->n = 2 * (PetscInt)local_n0 * partial_dim * pdim[ndim - 1] / temp;
1268: N1 = 2 * fft->N * (PetscInt)pdim[ndim - 1] / ((PetscInt)temp);
1270: pdim[ndim - 1] = temp;
1272: MatSetSizes(A, fft->n, fft->n, N1, N1);
1273: #endif
1274: break;
1275: }
1276: #endif
1277: }
1278: free(pdim);
1279: PetscObjectChangeTypeName((PetscObject)A, MATFFTW);
1280: PetscNew(&fftw);
1281: fft->data = (void *)fftw;
1283: fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */
1284: fftw->partial_dim = partial_dim;
1286: PetscMalloc1(ndim, &(fftw->dim_fftw));
1287: if (size == 1) {
1288: #if defined(PETSC_USE_64BIT_INDICES)
1289: fftw->iodims = (fftw_iodim64 *)malloc(sizeof(fftw_iodim64) * ndim);
1290: #else
1291: fftw->iodims = (fftw_iodim *)malloc(sizeof(fftw_iodim) * ndim);
1292: #endif
1293: }
1295: for (ctr = 0; ctr < ndim; ctr++) (fftw->dim_fftw)[ctr] = dim[ctr];
1297: fftw->p_forward = NULL;
1298: fftw->p_backward = NULL;
1299: fftw->p_flag = FFTW_ESTIMATE;
1301: if (size == 1) {
1302: A->ops->mult = MatMult_SeqFFTW;
1303: A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1304: #if !PetscDefined(HAVE_MPIUNI)
1305: } else {
1306: A->ops->mult = MatMult_MPIFFTW;
1307: A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1308: #endif
1309: }
1310: fft->matdestroy = MatDestroy_FFTW;
1311: A->assembled = PETSC_TRUE;
1312: A->preallocated = PETSC_TRUE;
1314: PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", MatCreateVecsFFTW_FFTW);
1315: PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", VecScatterPetscToFFTW_FFTW);
1316: PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", VecScatterFFTWToPetsc_FFTW);
1318: /* get runtime options */
1319: PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "FFTW Options", "Mat");
1320: PetscOptionsEList("-mat_fftw_plannerflags", "Planner Flags", "None", plans, 4, plans[0], &p_flag, &flg);
1321: if (flg) fftw->p_flag = iplans[p_flag];
1322: PetscOptionsEnd();
1323: return 0;
1324: }