Actual source code: fftw.c
1: /*
2: Provides an interface to the FFTW package.
3: Testing examples can be found in ~src/mat/tests
4: */
6: #include <../src/mat/impls/fft/fft.h>
7: EXTERN_C_BEGIN
8: #if !PetscDefined(HAVE_MPIUNI)
9: #include <fftw3-mpi.h>
10: #else
11: #include <fftw3.h>
12: #endif
13: EXTERN_C_END
15: typedef struct {
16: ptrdiff_t ndim_fftw, *dim_fftw;
17: #if defined(PETSC_USE_64BIT_INDICES)
18: fftw_iodim64 *iodims;
19: #else
20: fftw_iodim *iodims;
21: #endif
22: PetscInt partial_dim;
23: fftw_plan p_forward, p_backward;
24: unsigned p_flag; /* planner flags: FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
25: PetscScalar *finarray, *foutarray, *binarray, *boutarray; /* keep track of the arrays because an fftw plan should be
26: executed on the arrays with which the plan was created */
27: } Mat_FFTW;
29: extern PetscErrorCode MatMult_SeqFFTW(Mat, Vec, Vec);
30: extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat, Vec, Vec);
31: extern PetscErrorCode MatDestroy_FFTW(Mat);
32: extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat, Vec *, Vec *, Vec *);
33: #if !PetscDefined(HAVE_MPIUNI)
34: extern PetscErrorCode MatMult_MPIFFTW(Mat, Vec, Vec);
35: extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat, Vec, Vec);
36: extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
37: #endif
39: PetscErrorCode MatMult_SeqFFTW(Mat A, Vec x, Vec y)
40: {
41: Mat_FFT *fft = (Mat_FFT *)A->data;
42: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
43: const PetscScalar *x_array;
44: PetscScalar *y_array;
45: #if defined(PETSC_USE_COMPLEX)
46: #if defined(PETSC_USE_64BIT_INDICES)
47: fftw_iodim64 *iodims;
48: #else
49: fftw_iodim *iodims;
50: #endif
51: PetscInt i;
52: #endif
53: PetscInt ndim = fft->ndim, *dim = fft->dim;
55: PetscFunctionBegin;
56: PetscCall(VecGetArrayRead(x, &x_array));
57: PetscCall(VecGetArray(y, &y_array));
58: if (!fftw->p_forward) { /* create a plan, then execute it */
59: switch (ndim) {
60: case 1:
61: #if defined(PETSC_USE_COMPLEX)
62: fftw->p_forward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
63: #else
64: fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
65: #endif
66: break;
67: case 2:
68: #if defined(PETSC_USE_COMPLEX)
69: fftw->p_forward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
70: #else
71: fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
72: #endif
73: break;
74: case 3:
75: #if defined(PETSC_USE_COMPLEX)
76: fftw->p_forward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
77: #else
78: fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
79: #endif
80: break;
81: default:
82: #if defined(PETSC_USE_COMPLEX)
83: iodims = fftw->iodims;
84: #if defined(PETSC_USE_64BIT_INDICES)
85: if (ndim) {
86: iodims[ndim - 1].n = (ptrdiff_t)dim[ndim - 1];
87: iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
88: for (i = ndim - 2; i >= 0; --i) {
89: iodims[i].n = (ptrdiff_t)dim[i];
90: iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
91: }
92: }
93: fftw->p_forward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
94: #else
95: if (ndim) {
96: iodims[ndim - 1].n = (int)dim[ndim - 1];
97: iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
98: for (i = ndim - 2; i >= 0; --i) {
99: iodims[i].n = (int)dim[i];
100: iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
101: }
102: }
103: fftw->p_forward = fftw_plan_guru_dft((int)ndim, (fftw_iodim *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
104: #endif
106: #else
107: fftw->p_forward = fftw_plan_dft_r2c(ndim, (int *)dim, (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
108: #endif
109: break;
110: }
111: fftw->finarray = (PetscScalar *)x_array;
112: fftw->foutarray = y_array;
113: /* Warning: if (fftw->p_flag != FFTW_ESTIMATE), the data in the in/out arrays is overwritten!
114: Planning should be done before x is initialized! See FFTW manual sec. 2.1 or sec. 4 */
115: fftw_execute(fftw->p_forward);
116: } else { /* use existing plan */
117: if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
118: #if defined(PETSC_USE_COMPLEX)
119: fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
120: #else
121: fftw_execute_dft_r2c(fftw->p_forward, (double *)x_array, (fftw_complex *)y_array);
122: #endif
123: } else {
124: fftw_execute(fftw->p_forward);
125: }
126: }
127: PetscCall(VecRestoreArray(y, &y_array));
128: PetscCall(VecRestoreArrayRead(x, &x_array));
129: PetscFunctionReturn(PETSC_SUCCESS);
130: }
132: PetscErrorCode MatMultTranspose_SeqFFTW(Mat A, Vec x, Vec y)
133: {
134: Mat_FFT *fft = (Mat_FFT *)A->data;
135: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
136: const PetscScalar *x_array;
137: PetscScalar *y_array;
138: PetscInt ndim = fft->ndim, *dim = fft->dim;
139: #if defined(PETSC_USE_COMPLEX)
140: #if defined(PETSC_USE_64BIT_INDICES)
141: fftw_iodim64 *iodims = fftw->iodims;
142: #else
143: fftw_iodim *iodims = fftw->iodims;
144: #endif
145: #endif
147: PetscFunctionBegin;
148: PetscCall(VecGetArrayRead(x, &x_array));
149: PetscCall(VecGetArray(y, &y_array));
150: if (!fftw->p_backward) { /* create a plan, then execute it */
151: switch (ndim) {
152: case 1:
153: #if defined(PETSC_USE_COMPLEX)
154: fftw->p_backward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
155: #else
156: fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
157: #endif
158: break;
159: case 2:
160: #if defined(PETSC_USE_COMPLEX)
161: fftw->p_backward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
162: #else
163: fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
164: #endif
165: break;
166: case 3:
167: #if defined(PETSC_USE_COMPLEX)
168: fftw->p_backward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
169: #else
170: fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
171: #endif
172: break;
173: default:
174: #if defined(PETSC_USE_COMPLEX)
175: #if defined(PETSC_USE_64BIT_INDICES)
176: fftw->p_backward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
177: #else
178: fftw->p_backward = fftw_plan_guru_dft((int)ndim, iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
179: #endif
180: #else
181: fftw->p_backward = fftw_plan_dft_c2r((int)ndim, (int *)dim, (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
182: #endif
183: break;
184: }
185: fftw->binarray = (PetscScalar *)x_array;
186: fftw->boutarray = y_array;
187: fftw_execute(fftw->p_backward);
188: } else { /* use existing plan */
189: if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
190: #if defined(PETSC_USE_COMPLEX)
191: fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
192: #else
193: fftw_execute_dft_c2r(fftw->p_backward, (fftw_complex *)x_array, (double *)y_array);
194: #endif
195: } else {
196: fftw_execute(fftw->p_backward);
197: }
198: }
199: PetscCall(VecRestoreArray(y, &y_array));
200: PetscCall(VecRestoreArrayRead(x, &x_array));
201: PetscFunctionReturn(PETSC_SUCCESS);
202: }
204: #if !PetscDefined(HAVE_MPIUNI)
205: PetscErrorCode MatMult_MPIFFTW(Mat A, Vec x, Vec y)
206: {
207: Mat_FFT *fft = (Mat_FFT *)A->data;
208: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
209: const PetscScalar *x_array;
210: PetscScalar *y_array;
211: PetscInt ndim = fft->ndim, *dim = fft->dim;
212: MPI_Comm comm;
214: PetscFunctionBegin;
215: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
216: PetscCall(VecGetArrayRead(x, &x_array));
217: PetscCall(VecGetArray(y, &y_array));
218: if (!fftw->p_forward) { /* create a plan, then execute it */
219: switch (ndim) {
220: case 1:
221: #if defined(PETSC_USE_COMPLEX)
222: fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
223: #else
224: SETERRQ(comm, PETSC_ERR_SUP, "No support for real numbers yet");
225: #endif
226: break;
227: case 2:
228: #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
229: fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
230: #else
231: fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
232: #endif
233: break;
234: case 3:
235: #if defined(PETSC_USE_COMPLEX)
236: fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
237: #else
238: fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
239: #endif
240: break;
241: default:
242: #if defined(PETSC_USE_COMPLEX)
243: fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
244: #else
245: fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw, fftw->dim_fftw, (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
246: #endif
247: break;
248: }
249: fftw->finarray = (PetscScalar *)x_array;
250: fftw->foutarray = y_array;
251: /* Warning: if (fftw->p_flag != FFTW_ESTIMATE), the data in the in/out arrays is overwritten!
252: Planning should be done before x is initialized! See FFTW manual sec. 2.1 or sec. 4 */
253: fftw_execute(fftw->p_forward);
254: } else { /* use existing plan */
255: if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
256: fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
257: } else {
258: fftw_execute(fftw->p_forward);
259: }
260: }
261: PetscCall(VecRestoreArray(y, &y_array));
262: PetscCall(VecRestoreArrayRead(x, &x_array));
263: PetscFunctionReturn(PETSC_SUCCESS);
264: }
266: PetscErrorCode MatMultTranspose_MPIFFTW(Mat A, Vec x, Vec y)
267: {
268: Mat_FFT *fft = (Mat_FFT *)A->data;
269: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
270: const PetscScalar *x_array;
271: PetscScalar *y_array;
272: PetscInt ndim = fft->ndim, *dim = fft->dim;
273: MPI_Comm comm;
275: PetscFunctionBegin;
276: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
277: PetscCall(VecGetArrayRead(x, &x_array));
278: PetscCall(VecGetArray(y, &y_array));
279: if (!fftw->p_backward) { /* create a plan, then execute it */
280: switch (ndim) {
281: case 1:
282: #if defined(PETSC_USE_COMPLEX)
283: fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
284: #else
285: SETERRQ(comm, PETSC_ERR_SUP, "No support for real numbers yet");
286: #endif
287: break;
288: case 2:
289: #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
290: fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
291: #else
292: fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
293: #endif
294: break;
295: case 3:
296: #if defined(PETSC_USE_COMPLEX)
297: fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
298: #else
299: fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
300: #endif
301: break;
302: default:
303: #if defined(PETSC_USE_COMPLEX)
304: fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
305: #else
306: fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
307: #endif
308: break;
309: }
310: fftw->binarray = (PetscScalar *)x_array;
311: fftw->boutarray = y_array;
312: fftw_execute(fftw->p_backward);
313: } else { /* use existing plan */
314: if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
315: fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
316: } else {
317: fftw_execute(fftw->p_backward);
318: }
319: }
320: PetscCall(VecRestoreArray(y, &y_array));
321: PetscCall(VecRestoreArrayRead(x, &x_array));
322: PetscFunctionReturn(PETSC_SUCCESS);
323: }
324: #endif
326: PetscErrorCode MatDestroy_FFTW(Mat A)
327: {
328: Mat_FFT *fft = (Mat_FFT *)A->data;
329: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
331: PetscFunctionBegin;
332: fftw_destroy_plan(fftw->p_forward);
333: fftw_destroy_plan(fftw->p_backward);
334: if (fftw->iodims) free(fftw->iodims);
335: PetscCall(PetscFree(fftw->dim_fftw));
336: PetscCall(PetscFree(fft->data));
337: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", NULL));
338: PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", NULL));
339: PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", NULL));
340: #if !PetscDefined(HAVE_MPIUNI)
341: fftw_mpi_cleanup();
342: #endif
343: PetscFunctionReturn(PETSC_SUCCESS);
344: }
346: #if !PetscDefined(HAVE_MPIUNI)
347: #include <../src/vec/vec/impls/mpi/pvecimpl.h>
348: PetscErrorCode VecDestroy_MPIFFTW(Vec v)
349: {
350: PetscScalar *array;
352: PetscFunctionBegin;
353: PetscCall(VecGetArray(v, &array));
354: fftw_free((fftw_complex *)array);
355: PetscCall(VecRestoreArray(v, &array));
356: PetscCall(VecDestroy_MPI(v));
357: PetscFunctionReturn(PETSC_SUCCESS);
358: }
359: #endif
361: #if !PetscDefined(HAVE_MPIUNI)
362: static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin, Vec *fin_new)
363: {
364: Mat A;
366: PetscFunctionBegin;
367: PetscCall(PetscObjectQuery((PetscObject)fin, "FFTmatrix", (PetscObject *)&A));
368: PetscCall(MatCreateVecsFFTW_FFTW(A, fin_new, NULL, NULL));
369: PetscFunctionReturn(PETSC_SUCCESS);
370: }
372: static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout, Vec *fout_new)
373: {
374: Mat A;
376: PetscFunctionBegin;
377: PetscCall(PetscObjectQuery((PetscObject)fout, "FFTmatrix", (PetscObject *)&A));
378: PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, fout_new, NULL));
379: PetscFunctionReturn(PETSC_SUCCESS);
380: }
382: static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new)
383: {
384: Mat A;
386: PetscFunctionBegin;
387: PetscCall(PetscObjectQuery((PetscObject)bout, "FFTmatrix", (PetscObject *)&A));
388: PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, NULL, bout_new));
389: PetscFunctionReturn(PETSC_SUCCESS);
390: }
391: #endif
393: /*@
394: MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
395: parallel layout determined by `MATFFTW`
397: Collective
399: Input Parameter:
400: . A - the matrix
402: Output Parameters:
403: + x - (optional) input vector of forward FFTW
404: . y - (optional) output vector of forward FFTW
405: - z - (optional) output vector of backward FFTW
407: Options Database Key:
408: . -mat_fftw_plannerflags - set FFTW planner flags
410: Level: advanced
412: Notes:
413: The parallel layout of the output of the forward FFTW is always the same as the input
414: of the backward FFTW, but the parallel layout of the input vector of the forward
415: FFTW might not be the same as the output of the backward FFTW.
417: One must provide enough space when doing a parallel real transform.
418: Extra zeros must be padded at the end of the last dimension. For this reason one needs to
419: invoke the fftw_mpi_local_size_transposed() routine. Remember that one has to change the
420: last dimension from n to n/2+1 when invoking this routine. The number of zeros to be padded
421: depends on whether the last dimension is even or odd: if the last dimension is even one needs
422: to pad two zeros; if it is odd only one zero is needed.
424: Lastly, one needs some scratch space at the end of the data set on each process. The alloc_local
425: value returned by the fftw_mpi_local_size routines accounts for this, i.e. it gives the
426: data plus scratch space needed on each process.
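
Example Usage:
A minimal sketch (error checking abbreviated, sizes illustrative) that creates a 2D FFT matrix and obtains compatible work vectors:
.vb
  Mat      A;
  Vec      fin, fout, bout;
  PetscInt dim[2] = {8, 8};

  PetscCall(MatCreateFFT(PETSC_COMM_WORLD, 2, dim, MATFFTW, &A));
  PetscCall(MatCreateVecsFFTW(A, &fin, &fout, &bout));
  /* ... use the vectors with MatMult()/MatMultTranspose() ... */
  PetscCall(VecDestroy(&fin));
  PetscCall(VecDestroy(&fout));
  PetscCall(VecDestroy(&bout));
  PetscCall(MatDestroy(&A));
.ve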
428: Developer Notes:
429: How come `MatCreateVecs()` doesn't produce the correctly padded vectors automatically?
431: .seealso: [](ch_matrices), `Mat`, `MATFFTW`, `MatCreateFFT()`, `MatCreateVecs()`
432: @*/
433: PetscErrorCode MatCreateVecsFFTW(Mat A, Vec *x, Vec *y, Vec *z)
434: {
435: PetscFunctionBegin;
436: PetscUseMethod(A, "MatCreateVecsFFTW_C", (Mat, Vec *, Vec *, Vec *), (A, x, y, z));
437: PetscFunctionReturn(PETSC_SUCCESS);
438: }
440: PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A, Vec *fin, Vec *fout, Vec *bout)
441: {
442: PetscMPIInt size, rank;
443: MPI_Comm comm;
444: Mat_FFT *fft = (Mat_FFT *)A->data;
446: PetscFunctionBegin;
449: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
451: PetscCallMPI(MPI_Comm_size(comm, &size));
452: PetscCallMPI(MPI_Comm_rank(comm, &rank));
453: if (size == 1) { /* sequential case */
454: #if defined(PETSC_USE_COMPLEX)
455: if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fin));
456: if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fout));
457: if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, bout));
458: #else
459: if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fin));
460: if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fout));
461: if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, bout));
462: #endif
463: #if !PetscDefined(HAVE_MPIUNI)
464: } else { /* parallel cases */
465: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
466: PetscInt ndim = fft->ndim, *dim = fft->dim;
467: ptrdiff_t alloc_local, local_n0, local_0_start;
468: ptrdiff_t local_n1;
469: fftw_complex *data_fout;
470: ptrdiff_t local_1_start;
471: #if defined(PETSC_USE_COMPLEX)
472: fftw_complex *data_fin, *data_bout;
473: #else
474: double *data_finr, *data_boutr;
475: PetscInt n1, N1;
476: ptrdiff_t temp;
477: #endif
479: switch (ndim) {
480: case 1:
481: #if !defined(PETSC_USE_COMPLEX)
482: SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not allow parallel real 1D transform");
483: #else
484: alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
485: if (fin) {
486: data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
487: PetscCall(VecCreateMPIWithArray(comm, 1, local_n0, fft->N, (const PetscScalar *)data_fin, fin));
488: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
489: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
490: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
491: }
492: if (fout) {
493: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
494: PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_fout, fout));
495: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
496: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
497: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
498: }
499: alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
500: if (bout) {
501: data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
502: PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_bout, bout));
503: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
504: (*bout)->ops->duplicate = VecDuplicate_FFTW_fout;
505: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
506: }
507: break;
508: #endif
509: case 2:
510: #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no longer the product of the individual dimensions */
511: alloc_local = fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
512: N1 = 2 * dim[0] * (dim[1] / 2 + 1);
513: n1 = 2 * local_n0 * (dim[1] / 2 + 1);
514: if (fin) {
515: data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
516: PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
517: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
518: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
519: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
520: }
521: if (fout) {
522: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
523: PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_fout, fout));
524: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
525: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
526: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
527: }
528: if (bout) {
529: data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
530: PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
531: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
532: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
533: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
534: }
535: #else
536: /* Get local size */
537: alloc_local = fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
538: if (fin) {
539: data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
540: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
541: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
542: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
543: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
544: }
545: if (fout) {
546: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
547: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
548: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
549: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
550: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
551: }
552: if (bout) {
553: data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
554: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
555: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
556: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
557: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
558: }
559: #endif
560: break;
561: case 3:
562: #if !defined(PETSC_USE_COMPLEX)
563: alloc_local = fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
564: N1 = 2 * dim[0] * dim[1] * (dim[2] / 2 + 1);
565: n1 = 2 * local_n0 * dim[1] * (dim[2] / 2 + 1);
566: if (fin) {
567: data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
568: PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
569: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
570: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
571: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
572: }
573: if (fout) {
574: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
575: PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (PetscScalar *)data_fout, fout));
576: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
577: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
578: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
579: }
580: if (bout) {
581: data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
582: PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
583: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
584: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
585: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
586: }
587: #else
588: alloc_local = fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
589: if (fin) {
590: data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
591: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
592: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
593: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
594: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
595: }
596: if (fout) {
597: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
598: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
599: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
600: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
601: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
602: }
603: if (bout) {
604: data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
605: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
606: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
607: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
608: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
609: }
610: #endif
611: break;
612: default:
613: #if !defined(PETSC_USE_COMPLEX)
614: temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
615: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
616: alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
617: N1 = 2 * fft->N * (PetscInt)(fftw->dim_fftw[fftw->ndim_fftw - 1]) / ((PetscInt)temp);
618: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
620: if (fin) {
621: data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
622: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_finr, fin));
623: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
624: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
625: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
626: }
627: if (fout) {
628: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
629: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_fout, fout));
630: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
631: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
632: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
633: }
634: if (bout) {
635: data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
636: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_boutr, bout));
637: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
638: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
639: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
640: }
641: #else
642: alloc_local = fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
643: if (fin) {
644: data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
645: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
646: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
647: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
648: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
649: }
650: if (fout) {
651: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
652: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
653: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
654: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
655: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
656: }
657: if (bout) {
658: data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
659: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
660: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
661: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
662: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
663: }
664: #endif
665: break;
666: }
667: /* fftw vectors have their data array allocated by fftw_malloc, such that v->array=xxx but
668: v->array_allocated=NULL. A regular replacearray call would not free that memory and would only
669: cause memory leaks. We set these function pointers to NULL here to avoid accidental use.
670: */
671: if (fin) (*fin)->ops->replacearray = NULL;
672: if (fout) (*fout)->ops->replacearray = NULL;
673: if (bout) (*bout)->ops->replacearray = NULL;
674: #endif
675: }
676: PetscFunctionReturn(PETSC_SUCCESS);
677: }
679: /*@
680: VecScatterPetscToFFTW - Copies a PETSc vector to the vector that goes into `MATFFTW` calls.
682: Collective
684: Input Parameters:
685: + A - FFTW matrix
686: - x - the PETSc vector
688: Output Parameter:
689: . y - the FFTW vector
691: Level: intermediate
693: Note:
694: For a parallel real FFT, FFTW requires extra space to be inserted at the end of the last dimension. This is required even when
695: one is not doing an in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
696: zeros. This routine does that job with a scatter operation.
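
Example Usage:
A minimal sketch of the intended calling sequence, assuming the work vectors were obtained with `MatCreateVecsFFTW()`:
.vb
  PetscCall(VecScatterPetscToFFTW(A, x, fin));  /* copy (and pad) the PETSc vector into the FFTW layout */
  PetscCall(MatMult(A, fin, fout));             /* forward transform */
  PetscCall(VecScatterFFTWToPetsc(A, fout, y)); /* copy the result back, dropping the padding */
.ve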
698: .seealso: [](ch_matrices), `Mat`, `MATFFTW`, `VecScatterFFTWToPetsc()`, `MatCreateVecsFFTW()`
699: @*/
700: PetscErrorCode VecScatterPetscToFFTW(Mat A, Vec x, Vec y)
701: {
702: PetscFunctionBegin;
703: PetscUseMethod(A, "VecScatterPetscToFFTW_C", (Mat, Vec, Vec), (A, x, y));
704: PetscFunctionReturn(PETSC_SUCCESS);
705: }
707: static PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A, Vec x, Vec y)
708: {
709: MPI_Comm comm;
710: Mat_FFT *fft = (Mat_FFT *)A->data;
711: PetscInt low;
712: PetscMPIInt rank, size;
713: PetscInt vsize, vsize1;
714: VecScatter vecscat;
715: IS list1;
717: PetscFunctionBegin;
718: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
719: PetscCallMPI(MPI_Comm_size(comm, &size));
720: PetscCallMPI(MPI_Comm_rank(comm, &rank));
721: PetscCall(VecGetOwnershipRange(y, &low, NULL));
723: if (size == 1) {
724: PetscCall(VecGetSize(x, &vsize));
725: PetscCall(VecGetSize(y, &vsize1));
726: PetscCall(ISCreateStride(PETSC_COMM_SELF, fft->N, 0, 1, &list1));
727: PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
728: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
729: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
730: PetscCall(VecScatterDestroy(&vecscat));
731: PetscCall(ISDestroy(&list1));
732: #if !PetscDefined(HAVE_MPIUNI)
733: } else {
734: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
735: PetscInt ndim = fft->ndim, *dim = fft->dim;
736: ptrdiff_t local_n0, local_0_start;
737: ptrdiff_t local_n1, local_1_start;
738: IS list2;
739: #if !defined(PETSC_USE_COMPLEX)
740: PetscInt i, j, k, partial_dim;
741: PetscInt *indx1, *indx2, tempindx, tempindx1;
742: PetscInt NM;
743: ptrdiff_t temp;
744: #endif
746: switch (ndim) {
747: case 1:
748: #if defined(PETSC_USE_COMPLEX)
749: fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
751: PetscCall(ISCreateStride(comm, local_n0, local_0_start, 1, &list1));
752: PetscCall(ISCreateStride(comm, local_n0, low, 1, &list2));
753: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
754: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
755: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
756: PetscCall(VecScatterDestroy(&vecscat));
757: PetscCall(ISDestroy(&list1));
758: PetscCall(ISDestroy(&list2));
759: #else
760: SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
761: #endif
762: break;
763: case 2:
764: #if defined(PETSC_USE_COMPLEX)
765: fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
767: PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
768: PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
769: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
770: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
771: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
772: PetscCall(VecScatterDestroy(&vecscat));
773: PetscCall(ISDestroy(&list1));
774: PetscCall(ISDestroy(&list2));
775: #else
776: fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
778: PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx1));
779: PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx2));
781: if (dim[1] % 2 == 0) {
782: NM = dim[1] + 2;
783: } else {
784: NM = dim[1] + 1;
785: }
786: for (i = 0; i < local_n0; i++) {
787: for (j = 0; j < dim[1]; j++) {
788: tempindx = i * dim[1] + j;
789: tempindx1 = i * NM + j;
791: indx1[tempindx] = local_0_start * dim[1] + tempindx;
792: indx2[tempindx] = low + tempindx1;
793: }
794: }
796: PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
797: PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));
799: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
800: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
801: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
802: PetscCall(VecScatterDestroy(&vecscat));
803: PetscCall(ISDestroy(&list1));
804: PetscCall(ISDestroy(&list2));
805: PetscCall(PetscFree(indx1));
806: PetscCall(PetscFree(indx2));
807: #endif
808: break;
810: case 3:
811: #if defined(PETSC_USE_COMPLEX)
812: fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
814: PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
815: PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
816: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
817: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
818: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
819: PetscCall(VecScatterDestroy(&vecscat));
820: PetscCall(ISDestroy(&list1));
821: PetscCall(ISDestroy(&list2));
822: #else
823: /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
824: SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 3D real transform");
825: fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
827: PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx1));
828: PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx2));
830: if (dim[2] % 2 == 0) NM = dim[2] + 2;
831: else NM = dim[2] + 1;
833: for (i = 0; i < local_n0; i++) {
834: for (j = 0; j < dim[1]; j++) {
835: for (k = 0; k < dim[2]; k++) {
836: tempindx = i * dim[1] * dim[2] + j * dim[2] + k;
837: tempindx1 = i * dim[1] * NM + j * NM + k;
839: indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
840: indx2[tempindx] = low + tempindx1;
841: }
842: }
843: }
845: PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
846: PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));
847: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
848: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
849: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
850: PetscCall(VecScatterDestroy(&vecscat));
851: PetscCall(ISDestroy(&list1));
852: PetscCall(ISDestroy(&list2));
853: PetscCall(PetscFree(indx1));
854: PetscCall(PetscFree(indx2));
855: #endif
856: break;
858: default:
859: #if defined(PETSC_USE_COMPLEX)
860: fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
862: PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
863: PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
864: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
865: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
866: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
867: PetscCall(VecScatterDestroy(&vecscat));
868: PetscCall(ISDestroy(&list1));
869: PetscCall(ISDestroy(&list2));
870: #else
871: /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
872: SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel DIM>3 real transform");
873: temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
875: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
877: fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
879: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
881: partial_dim = fftw->partial_dim;
883: PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx1));
884: PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx2));
886: if (dim[ndim - 1] % 2 == 0) NM = 2;
887: else NM = 1;
889: j = low;
890: for (i = 0, k = 1; i < (PetscInt)local_n0 * partial_dim; i++, k++) {
891: indx1[i] = local_0_start * partial_dim + i;
892: indx2[i] = j;
893: if (k % dim[ndim - 1] == 0) j += NM;
894: j++;
895: }
896: PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
897: PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));
898: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
899: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
900: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
901: PetscCall(VecScatterDestroy(&vecscat));
902: PetscCall(ISDestroy(&list1));
903: PetscCall(ISDestroy(&list2));
904: PetscCall(PetscFree(indx1));
905: PetscCall(PetscFree(indx2));
906: #endif
907: break;
908: }
909: #endif
910: }
911: PetscFunctionReturn(PETSC_SUCCESS);
912: }
914: /*@
915: VecScatterFFTWToPetsc - Converts `MATFFTW` output vector to a PETSc vector.
917: Collective
919: Input Parameters:
920: + A - `MATFFTW` matrix
921: - x - FFTW vector
923: Output Parameter:
924: . y - PETSc vector
926: Level: intermediate
928: Note:
929: When doing a real transform, the FFTW output of the backward DFT contains extra zeros at the end of the last dimension.
930: `VecScatterFFTWToPetsc()` removes those extra zeros.
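
Example Usage:
A minimal sketch for the backward direction, assuming the work vectors were obtained with `MatCreateVecsFFTW()`:
.vb
  PetscCall(MatMultTranspose(A, fout, bout));   /* backward transform in the FFTW layout */
  PetscCall(VecScatterFFTWToPetsc(A, bout, z)); /* strip the padding into the PETSc vector z */
.ve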
932: .seealso: [](ch_matrices), `Mat`, `VecScatterPetscToFFTW()`, `MATFFTW`, `MatCreateVecsFFTW()`
933: @*/
934: PetscErrorCode VecScatterFFTWToPetsc(Mat A, Vec x, Vec y)
935: {
936: PetscFunctionBegin;
937: PetscUseMethod(A, "VecScatterFFTWToPetsc_C", (Mat, Vec, Vec), (A, x, y));
938: PetscFunctionReturn(PETSC_SUCCESS);
939: }
941: static PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A, Vec x, Vec y)
942: {
943: MPI_Comm comm;
944: Mat_FFT *fft = (Mat_FFT *)A->data;
945: PetscInt low;
946: PetscMPIInt rank, size;
947: VecScatter vecscat;
948: IS list1;
950: PetscFunctionBegin;
951: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
952: PetscCallMPI(MPI_Comm_size(comm, &size));
953: PetscCallMPI(MPI_Comm_rank(comm, &rank));
954: PetscCall(VecGetOwnershipRange(x, &low, NULL));
956: if (size == 1) {
957: PetscCall(ISCreateStride(comm, fft->N, 0, 1, &list1));
958: PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
959: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
960: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
961: PetscCall(VecScatterDestroy(&vecscat));
962: PetscCall(ISDestroy(&list1));
964: #if !PetscDefined(HAVE_MPIUNI)
965: } else {
966: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
967: PetscInt ndim = fft->ndim, *dim = fft->dim;
968: ptrdiff_t local_n0, local_0_start;
969: ptrdiff_t local_n1, local_1_start;
970: IS list2;
971: #if !defined(PETSC_USE_COMPLEX)
972: PetscInt i, j, k, partial_dim;
973: PetscInt *indx1, *indx2, tempindx, tempindx1;
974: PetscInt NM;
975: ptrdiff_t temp;
976: #endif
977: switch (ndim) {
978: case 1:
979: #if defined(PETSC_USE_COMPLEX)
980: fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
982: PetscCall(ISCreateStride(comm, local_n1, local_1_start, 1, &list1));
983: PetscCall(ISCreateStride(comm, local_n1, low, 1, &list2));
984: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
985: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
986: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
987: PetscCall(VecScatterDestroy(&vecscat));
988: PetscCall(ISDestroy(&list1));
989: PetscCall(ISDestroy(&list2));
990: #else
991: SETERRQ(comm, PETSC_ERR_SUP, "No support for real parallel 1D FFT");
992: #endif
993: break;
994: case 2:
995: #if defined(PETSC_USE_COMPLEX)
996: fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
998: PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
999: PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
1000: PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1001: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1002: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1003: PetscCall(VecScatterDestroy(&vecscat));
1004: PetscCall(ISDestroy(&list1));
1005: PetscCall(ISDestroy(&list2));
1006: #else
1007: fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1009: PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx1));
1010: PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx2));
1012: if (dim[1] % 2 == 0) NM = dim[1] + 2;
1013: else NM = dim[1] + 1;
1015: for (i = 0; i < local_n0; i++) {
1016: for (j = 0; j < dim[1]; j++) {
1017: tempindx = i * dim[1] + j;
1018: tempindx1 = i * NM + j;
1020: indx1[tempindx] = local_0_start * dim[1] + tempindx;
1021: indx2[tempindx] = low + tempindx1;
1022: }
1023: }
1025: PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
1026: PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));
1028: PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1029: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1030: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1031: PetscCall(VecScatterDestroy(&vecscat));
1032: PetscCall(ISDestroy(&list1));
1033: PetscCall(ISDestroy(&list2));
1034: PetscCall(PetscFree(indx1));
1035: PetscCall(PetscFree(indx2));
1036: #endif
1037: break;
1038: case 3:
1039: #if defined(PETSC_USE_COMPLEX)
1040: fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
1042: PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
1043: PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
1044: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
1045: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1046: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1047: PetscCall(VecScatterDestroy(&vecscat));
1048: PetscCall(ISDestroy(&list1));
1049: PetscCall(ISDestroy(&list2));
1050: #else
1051: fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1053: PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx1));
1054: PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx2));
1056: if (dim[2] % 2 == 0) NM = dim[2] + 2;
1057: else NM = dim[2] + 1;
1059: for (i = 0; i < local_n0; i++) {
1060: for (j = 0; j < dim[1]; j++) {
1061: for (k = 0; k < dim[2]; k++) {
1062: tempindx = i * dim[1] * dim[2] + j * dim[2] + k;
1063: tempindx1 = i * dim[1] * NM + j * NM + k;
1065: indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
1066: indx2[tempindx] = low + tempindx1;
1067: }
1068: }
1069: }
1071: PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
1072: PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));
1074: PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1075: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1076: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1077: PetscCall(VecScatterDestroy(&vecscat));
1078: PetscCall(ISDestroy(&list1));
1079: PetscCall(ISDestroy(&list2));
1080: PetscCall(PetscFree(indx1));
1081: PetscCall(PetscFree(indx2));
1082: #endif
1083: break;
1084: default:
1085: #if defined(PETSC_USE_COMPLEX)
1086: fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
1088: PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
1089: PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
1090: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
1091: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1092: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1093: PetscCall(VecScatterDestroy(&vecscat));
1094: PetscCall(ISDestroy(&list1));
1095: PetscCall(ISDestroy(&list2));
1096: #else
1097: temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
1099: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
1101: fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1103: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
1105: partial_dim = fftw->partial_dim;
1107: PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx1));
1108: PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx2));
1110: if (dim[ndim - 1] % 2 == 0) NM = 2;
1111: else NM = 1;
1113: j = low;
1114: for (i = 0, k = 1; i < (PetscInt)local_n0 * partial_dim; i++, k++) {
1115: indx1[i] = local_0_start * partial_dim + i;
1116: indx2[i] = j;
1117: if (k % dim[ndim - 1] == 0) j += NM;
1118: j++;
1119: }
1120: PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
1121: PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));
1123: PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1124: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1125: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1126: PetscCall(VecScatterDestroy(&vecscat));
1127: PetscCall(ISDestroy(&list1));
1128: PetscCall(ISDestroy(&list2));
1129: PetscCall(PetscFree(indx1));
1130: PetscCall(PetscFree(indx2));
1131: #endif
1132: break;
1133: }
1134: #endif
1135: }
1136: PetscFunctionReturn(PETSC_SUCCESS);
1137: }
1139: /*MC
1140: MATFFTW - "fftw" - Matrix type that provides FFT via the FFTW external package.
1142: Level: intermediate
1144: .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatType`, `MatCreateFFT()`
1145: M*/
1146: PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1147: {
1148: MPI_Comm comm;
1149: Mat_FFT *fft = (Mat_FFT *)A->data;
1150: Mat_FFTW *fftw;
1151: PetscInt ndim = fft->ndim, *dim = fft->dim;
1152: const char *plans[] = {"FFTW_ESTIMATE", "FFTW_MEASURE", "FFTW_PATIENT", "FFTW_EXHAUSTIVE"};
1153: unsigned iplans[] = {FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE};
1154: PetscBool flg;
1155: PetscInt p_flag, partial_dim = 1, ctr;
1156: PetscMPIInt size, rank;
1157: ptrdiff_t *pdim;
1158: #if !defined(PETSC_USE_COMPLEX)
1159: PetscInt tot_dim;
1160: #endif
1162: PetscFunctionBegin;
1163: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1164: PetscCallMPI(MPI_Comm_size(comm, &size));
1165: PetscCallMPI(MPI_Comm_rank(comm, &rank));
1167: #if !PetscDefined(HAVE_MPIUNI)
1168: fftw_mpi_init();
1169: #endif
1170: pdim = (ptrdiff_t *)calloc(ndim, sizeof(ptrdiff_t));
1171: pdim[0] = dim[0];
1172: #if !defined(PETSC_USE_COMPLEX)
1173: tot_dim = 2 * dim[0];
1174: #endif
1175: for (ctr = 1; ctr < ndim; ctr++) {
1176: partial_dim *= dim[ctr];
1177: pdim[ctr] = dim[ctr];
1178: #if !defined(PETSC_USE_COMPLEX)
1179: if (ctr == ndim - 1) tot_dim *= (dim[ctr] / 2 + 1);
1180: else tot_dim *= dim[ctr];
1181: #endif
1182: }
1184: if (size == 1) {
1185: #if defined(PETSC_USE_COMPLEX)
1186: PetscCall(MatSetSizes(A, fft->N, fft->N, fft->N, fft->N));
1187: fft->n = fft->N;
1188: #else
1189: PetscCall(MatSetSizes(A, tot_dim, tot_dim, tot_dim, tot_dim));
1190: fft->n = tot_dim;
1191: #endif
1192: #if !PetscDefined(HAVE_MPIUNI)
1193: } else {
1194: ptrdiff_t local_n0, local_0_start, local_n1, local_1_start;
1195: #if !defined(PETSC_USE_COMPLEX)
1196: ptrdiff_t temp;
1197: PetscInt N1;
1198: #endif
1200: switch (ndim) {
1201: case 1:
1202: #if !defined(PETSC_USE_COMPLEX)
1203: SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
1204: #else
1205: fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
1206: fft->n = (PetscInt)local_n0;
1207: PetscCall(MatSetSizes(A, local_n1, fft->n, fft->N, fft->N));
1208: #endif
1209: break;
1210: case 2:
1211: #if defined(PETSC_USE_COMPLEX)
1212: fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
1213: fft->n = (PetscInt)local_n0 * dim[1];
1214: PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1215: #else
1216: fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1218: fft->n = 2 * (PetscInt)local_n0 * (dim[1] / 2 + 1);
1219: PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * (dim[1] / 2 + 1), 2 * dim[0] * (dim[1] / 2 + 1)));
1220: #endif
1221: break;
1222: case 3:
1223: #if defined(PETSC_USE_COMPLEX)
1224: fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
1226: fft->n = (PetscInt)local_n0 * dim[1] * dim[2];
1227: PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1228: #else
1229: fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1231: fft->n = 2 * (PetscInt)local_n0 * dim[1] * (dim[2] / 2 + 1);
1232: PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * dim[1] * (dim[2] / 2 + 1), 2 * dim[0] * dim[1] * (dim[2] / 2 + 1)));
1233: #endif
1234: break;
1235: default:
1236: #if defined(PETSC_USE_COMPLEX)
1237: fftw_mpi_local_size(ndim, pdim, comm, &local_n0, &local_0_start);
1239: fft->n = (PetscInt)local_n0 * partial_dim;
1240: PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1241: #else
1242: temp = pdim[ndim - 1];
1244: pdim[ndim - 1] = temp / 2 + 1;
1246: fftw_mpi_local_size_transposed(ndim, pdim, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1248: fft->n = 2 * (PetscInt)local_n0 * partial_dim * pdim[ndim - 1] / temp;
1249: N1 = 2 * fft->N * (PetscInt)pdim[ndim - 1] / ((PetscInt)temp);
1251: pdim[ndim - 1] = temp;
1253: PetscCall(MatSetSizes(A, fft->n, fft->n, N1, N1));
1254: #endif
1255: break;
1256: }
1257: #endif
1258: }
1259: free(pdim);
1260: PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATFFTW));
1261: PetscCall(PetscNew(&fftw));
1262: fft->data = (void *)fftw;
1264: fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is the dimension of the FFT */
1265: fftw->partial_dim = partial_dim;
1267: PetscCall(PetscMalloc1(ndim, &fftw->dim_fftw));
1268: if (size == 1) {
1269: #if defined(PETSC_USE_64BIT_INDICES)
1270: fftw->iodims = (fftw_iodim64 *)malloc(sizeof(fftw_iodim64) * ndim);
1271: #else
1272: fftw->iodims = (fftw_iodim *)malloc(sizeof(fftw_iodim) * ndim);
1273: #endif
1274: }
1276: for (ctr = 0; ctr < ndim; ctr++) (fftw->dim_fftw)[ctr] = dim[ctr];
1278: fftw->p_forward = NULL;
1279: fftw->p_backward = NULL;
1280: fftw->p_flag = FFTW_ESTIMATE;
1282: if (size == 1) {
1283: A->ops->mult = MatMult_SeqFFTW;
1284: A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1285: #if !PetscDefined(HAVE_MPIUNI)
1286: } else {
1287: A->ops->mult = MatMult_MPIFFTW;
1288: A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1289: #endif
1290: }
1291: fft->matdestroy = MatDestroy_FFTW;
1292: A->assembled = PETSC_TRUE;
1293: A->preallocated = PETSC_TRUE;
1295: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", MatCreateVecsFFTW_FFTW));
1296: PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", VecScatterPetscToFFTW_FFTW));
1297: PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", VecScatterFFTWToPetsc_FFTW));
1299: /* get runtime options */
1300: PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "FFTW Options", "Mat");
1301: PetscCall(PetscOptionsEList("-mat_fftw_plannerflags", "Planner Flags", "None", plans, 4, plans[0], &p_flag, &flg));
1302: if (flg) fftw->p_flag = iplans[p_flag];
1303: PetscOptionsEnd();
1304: PetscFunctionReturn(PETSC_SUCCESS);
1305: }