Actual source code: comb.c
1: /*
2: Split phase global vector reductions with support for combining the
3: communication portion of several operations. Using MPI-1.1 support only
5: The idea for this and much of the initial code is contributed by
6: Victor Eijkhout.
8: Usage:
9: VecDotBegin(Vec,Vec,PetscScalar *);
10: VecNormBegin(Vec,NormType,PetscReal *);
11: ....
12: VecDotEnd(Vec,Vec,PetscScalar *);
13: VecNormEnd(Vec,NormType,PetscReal *);
15: Limitations:
16: - The order of the xxxEnd() functions MUST be in the same order
17: as the xxxBegin(). There is extensive error checking to try to
18: insure that the user calls the routines in the correct order
19: */
21: #include <petsc/private/vecimpl.h>
23: static PetscMPIInt MPIU_Iallreduce(void *sendbuf, void *recvbuf, PetscMPIInt count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request)
24: {
25: PetscMPIInt err;
27: #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
28: err = MPI_Iallreduce(sendbuf, recvbuf, count, datatype, op, comm, request);
29: #else
30: err = MPIU_Allreduce(sendbuf, recvbuf, count, datatype, op, comm);
31: *request = MPI_REQUEST_NULL;
32: #endif
33: return err;
34: }
36: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *);
38: /*
39: PetscSplitReductionCreate - Creates a data structure to contain the queued information.
40: */
41: static PetscErrorCode PetscSplitReductionCreate(MPI_Comm comm, PetscSplitReduction **sr)
42: {
43: PetscFunctionBegin;
44: PetscCall(PetscNew(sr));
45: (*sr)->numopsbegin = 0;
46: (*sr)->numopsend = 0;
47: (*sr)->state = STATE_BEGIN;
48: #define MAXOPS 32
49: (*sr)->maxops = MAXOPS;
50: PetscCall(PetscMalloc6(MAXOPS, &(*sr)->lvalues, MAXOPS, &(*sr)->gvalues, MAXOPS, &(*sr)->invecs, MAXOPS, &(*sr)->reducetype, MAXOPS, &(*sr)->lvalues_mix, MAXOPS, &(*sr)->gvalues_mix));
51: #undef MAXOPS
52: (*sr)->comm = comm;
53: (*sr)->request = MPI_REQUEST_NULL;
54: (*sr)->mix = PETSC_FALSE;
55: (*sr)->async = PetscDefined(HAVE_MPI_NONBLOCKING_COLLECTIVES) ? PETSC_TRUE : PETSC_FALSE; /* Enable by default */
56: /* always check for option; so that tests that run on systems without support don't warn about unhandled options */
57: PetscCall(PetscOptionsGetBool(NULL, NULL, "-splitreduction_async", &(*sr)->async, NULL));
58: PetscFunctionReturn(PETSC_SUCCESS);
59: }
61: /*
62: This function is the MPI reduction operation used when there is
63: a combination of sums and max in the reduction. The call below to
64: MPI_Op_create() converts the function PetscSplitReduction_Local() to the
65: MPI operator PetscSplitReduction_Op.
66: */
67: MPI_Op PetscSplitReduction_Op = 0;
69: PETSC_EXTERN void MPIAPI PetscSplitReduction_Local(void *in, void *out, PetscMPIInt *cnt, MPI_Datatype *datatype)
70: {
71: struct PetscScalarInt {
72: PetscScalar v;
73: PetscInt i;
74: };
75: struct PetscScalarInt *xin = (struct PetscScalarInt *)in;
76: struct PetscScalarInt *xout = (struct PetscScalarInt *)out;
77: PetscInt i, count = *cnt;
79: PetscFunctionBegin;
80: if (*datatype != MPIU_SCALAR_INT) {
81: PetscCallAbort(MPI_COMM_SELF, (*PetscErrorPrintf)("Can only handle MPIU_SCALAR_INT data types"));
82: PETSCABORT(MPI_COMM_SELF, PETSC_ERR_ARG_WRONG);
83: }
84: for (i = 0; i < count; i++) {
85: if (xin[i].i == PETSC_SR_REDUCE_SUM) xout[i].v += xin[i].v;
86: else if (xin[i].i == PETSC_SR_REDUCE_MAX) xout[i].v = PetscMax(PetscRealPart(xout[i].v), PetscRealPart(xin[i].v));
87: else if (xin[i].i == PETSC_SR_REDUCE_MIN) xout[i].v = PetscMin(PetscRealPart(xout[i].v), PetscRealPart(xin[i].v));
88: else {
89: PetscCallAbort(MPI_COMM_SELF, (*PetscErrorPrintf)("Reduction type input is not PETSC_SR_REDUCE_SUM, PETSC_SR_REDUCE_MAX, or PETSC_SR_REDUCE_MIN"));
90: PETSCABORT(MPI_COMM_SELF, PETSC_ERR_ARG_WRONG);
91: }
92: }
93: PetscFunctionReturnVoid();
94: }
96: /*@
97: PetscCommSplitReductionBegin - Begin an asynchronous split-mode reduction
99: Collective but not synchronizing
101: Input Parameter:
102: . comm - communicator on which split reduction has been queued
104: Level: advanced
106: Note:
107: Calling this function is optional when using split-mode reduction. On supporting hardware,
108: calling this after all VecXxxBegin() allows the reduction to make asynchronous progress
109: before the result is needed (in VecXxxEnd()).
111: .seealso: `VecNormBegin()`, `VecNormEnd()`, `VecDotBegin()`, `VecDotEnd()`, `VecTDotBegin()`, `VecTDotEnd()`, `VecMDotBegin()`, `VecMDotEnd()`, `VecMTDotBegin()`, `VecMTDotEnd()`
112: @*/
113: PetscErrorCode PetscCommSplitReductionBegin(MPI_Comm comm)
114: {
115: PetscSplitReduction *sr;
117: PetscFunctionBegin;
118: if (PetscDefined(HAVE_THREADSAFETY)) PetscFunctionReturn(PETSC_SUCCESS);
119: PetscCall(PetscSplitReductionGet(comm, &sr));
120: PetscCheck(sr->numopsend <= 0, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot call this after VecxxxEnd() has been called");
121: if (sr->async) { /* Bad reuse, setup code copied from PetscSplitReductionApply(). */
122: PetscMPIInt numops = sr->numopsbegin;
123: PetscSRReductionType *reducetype = sr->reducetype;
124: PetscScalar *lvalues = sr->lvalues, *gvalues = sr->gvalues;
125: PetscInt sum_flg = 0, max_flg = 0, min_flg = 0;
126: MPI_Comm comm = sr->comm;
127: PetscMPIInt size, cmul = sizeof(PetscScalar) / sizeof(PetscReal);
129: PetscCall(PetscLogEventBegin(VEC_ReduceBegin, 0, 0, 0, 0));
130: PetscCallMPI(MPI_Comm_size(sr->comm, &size));
131: if (size == 1) {
132: PetscCall(PetscArraycpy(gvalues, lvalues, numops));
133: } else {
134: /* determine if all reductions are sum, max, or min */
135: for (PetscMPIInt i = 0; i < numops; i++) {
136: if (reducetype[i] == PETSC_SR_REDUCE_MAX) max_flg = 1;
137: else if (reducetype[i] == PETSC_SR_REDUCE_SUM) sum_flg = 1;
138: else if (reducetype[i] == PETSC_SR_REDUCE_MIN) min_flg = 1;
139: else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
140: }
141: PetscCheck(sum_flg + max_flg + min_flg <= 1 || !sr->mix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
142: if (sum_flg + max_flg + min_flg > 1) {
143: sr->mix = PETSC_TRUE;
144: for (PetscMPIInt i = 0; i < numops; i++) {
145: sr->lvalues_mix[i].v = lvalues[i];
146: sr->lvalues_mix[i].i = reducetype[i];
147: }
148: PetscCallMPI(MPIU_Iallreduce(sr->lvalues_mix, sr->gvalues_mix, numops, MPIU_SCALAR_INT, PetscSplitReduction_Op, comm, &sr->request));
149: } else if (max_flg) { /* Compute max of real and imag parts separately, presumably only the real part is used */
150: PetscCallMPI(MPIU_Iallreduce(lvalues, gvalues, cmul * numops, MPIU_REAL, MPIU_MAX, comm, &sr->request));
151: } else if (min_flg) {
152: PetscCallMPI(MPIU_Iallreduce(lvalues, gvalues, cmul * numops, MPIU_REAL, MPIU_MIN, comm, &sr->request));
153: } else {
154: PetscCallMPI(MPIU_Iallreduce(lvalues, gvalues, numops, MPIU_SCALAR, MPIU_SUM, comm, &sr->request));
155: }
156: }
157: sr->state = STATE_PENDING;
158: sr->numopsend = 0;
159: PetscCall(PetscLogEventEnd(VEC_ReduceBegin, 0, 0, 0, 0));
160: } else {
161: PetscCall(PetscSplitReductionApply(sr));
162: }
163: PetscFunctionReturn(PETSC_SUCCESS);
164: }
166: PetscErrorCode PetscSplitReductionEnd(PetscSplitReduction *sr)
167: {
168: PetscFunctionBegin;
169: switch (sr->state) {
170: case STATE_BEGIN: /* We are doing synchronous communication and this is the first call to VecXxxEnd() so do the communication */
171: PetscCall(PetscSplitReductionApply(sr));
172: break;
173: case STATE_PENDING:
174: /* We are doing asynchronous-mode communication and this is the first VecXxxEnd() so wait for comm to complete */
175: PetscCall(PetscLogEventBegin(VEC_ReduceEnd, 0, 0, 0, 0));
176: if (sr->request != MPI_REQUEST_NULL) PetscCallMPI(MPI_Wait(&sr->request, MPI_STATUS_IGNORE));
177: sr->state = STATE_END;
178: if (sr->mix) {
179: for (PetscMPIInt i = 0; i < sr->numopsbegin; i++) sr->gvalues[i] = sr->gvalues_mix[i].v;
180: sr->mix = PETSC_FALSE;
181: }
182: PetscCall(PetscLogEventEnd(VEC_ReduceEnd, 0, 0, 0, 0));
183: break;
184: default:
185: break; /* everything is already done */
186: }
187: PetscFunctionReturn(PETSC_SUCCESS);
188: }
190: /*
191: PetscSplitReductionApply - Actually do the communication required for a split phase reduction
192: */
193: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *sr)
194: {
195: PetscMPIInt numops = sr->numopsbegin;
196: PetscSRReductionType *reducetype = sr->reducetype;
197: PetscScalar *lvalues = sr->lvalues, *gvalues = sr->gvalues;
198: PetscInt sum_flg = 0, max_flg = 0, min_flg = 0;
199: MPI_Comm comm = sr->comm;
200: PetscMPIInt size, cmul = sizeof(PetscScalar) / sizeof(PetscReal);
202: PetscFunctionBegin;
203: PetscCheck(sr->numopsend <= 0, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot call this after VecxxxEnd() has been called");
204: PetscCall(PetscLogEventBegin(VEC_ReduceCommunication, 0, 0, 0, 0));
205: PetscCallMPI(MPI_Comm_size(sr->comm, &size));
206: if (size == 1) {
207: PetscCall(PetscArraycpy(gvalues, lvalues, numops));
208: } else {
209: /* determine if all reductions are sum, max, or min */
210: for (PetscMPIInt i = 0; i < numops; i++) {
211: if (reducetype[i] == PETSC_SR_REDUCE_MAX) max_flg = 1;
212: else if (reducetype[i] == PETSC_SR_REDUCE_SUM) sum_flg = 1;
213: else if (reducetype[i] == PETSC_SR_REDUCE_MIN) min_flg = 1;
214: else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
215: }
216: if (sum_flg + max_flg + min_flg > 1) {
217: PetscCheck(!sr->mix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
218: for (PetscMPIInt i = 0; i < numops; i++) {
219: sr->lvalues_mix[i].v = lvalues[i];
220: sr->lvalues_mix[i].i = reducetype[i];
221: }
222: PetscCallMPI(MPIU_Allreduce(sr->lvalues_mix, sr->gvalues_mix, numops, MPIU_SCALAR_INT, PetscSplitReduction_Op, comm));
223: for (PetscMPIInt i = 0; i < numops; i++) sr->gvalues[i] = sr->gvalues_mix[i].v;
224: } else if (max_flg) { /* Compute max of real and imag parts separately, presumably only the real part is used */
225: PetscCallMPI(MPIU_Allreduce(lvalues, gvalues, cmul * numops, MPIU_REAL, MPIU_MAX, comm));
226: } else if (min_flg) {
227: PetscCallMPI(MPIU_Allreduce(lvalues, gvalues, cmul * numops, MPIU_REAL, MPIU_MIN, comm));
228: } else {
229: PetscCallMPI(MPIU_Allreduce(lvalues, gvalues, numops, MPIU_SCALAR, MPIU_SUM, comm));
230: }
231: }
232: sr->state = STATE_END;
233: sr->numopsend = 0;
234: PetscCall(PetscLogEventEnd(VEC_ReduceCommunication, 0, 0, 0, 0));
235: PetscFunctionReturn(PETSC_SUCCESS);
236: }
238: /*
239: PetscSplitReductionExtend - Double the amount of space (slots) allocated for a split reduction object.
240: */
241: PetscErrorCode PetscSplitReductionExtend(PetscSplitReduction *sr)
242: {
243: struct PetscScalarInt {
244: PetscScalar v;
245: PetscInt i;
246: };
247: PetscMPIInt maxops = sr->maxops;
248: PetscSRReductionType *reducetype = sr->reducetype;
249: PetscScalar *lvalues = sr->lvalues, *gvalues = sr->gvalues;
250: struct PetscScalarInt *lvalues_mix = (struct PetscScalarInt *)sr->lvalues_mix;
251: struct PetscScalarInt *gvalues_mix = (struct PetscScalarInt *)sr->gvalues_mix;
252: void **invecs = sr->invecs;
254: PetscFunctionBegin;
255: sr->maxops = 2 * maxops;
256: PetscCall(PetscMalloc6(2 * maxops, &sr->lvalues, 2 * maxops, &sr->gvalues, 2 * maxops, &sr->reducetype, 2 * maxops, &sr->invecs, 2 * maxops, &sr->lvalues_mix, 2 * maxops, &sr->gvalues_mix));
257: PetscCall(PetscArraycpy(sr->lvalues, lvalues, maxops));
258: PetscCall(PetscArraycpy(sr->gvalues, gvalues, maxops));
259: PetscCall(PetscArraycpy(sr->reducetype, reducetype, maxops));
260: PetscCall(PetscArraycpy(sr->invecs, invecs, maxops));
261: PetscCall(PetscArraycpy(sr->lvalues_mix, lvalues_mix, maxops));
262: PetscCall(PetscArraycpy(sr->gvalues_mix, gvalues_mix, maxops));
263: PetscCall(PetscFree6(lvalues, gvalues, reducetype, invecs, lvalues_mix, gvalues_mix));
264: PetscFunctionReturn(PETSC_SUCCESS);
265: }
267: static PetscErrorCode PetscSplitReductionDestroy(PetscSplitReduction *sr)
268: {
269: PetscFunctionBegin;
270: PetscCall(PetscFree6(sr->lvalues, sr->gvalues, sr->reducetype, sr->invecs, sr->lvalues_mix, sr->gvalues_mix));
271: PetscCall(PetscFree(sr));
272: PetscFunctionReturn(PETSC_SUCCESS);
273: }
275: PetscMPIInt Petsc_Reduction_keyval = MPI_KEYVAL_INVALID;
277: /*
278: Private routine to delete internal storage when a communicator is freed.
279: This is called by MPI, not by users.
281: The binding for the first argument changed from MPI 1.0 to 1.1; in 1.0
282: it was MPI_Comm *comm.
283: */
284: static PetscMPIInt MPIAPI Petsc_DelReduction(MPI_Comm comm, PETSC_UNUSED PetscMPIInt keyval, void *attr_val, PETSC_UNUSED void *extra_state)
285: {
286: PetscFunctionBegin;
287: PetscCallReturnMPI(PetscInfo(0, "Deleting reduction data in an MPI_Comm %ld\n", (long)comm));
288: PetscCallReturnMPI(PetscSplitReductionDestroy((PetscSplitReduction *)attr_val));
289: PetscFunctionReturn(PETSC_SUCCESS);
290: }
292: /*
293: PetscSplitReductionGet - Gets the split reduction object from a
294: PETSc vector, creates if it does not exit.
296: */
297: PetscErrorCode PetscSplitReductionGet(MPI_Comm comm, PetscSplitReduction **sr)
298: {
299: PetscMPIInt iflg;
301: PetscFunctionBegin;
302: PetscCheck(!PetscDefined(HAVE_THREADSAFETY), comm, PETSC_ERR_SUP, "PetscSplitReductionGet() is not thread-safe");
303: if (Petsc_Reduction_keyval == MPI_KEYVAL_INVALID) {
304: /*
305: The calling sequence of the 2nd argument to this function changed
306: between MPI Standard 1.0 and the revisions 1.1 Here we match the
307: new standard, if you are using an MPI implementation that uses
308: the older version you will get a warning message about the next line;
309: it is only a warning message and should do no harm.
310: */
311: PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_DelReduction, &Petsc_Reduction_keyval, NULL));
312: }
313: PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Reduction_keyval, (void **)sr, &iflg));
314: if (!iflg) { /* doesn't exist yet so create it and put it in */
315: PetscCall(PetscSplitReductionCreate(comm, sr));
316: PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_Reduction_keyval, *sr));
317: PetscCall(PetscInfo(0, "Putting reduction data in an MPI_Comm %ld\n", (long)comm));
318: }
319: PetscFunctionReturn(PETSC_SUCCESS);
320: }
322: /*@
323: VecDotBegin - Starts a split phase dot product computation.
325: Input Parameters:
326: + x - the first vector
327: . y - the second vector
328: - result - where the result will go (can be `NULL`)
330: Level: advanced
332: Notes:
333: Each call to `VecDotBegin()` should be paired with a call to `VecDotEnd()`.
335: .seealso: `VecDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
336: `VecTDotBegin()`, `VecTDotEnd()`, `PetscCommSplitReductionBegin()`
337: @*/
338: PetscErrorCode VecDotBegin(Vec x, Vec y, PetscScalar *result)
339: {
340: PetscSplitReduction *sr;
341: MPI_Comm comm;
343: PetscFunctionBegin;
346: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
347: if (PetscDefined(HAVE_THREADSAFETY)) {
348: PetscCheck(result, comm, PETSC_ERR_ARG_NULL, "result cannot be NULL when configuring --with-threadsafety");
349: PetscCall(VecDot(x, y, result));
350: PetscFunctionReturn(PETSC_SUCCESS);
351: }
352: PetscCall(PetscSplitReductionGet(comm, &sr));
353: PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
354: if (sr->numopsbegin >= sr->maxops) PetscCall(PetscSplitReductionExtend(sr));
355: sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
356: sr->invecs[sr->numopsbegin] = (void *)x;
357: PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
358: PetscUseTypeMethod(x, dot_local, y, sr->lvalues + sr->numopsbegin++);
359: PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
360: PetscFunctionReturn(PETSC_SUCCESS);
361: }
363: /*@
364: VecDotEnd - Ends a split phase dot product computation.
366: Input Parameters:
367: + x - the first vector (can be `NULL`)
368: . y - the second vector (can be `NULL`)
369: - result - where the result will go
371: Level: advanced
373: Notes:
374: Each call to `VecDotBegin()` should be paired with a call to `VecDotEnd()`.
376: .seealso: `VecDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
377: `VecTDotBegin()`, `VecTDotEnd()`, `PetscCommSplitReductionBegin()`
378: @*/
379: PetscErrorCode VecDotEnd(Vec x, Vec y, PetscScalar *result)
380: {
381: PetscSplitReduction *sr;
382: MPI_Comm comm;
384: PetscFunctionBegin;
385: if (PetscDefined(HAVE_THREADSAFETY)) PetscFunctionReturn(PETSC_SUCCESS);
386: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
387: PetscCall(PetscSplitReductionGet(comm, &sr));
388: PetscCall(PetscSplitReductionEnd(sr));
390: PetscCheck(sr->numopsend < sr->numopsbegin, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() more times then VecxxxBegin()");
391: PetscCheck(!x || (void *)x == sr->invecs[sr->numopsend], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
392: PetscCheck(sr->reducetype[sr->numopsend] == PETSC_SR_REDUCE_SUM, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecDotEnd() on a reduction started with VecNormBegin()");
393: *result = sr->gvalues[sr->numopsend++];
395: /*
396: We are finished getting all the results so reset to no outstanding requests
397: */
398: if (sr->numopsend == sr->numopsbegin) {
399: sr->state = STATE_BEGIN;
400: sr->numopsend = 0;
401: sr->numopsbegin = 0;
402: sr->mix = PETSC_FALSE;
403: }
404: PetscFunctionReturn(PETSC_SUCCESS);
405: }
407: /*@
408: VecTDotBegin - Starts a split phase transpose dot product computation.
410: Input Parameters:
411: + x - the first vector
412: . y - the second vector
413: - result - where the result will go (can be `NULL`)
415: Level: advanced
417: Notes:
418: Each call to `VecTDotBegin()` should be paired with a call to `VecTDotEnd()`.
420: .seealso: `VecTDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
421: `VecDotBegin()`, `VecDotEnd()`, `PetscCommSplitReductionBegin()`
422: @*/
423: PetscErrorCode VecTDotBegin(Vec x, Vec y, PetscScalar *result)
424: {
425: PetscSplitReduction *sr;
426: MPI_Comm comm;
428: PetscFunctionBegin;
429: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
430: if (PetscDefined(HAVE_THREADSAFETY)) {
431: PetscCheck(result, comm, PETSC_ERR_ARG_NULL, "result cannot be NULL when configuring --with-threadsafety");
432: PetscCall(VecTDot(x, y, result));
433: PetscFunctionReturn(PETSC_SUCCESS);
434: }
435: PetscCall(PetscSplitReductionGet(comm, &sr));
436: PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
437: if (sr->numopsbegin >= sr->maxops) PetscCall(PetscSplitReductionExtend(sr));
438: sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
439: sr->invecs[sr->numopsbegin] = (void *)x;
440: PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
441: PetscUseTypeMethod(x, tdot_local, y, sr->lvalues + sr->numopsbegin++);
442: PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
443: PetscFunctionReturn(PETSC_SUCCESS);
444: }
446: /*@
447: VecTDotEnd - Ends a split phase transpose dot product computation.
449: Input Parameters:
450: + x - the first vector (can be `NULL`)
451: . y - the second vector (can be `NULL`)
452: - result - where the result will go
454: Level: advanced
456: Notes:
457: Each call to `VecTDotBegin()` should be paired with a call to `VecTDotEnd()`.
459: .seealso: `VecTDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
460: `VecDotBegin()`, `VecDotEnd()`
461: @*/
462: PetscErrorCode VecTDotEnd(Vec x, Vec y, PetscScalar *result)
463: {
464: PetscFunctionBegin;
465: /*
466: TDotEnd() is the same as DotEnd() so reuse the code
467: */
468: PetscCall(VecDotEnd(x, y, result));
469: PetscFunctionReturn(PETSC_SUCCESS);
470: }
472: /*@
473: VecNormBegin - Starts a split phase norm computation.
475: Input Parameters:
476: + x - the first vector
477: . ntype - norm type, one of `NORM_1`, `NORM_2`, `NORM_MAX`, `NORM_1_AND_2`
478: - result - where the result will go (can be `NULL`)
480: Level: advanced
482: Notes:
483: Each call to `VecNormBegin()` should be paired with a call to `VecNormEnd()`.
485: .seealso: `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`, `VecDotBegin()`, `VecDotEnd()`, `PetscCommSplitReductionBegin()`
486: @*/
487: PetscErrorCode VecNormBegin(Vec x, NormType ntype, PetscReal *result)
488: {
489: PetscSplitReduction *sr;
490: PetscReal lresult[2];
491: MPI_Comm comm;
493: PetscFunctionBegin;
495: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
496: if (PetscDefined(HAVE_THREADSAFETY)) {
497: PetscCheck(result, comm, PETSC_ERR_ARG_NULL, "result cannot be NULL when configuring --with-threadsafety");
498: PetscCall(PetscObjectStateIncrease((PetscObject)x)); // increase PetscObjectState to invalidate cached norms
499: PetscCall(VecNorm(x, ntype, result));
500: PetscFunctionReturn(PETSC_SUCCESS);
501: }
502: PetscCall(PetscSplitReductionGet(comm, &sr));
503: PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
504: if (sr->numopsbegin >= sr->maxops || (sr->numopsbegin == sr->maxops - 1 && ntype == NORM_1_AND_2)) PetscCall(PetscSplitReductionExtend(sr));
506: sr->invecs[sr->numopsbegin] = (void *)x;
507: PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
508: PetscUseTypeMethod(x, norm_local, ntype, lresult);
509: PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
510: if (ntype == NORM_2) lresult[0] = lresult[0] * lresult[0];
511: if (ntype == NORM_1_AND_2) lresult[1] = lresult[1] * lresult[1];
512: if (ntype == NORM_MAX) sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_MAX;
513: else sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
514: sr->lvalues[sr->numopsbegin++] = lresult[0];
515: if (ntype == NORM_1_AND_2) {
516: sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
517: sr->lvalues[sr->numopsbegin++] = lresult[1];
518: }
519: PetscFunctionReturn(PETSC_SUCCESS);
520: }
522: /*@
523: VecNormEnd - Ends a split phase norm computation.
525: Input Parameters:
526: + x - the first vector
527: . ntype - norm type, one of `NORM_1`, `NORM_2`, `NORM_MAX`, `NORM_1_AND_2`
528: - result - where the result will go
530: Level: advanced
532: Notes:
533: Each call to `VecNormBegin()` should be paired with a call to `VecNormEnd()`.
535: The `x` vector is not allowed to be `NULL`, otherwise the vector would not have its correctly cached norm value
537: .seealso: `VecNormBegin()`, `VecNorm()`, `VecDot()`, `VecMDot()`, `VecDotBegin()`, `VecDotEnd()`, `PetscCommSplitReductionBegin()`
538: @*/
539: PetscErrorCode VecNormEnd(Vec x, NormType ntype, PetscReal *result)
540: {
541: PetscSplitReduction *sr;
542: MPI_Comm comm;
544: PetscFunctionBegin;
545: if (PetscDefined(HAVE_THREADSAFETY)) PetscFunctionReturn(PETSC_SUCCESS);
547: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
548: PetscCall(PetscSplitReductionGet(comm, &sr));
549: PetscCall(PetscSplitReductionEnd(sr));
551: PetscCheck(sr->numopsend < sr->numopsbegin, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() more times then VecxxxBegin()");
552: PetscCheck((void *)x == sr->invecs[sr->numopsend], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
553: PetscCheck(sr->reducetype[sr->numopsend] == PETSC_SR_REDUCE_MAX || ntype != NORM_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecNormEnd(,NORM_MAX,) on a reduction started with VecDotBegin() or NORM_1 or NORM_2");
554: result[0] = PetscRealPart(sr->gvalues[sr->numopsend++]);
556: if (ntype == NORM_2) result[0] = PetscSqrtReal(result[0]);
557: else if (ntype == NORM_1_AND_2) {
558: result[1] = PetscRealPart(sr->gvalues[sr->numopsend++]);
559: result[1] = PetscSqrtReal(result[1]);
560: }
561: if (ntype != NORM_1_AND_2) PetscCall(PetscObjectComposedDataSetReal((PetscObject)x, NormIds[ntype], result[0]));
563: if (sr->numopsend == sr->numopsbegin) {
564: sr->state = STATE_BEGIN;
565: sr->numopsend = 0;
566: sr->numopsbegin = 0;
567: }
568: PetscFunctionReturn(PETSC_SUCCESS);
569: }
571: /*
572: Possibly add
574: PetscReductionSumBegin/End()
575: PetscReductionMaxBegin/End()
576: PetscReductionMinBegin/End()
577: or have more like MPI with a single function with flag for Op? Like first better
578: */
580: /*@
581: VecMDotBegin - Starts a split phase multiple dot product computation.
583: Input Parameters:
584: + x - the first vector
585: . nv - number of vectors
586: . y - array of vectors
587: - result - where the result will go (can be `NULL`)
589: Level: advanced
591: Notes:
592: Each call to `VecMDotBegin()` should be paired with a call to `VecMDotEnd()`.
594: .seealso: `VecMDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
595: `VecTDotBegin()`, `VecTDotEnd()`, `VecMTDotBegin()`, `VecMTDotEnd()`, `PetscCommSplitReductionBegin()`
596: @*/
597: PetscErrorCode VecMDotBegin(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
598: {
599: PetscSplitReduction *sr;
600: MPI_Comm comm;
601: PetscInt i;
603: PetscFunctionBegin;
604: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
605: if (PetscDefined(HAVE_THREADSAFETY)) {
606: PetscCheck(result, comm, PETSC_ERR_ARG_NULL, "result cannot be NULL when configuring --with-threadsafety");
607: PetscCall(VecMDot(x, nv, y, result));
608: PetscFunctionReturn(PETSC_SUCCESS);
609: }
610: PetscCall(PetscSplitReductionGet(comm, &sr));
611: PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
612: for (i = 0; i < nv; i++) {
613: if (sr->numopsbegin + i >= sr->maxops) PetscCall(PetscSplitReductionExtend(sr));
614: sr->reducetype[sr->numopsbegin + i] = PETSC_SR_REDUCE_SUM;
615: sr->invecs[sr->numopsbegin + i] = (void *)x;
616: }
617: PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
618: PetscUseTypeMethod(x, mdot_local, nv, y, sr->lvalues + sr->numopsbegin);
619: PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
620: sr->numopsbegin += nv;
621: PetscFunctionReturn(PETSC_SUCCESS);
622: }
624: /*@
625: VecMDotEnd - Ends a split phase multiple dot product computation.
627: Input Parameters:
628: + x - the first vector (can be `NULL`)
629: . nv - number of vectors
630: - y - array of vectors (can be `NULL`)
632: Output Parameter:
633: . result - where the result will go
635: Level: advanced
637: Notes:
638: Each call to `VecMDotBegin()` should be paired with a call to `VecMDotEnd()`.
640: .seealso: `VecMDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
641: `VecTDotBegin()`, `VecTDotEnd()`, `VecMTDotBegin()`, `VecMTDotEnd()`, `PetscCommSplitReductionBegin()`
642: @*/
643: PetscErrorCode VecMDotEnd(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
644: {
645: PetscSplitReduction *sr;
646: MPI_Comm comm;
647: PetscInt i;
649: PetscFunctionBegin;
650: if (PetscDefined(HAVE_THREADSAFETY)) PetscFunctionReturn(PETSC_SUCCESS);
651: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
652: PetscCall(PetscSplitReductionGet(comm, &sr));
653: PetscCall(PetscSplitReductionEnd(sr));
655: PetscCheck(sr->numopsend < sr->numopsbegin, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() more times then VecxxxBegin()");
656: PetscCheck(!x || (void *)x == sr->invecs[sr->numopsend], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
657: PetscCheck(sr->reducetype[sr->numopsend] == PETSC_SR_REDUCE_SUM, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecDotEnd() on a reduction started with VecNormBegin()");
658: for (i = 0; i < nv; i++) result[i] = sr->gvalues[sr->numopsend++];
660: /*
661: We are finished getting all the results so reset to no outstanding requests
662: */
663: if (sr->numopsend == sr->numopsbegin) {
664: sr->state = STATE_BEGIN;
665: sr->numopsend = 0;
666: sr->numopsbegin = 0;
667: }
668: PetscFunctionReturn(PETSC_SUCCESS);
669: }
671: /*@
672: VecMTDotBegin - Starts a split phase transpose multiple dot product computation.
674: Input Parameters:
675: + x - the first vector
676: . nv - number of vectors
677: . y - array of vectors
678: - result - where the result will go (can be `NULL`)
680: Level: advanced
682: Notes:
683: Each call to `VecMTDotBegin()` should be paired with a call to `VecMTDotEnd()`.
685: .seealso: `VecMTDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
686: `VecDotBegin()`, `VecDotEnd()`, `VecMDotBegin()`, `VecMDotEnd()`, `PetscCommSplitReductionBegin()`
687: @*/
688: PetscErrorCode VecMTDotBegin(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
689: {
690: PetscSplitReduction *sr;
691: MPI_Comm comm;
692: PetscInt i;
694: PetscFunctionBegin;
695: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
696: if (PetscDefined(HAVE_THREADSAFETY)) {
697: PetscCheck(result, comm, PETSC_ERR_ARG_NULL, "result cannot be NULL when configuring --with-threadsafety");
698: PetscCall(VecMTDot(x, nv, y, result));
699: PetscFunctionReturn(PETSC_SUCCESS);
700: }
701: PetscCall(PetscSplitReductionGet(comm, &sr));
702: PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
703: for (i = 0; i < nv; i++) {
704: if (sr->numopsbegin + i >= sr->maxops) PetscCall(PetscSplitReductionExtend(sr));
705: sr->reducetype[sr->numopsbegin + i] = PETSC_SR_REDUCE_SUM;
706: sr->invecs[sr->numopsbegin + i] = (void *)x;
707: }
708: PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
709: PetscUseTypeMethod(x, mtdot_local, nv, y, sr->lvalues + sr->numopsbegin);
710: PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
711: sr->numopsbegin += nv;
712: PetscFunctionReturn(PETSC_SUCCESS);
713: }
715: /*@
716: VecMTDotEnd - Ends a split phase transpose multiple dot product computation.
718: Input Parameters:
719: + x - the first vector (can be `NULL`)
720: . nv - number of vectors
721: - y - array of vectors (can be `NULL`)
723: Output Parameter:
724: . result - where the result will go
726: Level: advanced
728: Notes:
729: Each call to `VecTDotBegin()` should be paired with a call to `VecTDotEnd()`.
731: .seealso: `VecMTDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
732: `VecDotBegin()`, `VecDotEnd()`, `VecMDotBegin()`, `VecMDotEnd()`, `PetscCommSplitReductionBegin()`
733: @*/
734: PetscErrorCode VecMTDotEnd(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
735: {
736: PetscFunctionBegin;
737: /*
738: MTDotEnd() is the same as MDotEnd() so reuse the code
739: */
740: PetscCall(VecMDotEnd(x, nv, y, result));
741: PetscFunctionReturn(PETSC_SUCCESS);
742: }