Actual source code: comb.c
1: /*
2: Split phase global vector reductions with support for combining the
3: communication portion of several operations. Using MPI-1.1 support only
5: The idea for this and much of the initial code was contributed by
6: Victor Eijkhout.
8: Usage:
9: VecDotBegin(Vec,Vec,PetscScalar *);
10: VecNormBegin(Vec,NormType,PetscReal *);
11: ....
12: VecDotEnd(Vec,Vec,PetscScalar *);
13: VecNormEnd(Vec,NormType,PetscReal *);
15: Limitations:
16: - The xxxEnd() functions MUST be called in the same order as the
17: corresponding xxxBegin() calls. There is extensive error checking
18: to try to ensure that the user calls the routines in the correct order.
19: */
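/*
   A minimal usage sketch of the split-phase interface described above. It is
   illustrative only (not part of this file's compiled code) and assumes x and y
   are assembled vectors living on the same communicator. Note that the xxxEnd()
   calls repeat the xxxBegin() order, as required by the limitation above.

     PetscScalar dp;
     PetscReal   nrm;

     PetscCall(VecDotBegin(x, y, &dp));         (queue the dot product)
     PetscCall(VecNormBegin(x, NORM_2, &nrm));  (queue the 2-norm)
     PetscCall(VecDotEnd(x, y, &dp));           (first End matches first Begin)
     PetscCall(VecNormEnd(x, NORM_2, &nrm));    (second End matches second Begin)
*/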
21: #include <petsc/private/vecimpl.h>
23: static PetscMPIInt MPIU_Iallreduce(void *sendbuf, void *recvbuf, PetscMPIInt count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request)
24: {
25: PetscMPIInt err;
27: #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
28: err = MPI_Iallreduce(sendbuf, recvbuf, count, datatype, op, comm, request);
29: #else
30: err = MPIU_Allreduce(sendbuf, recvbuf, count, datatype, op, comm);
31: *request = MPI_REQUEST_NULL;
32: #endif
33: return err;
34: }
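/*
   Callers of MPIU_Iallreduce() need not know whether nonblocking collectives are
   available. A sketch of the calling pattern used in this file: when the blocking
   fallback above is taken, the reduction has already completed and the request is
   MPI_REQUEST_NULL, so the later wait returns immediately.

     MPI_Request req;
     PetscCallMPI(MPIU_Iallreduce(lvalues, gvalues, n, MPIU_SCALAR, MPIU_SUM, comm, &req));
     (... independent work ...)
     if (req != MPI_REQUEST_NULL) PetscCallMPI(MPI_Wait(&req, MPI_STATUS_IGNORE));
*/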
36: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *);
38: /*
39: PetscSplitReductionCreate - Creates a data structure to contain the queued information.
40: */
41: static PetscErrorCode PetscSplitReductionCreate(MPI_Comm comm, PetscSplitReduction **sr)
42: {
43: PetscFunctionBegin;
44: PetscCall(PetscNew(sr));
45: (*sr)->numopsbegin = 0;
46: (*sr)->numopsend = 0;
47: (*sr)->state = STATE_BEGIN;
48: #define MAXOPS 32
49: (*sr)->maxops = MAXOPS;
50: PetscCall(PetscMalloc6(MAXOPS, &(*sr)->lvalues, MAXOPS, &(*sr)->gvalues, MAXOPS, &(*sr)->invecs, MAXOPS, &(*sr)->reducetype, MAXOPS, &(*sr)->lvalues_mix, MAXOPS, &(*sr)->gvalues_mix));
51: #undef MAXOPS
52: (*sr)->comm = comm;
53: (*sr)->request = MPI_REQUEST_NULL;
54: (*sr)->mix = PETSC_FALSE;
55: (*sr)->async = PETSC_FALSE;
56: #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
57: (*sr)->async = PETSC_TRUE; /* Enable by default */
58: #endif
59: /* always check for the option so that tests that run on systems without support do not warn about unhandled options */
60: PetscCall(PetscOptionsGetBool(NULL, NULL, "-splitreduction_async", &(*sr)->async, NULL));
61: PetscFunctionReturn(PETSC_SUCCESS);
62: }
64: /*
65: This function is the MPI reduction operation used when there is
66: a combination of sums and max in the reduction. The call below to
67: MPI_Op_create() converts the function PetscSplitReduction_Local() to the
68: MPI operator PetscSplitReduction_Op.
69: */
70: MPI_Op PetscSplitReduction_Op = 0;
72: PETSC_EXTERN void MPIAPI PetscSplitReduction_Local(void *in, void *out, PetscMPIInt *cnt, MPI_Datatype *datatype)
73: {
74: struct PetscScalarInt {
75: PetscScalar v;
76: PetscInt i;
77: };
78: struct PetscScalarInt *xin = (struct PetscScalarInt *)in;
79: struct PetscScalarInt *xout = (struct PetscScalarInt *)out;
80: PetscInt i, count = *cnt;
82: PetscFunctionBegin;
83: if (*datatype != MPIU_SCALAR_INT) {
84: PetscCallAbort(MPI_COMM_SELF, (*PetscErrorPrintf)("Can only handle MPIU_SCALAR_INT data types"));
85: PETSCABORT(MPI_COMM_SELF, PETSC_ERR_ARG_WRONG);
86: }
87: for (i = 0; i < count; i++) {
88: if (xin[i].i == PETSC_SR_REDUCE_SUM) xout[i].v += xin[i].v;
89: else if (xin[i].i == PETSC_SR_REDUCE_MAX) xout[i].v = PetscMax(PetscRealPart(xout[i].v), PetscRealPart(xin[i].v));
90: else if (xin[i].i == PETSC_SR_REDUCE_MIN) xout[i].v = PetscMin(PetscRealPart(xout[i].v), PetscRealPart(xin[i].v));
91: else {
92: PetscCallAbort(MPI_COMM_SELF, (*PetscErrorPrintf)("Reduction type input is not PETSC_SR_REDUCE_SUM, PETSC_SR_REDUCE_MAX, or PETSC_SR_REDUCE_MIN"));
93: PETSCABORT(MPI_COMM_SELF, PETSC_ERR_ARG_WRONG);
94: }
95: }
96: PetscFunctionReturnVoid();
97: }
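/*
   A small worked example of the operator above, with values chosen purely for
   illustration. Suppose three operations were queued with types SUM, MAX, MIN.
   The packed {value, type} pairs are combined slot by slot, so

     in  = { {2.0, SUM}, {5.0, MAX}, {3.0, MIN} }
     out = { {1.0, SUM}, {7.0, MAX}, {4.0, MIN} }

   leaves out = { {3.0, SUM}, {7.0, MAX}, {3.0, MIN} } after one application.
   The type tag travels with each value, so every rank applies the same
   per-slot operation.
*/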
99: /*@
100: PetscCommSplitReductionBegin - Begin an asynchronous split-mode reduction
102: Collective but not synchronizing
104: Input Parameter:
105: . comm - communicator on which split reduction has been queued
107: Level: advanced
109: Note:
110: Calling this function is optional when using split-mode reductions. On supporting hardware,
111: calling it after all the VecXxxBegin() calls allows the reduction to make asynchronous progress
112: before the results are needed (in the corresponding VecXxxEnd() calls).
114: .seealso: `VecNormBegin()`, `VecNormEnd()`, `VecDotBegin()`, `VecDotEnd()`, `VecTDotBegin()`, `VecTDotEnd()`, `VecMDotBegin()`, `VecMDotEnd()`, `VecMTDotBegin()`, `VecMTDotEnd()`
115: @*/
116: PetscErrorCode PetscCommSplitReductionBegin(MPI_Comm comm)
117: {
118: PetscSplitReduction *sr;
120: PetscFunctionBegin;
121: if (PetscDefined(HAVE_THREADSAFETY)) PetscFunctionReturn(PETSC_SUCCESS);
122: PetscCall(PetscSplitReductionGet(comm, &sr));
123: PetscCheck(sr->numopsend <= 0, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot call this after VecxxxEnd() has been called");
124: if (sr->async) { /* Bad reuse, setup code copied from PetscSplitReductionApply(). */
125: PetscMPIInt numops = sr->numopsbegin;
126: PetscSRReductionType *reducetype = sr->reducetype;
127: PetscScalar *lvalues = sr->lvalues, *gvalues = sr->gvalues;
128: PetscInt sum_flg = 0, max_flg = 0, min_flg = 0;
129: MPI_Comm comm = sr->comm;
130: PetscMPIInt size, cmul = sizeof(PetscScalar) / sizeof(PetscReal);
132: PetscCall(PetscLogEventBegin(VEC_ReduceBegin, 0, 0, 0, 0));
133: PetscCallMPI(MPI_Comm_size(sr->comm, &size));
134: if (size == 1) {
135: PetscCall(PetscArraycpy(gvalues, lvalues, numops));
136: } else {
137: /* determine if all reductions are sum, max, or min */
138: for (PetscMPIInt i = 0; i < numops; i++) {
139: if (reducetype[i] == PETSC_SR_REDUCE_MAX) max_flg = 1;
140: else if (reducetype[i] == PETSC_SR_REDUCE_SUM) sum_flg = 1;
141: else if (reducetype[i] == PETSC_SR_REDUCE_MIN) min_flg = 1;
142: else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
143: }
144: PetscCheck(sum_flg + max_flg + min_flg <= 1 || !sr->mix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
145: if (sum_flg + max_flg + min_flg > 1) {
146: sr->mix = PETSC_TRUE;
147: for (PetscMPIInt i = 0; i < numops; i++) {
148: sr->lvalues_mix[i].v = lvalues[i];
149: sr->lvalues_mix[i].i = reducetype[i];
150: }
151: PetscCallMPI(MPIU_Iallreduce(sr->lvalues_mix, sr->gvalues_mix, numops, MPIU_SCALAR_INT, PetscSplitReduction_Op, comm, &sr->request));
152: } else if (max_flg) { /* Compute max of real and imag parts separately, presumably only the real part is used */
153: PetscCallMPI(MPIU_Iallreduce(lvalues, gvalues, cmul * numops, MPIU_REAL, MPIU_MAX, comm, &sr->request));
154: } else if (min_flg) {
155: PetscCallMPI(MPIU_Iallreduce(lvalues, gvalues, cmul * numops, MPIU_REAL, MPIU_MIN, comm, &sr->request));
156: } else {
157: PetscCallMPI(MPIU_Iallreduce(lvalues, gvalues, numops, MPIU_SCALAR, MPIU_SUM, comm, &sr->request));
158: }
159: }
160: sr->state = STATE_PENDING;
161: sr->numopsend = 0;
162: PetscCall(PetscLogEventEnd(VEC_ReduceBegin, 0, 0, 0, 0));
163: } else {
164: PetscCall(PetscSplitReductionApply(sr));
165: }
166: PetscFunctionReturn(PETSC_SUCCESS);
167: }
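/*
   A sketch of the asynchronous pattern this routine enables (illustrative only;
   x, y, and w are assumed to be vectors on the same communicator). On hardware
   that progresses nonblocking collectives, the allreduce started here overlaps
   with the purely local work done before the End calls.

     PetscScalar dp;
     PetscReal   nrm;

     PetscCall(VecDotBegin(x, y, &dp));
     PetscCall(VecNormBegin(x, NORM_2, &nrm));
     PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)x)));
     PetscCall(VecAXPY(w, 2.0, x));              (local-only work overlaps the reduction)
     PetscCall(VecDotEnd(x, y, &dp));
     PetscCall(VecNormEnd(x, NORM_2, &nrm));
*/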
169: PetscErrorCode PetscSplitReductionEnd(PetscSplitReduction *sr)
170: {
171: PetscFunctionBegin;
172: switch (sr->state) {
173: case STATE_BEGIN: /* We are doing synchronous communication and this is the first call to VecXxxEnd() so do the communication */
174: PetscCall(PetscSplitReductionApply(sr));
175: break;
176: case STATE_PENDING:
177: /* We are doing asynchronous-mode communication and this is the first VecXxxEnd() so wait for comm to complete */
178: PetscCall(PetscLogEventBegin(VEC_ReduceEnd, 0, 0, 0, 0));
179: if (sr->request != MPI_REQUEST_NULL) PetscCallMPI(MPI_Wait(&sr->request, MPI_STATUS_IGNORE));
180: sr->state = STATE_END;
181: if (sr->mix) {
182: for (PetscMPIInt i = 0; i < sr->numopsbegin; i++) sr->gvalues[i] = sr->gvalues_mix[i].v;
183: sr->mix = PETSC_FALSE;
184: }
185: PetscCall(PetscLogEventEnd(VEC_ReduceEnd, 0, 0, 0, 0));
186: break;
187: default:
188: break; /* everything is already done */
189: }
190: PetscFunctionReturn(PETSC_SUCCESS);
191: }
193: /*
194: PetscSplitReductionApply - Actually do the communication required for a split phase reduction
195: */
196: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *sr)
197: {
198: PetscMPIInt numops = sr->numopsbegin;
199: PetscSRReductionType *reducetype = sr->reducetype;
200: PetscScalar *lvalues = sr->lvalues, *gvalues = sr->gvalues;
201: PetscInt sum_flg = 0, max_flg = 0, min_flg = 0;
202: MPI_Comm comm = sr->comm;
203: PetscMPIInt size, cmul = sizeof(PetscScalar) / sizeof(PetscReal);
205: PetscFunctionBegin;
206: PetscCheck(sr->numopsend <= 0, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot call this after VecxxxEnd() has been called");
207: PetscCall(PetscLogEventBegin(VEC_ReduceCommunication, 0, 0, 0, 0));
208: PetscCallMPI(MPI_Comm_size(sr->comm, &size));
209: if (size == 1) {
210: PetscCall(PetscArraycpy(gvalues, lvalues, numops));
211: } else {
212: /* determine if all reductions are sum, max, or min */
213: for (PetscMPIInt i = 0; i < numops; i++) {
214: if (reducetype[i] == PETSC_SR_REDUCE_MAX) max_flg = 1;
215: else if (reducetype[i] == PETSC_SR_REDUCE_SUM) sum_flg = 1;
216: else if (reducetype[i] == PETSC_SR_REDUCE_MIN) min_flg = 1;
217: else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
218: }
219: if (sum_flg + max_flg + min_flg > 1) {
220: PetscCheck(!sr->mix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
221: for (PetscMPIInt i = 0; i < numops; i++) {
222: sr->lvalues_mix[i].v = lvalues[i];
223: sr->lvalues_mix[i].i = reducetype[i];
224: }
225: PetscCallMPI(MPIU_Allreduce(sr->lvalues_mix, sr->gvalues_mix, numops, MPIU_SCALAR_INT, PetscSplitReduction_Op, comm));
226: for (PetscMPIInt i = 0; i < numops; i++) sr->gvalues[i] = sr->gvalues_mix[i].v;
227: } else if (max_flg) { /* Compute max of real and imag parts separately, presumably only the real part is used */
228: PetscCallMPI(MPIU_Allreduce(lvalues, gvalues, cmul * numops, MPIU_REAL, MPIU_MAX, comm));
229: } else if (min_flg) {
230: PetscCallMPI(MPIU_Allreduce(lvalues, gvalues, cmul * numops, MPIU_REAL, MPIU_MIN, comm));
231: } else {
232: PetscCallMPI(MPIU_Allreduce(lvalues, gvalues, numops, MPIU_SCALAR, MPIU_SUM, comm));
233: }
234: }
235: sr->state = STATE_END;
236: sr->numopsend = 0;
237: PetscCall(PetscLogEventEnd(VEC_ReduceCommunication, 0, 0, 0, 0));
238: PetscFunctionReturn(PETSC_SUCCESS);
239: }
241: /*
242: PetscSplitReductionExtend - Double the amount of space (slots) allocated for a split reduction object.
243: */
244: PetscErrorCode PetscSplitReductionExtend(PetscSplitReduction *sr)
245: {
246: struct PetscScalarInt {
247: PetscScalar v;
248: PetscInt i;
249: };
250: PetscMPIInt maxops = sr->maxops;
251: PetscSRReductionType *reducetype = sr->reducetype;
252: PetscScalar *lvalues = sr->lvalues, *gvalues = sr->gvalues;
253: struct PetscScalarInt *lvalues_mix = (struct PetscScalarInt *)sr->lvalues_mix;
254: struct PetscScalarInt *gvalues_mix = (struct PetscScalarInt *)sr->gvalues_mix;
255: void **invecs = sr->invecs;
257: PetscFunctionBegin;
258: sr->maxops = 2 * maxops;
259: PetscCall(PetscMalloc6(2 * maxops, &sr->lvalues, 2 * maxops, &sr->gvalues, 2 * maxops, &sr->reducetype, 2 * maxops, &sr->invecs, 2 * maxops, &sr->lvalues_mix, 2 * maxops, &sr->gvalues_mix));
260: PetscCall(PetscArraycpy(sr->lvalues, lvalues, maxops));
261: PetscCall(PetscArraycpy(sr->gvalues, gvalues, maxops));
262: PetscCall(PetscArraycpy(sr->reducetype, reducetype, maxops));
263: PetscCall(PetscArraycpy(sr->invecs, invecs, maxops));
264: PetscCall(PetscArraycpy(sr->lvalues_mix, lvalues_mix, maxops));
265: PetscCall(PetscArraycpy(sr->gvalues_mix, gvalues_mix, maxops));
266: PetscCall(PetscFree6(lvalues, gvalues, reducetype, invecs, lvalues_mix, gvalues_mix));
267: PetscFunctionReturn(PETSC_SUCCESS);
268: }
270: static PetscErrorCode PetscSplitReductionDestroy(PetscSplitReduction *sr)
271: {
272: PetscFunctionBegin;
273: PetscCall(PetscFree6(sr->lvalues, sr->gvalues, sr->reducetype, sr->invecs, sr->lvalues_mix, sr->gvalues_mix));
274: PetscCall(PetscFree(sr));
275: PetscFunctionReturn(PETSC_SUCCESS);
276: }
278: PetscMPIInt Petsc_Reduction_keyval = MPI_KEYVAL_INVALID;
280: /*
281: Private routine to delete internal storage when a communicator is freed.
282: This is called by MPI, not by users.
284: The binding for the first argument changed from MPI 1.0 to 1.1; in 1.0
285: it was MPI_Comm *comm.
286: */
287: static PetscMPIInt MPIAPI Petsc_DelReduction(MPI_Comm comm, PETSC_UNUSED PetscMPIInt keyval, void *attr_val, PETSC_UNUSED void *extra_state)
288: {
289: PetscFunctionBegin;
290: PetscCallReturnMPI(PetscInfo(0, "Deleting reduction data in an MPI_Comm %ld\n", (long)comm));
291: PetscCallReturnMPI(PetscSplitReductionDestroy((PetscSplitReduction *)attr_val));
292: PetscFunctionReturn(MPI_SUCCESS);
293: }
295: /*
296: PetscSplitReductionGet - Gets the split reduction object associated with a
297: communicator, creating it if it does not exist.
299: */
300: PetscErrorCode PetscSplitReductionGet(MPI_Comm comm, PetscSplitReduction **sr)
301: {
302: PetscMPIInt flag;
304: PetscFunctionBegin;
305: PetscCheck(!PetscDefined(HAVE_THREADSAFETY), comm, PETSC_ERR_SUP, "PetscSplitReductionGet() is not thread-safe");
306: if (Petsc_Reduction_keyval == MPI_KEYVAL_INVALID) {
307: /*
308: The calling sequence of the 2nd argument to this function changed
309: between MPI standard 1.0 and revision 1.1. Here we match the
310: new standard; if you are using an MPI implementation that uses
311: the older version you will get a warning message about the next line.
312: It is only a warning and should do no harm.
313: */
314: PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_DelReduction, &Petsc_Reduction_keyval, NULL));
315: }
316: PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Reduction_keyval, (void **)sr, &flag));
317: if (!flag) { /* doesn't exist yet so create it and put it in */
318: PetscCall(PetscSplitReductionCreate(comm, sr));
319: PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_Reduction_keyval, *sr));
320: PetscCall(PetscInfo(0, "Putting reduction data in an MPI_Comm %ld\n", (long)comm));
321: }
322: PetscFunctionReturn(PETSC_SUCCESS);
323: }
325: /* ----------------------------------------------------------------------------------------------------*/
327: /*@
328: VecDotBegin - Starts a split phase dot product computation.
330: Input Parameters:
331: + x - the first vector
332: . y - the second vector
333: - result - where the result will go (can be `NULL`)
335: Level: advanced
337: Notes:
338: Each call to `VecDotBegin()` should be paired with a call to `VecDotEnd()`.
340: .seealso: `VecDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
341: `VecTDotBegin()`, `VecTDotEnd()`, `PetscCommSplitReductionBegin()`
342: @*/
343: PetscErrorCode VecDotBegin(Vec x, Vec y, PetscScalar *result)
344: {
345: PetscSplitReduction *sr;
346: MPI_Comm comm;
348: PetscFunctionBegin;
351: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
352: if (PetscDefined(HAVE_THREADSAFETY)) {
353: PetscCheck(result, comm, PETSC_ERR_ARG_NULL, "result cannot be NULL when configuring --with-threadsafety");
354: PetscCall(VecDot(x, y, result));
355: PetscFunctionReturn(PETSC_SUCCESS);
356: }
357: PetscCall(PetscSplitReductionGet(comm, &sr));
358: PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
359: if (sr->numopsbegin >= sr->maxops) PetscCall(PetscSplitReductionExtend(sr));
360: sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
361: sr->invecs[sr->numopsbegin] = (void *)x;
362: PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
363: PetscUseTypeMethod(x, dot_local, y, sr->lvalues + sr->numopsbegin++);
364: PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
365: PetscFunctionReturn(PETSC_SUCCESS);
366: }
368: /*@
369: VecDotEnd - Ends a split phase dot product computation.
371: Input Parameters:
372: + x - the first vector (can be `NULL`)
373: . y - the second vector (can be `NULL`)
374: - result - where the result will go
376: Level: advanced
378: Notes:
379: Each call to `VecDotBegin()` should be paired with a call to `VecDotEnd()`.
381: .seealso: `VecDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
382: `VecTDotBegin()`, `VecTDotEnd()`, `PetscCommSplitReductionBegin()`
383: @*/
384: PetscErrorCode VecDotEnd(Vec x, Vec y, PetscScalar *result)
385: {
386: PetscSplitReduction *sr;
387: MPI_Comm comm;
389: PetscFunctionBegin;
390: if (PetscDefined(HAVE_THREADSAFETY)) PetscFunctionReturn(PETSC_SUCCESS);
391: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
392: PetscCall(PetscSplitReductionGet(comm, &sr));
393: PetscCall(PetscSplitReductionEnd(sr));
395: PetscCheck(sr->numopsend < sr->numopsbegin, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() more times than VecxxxBegin()");
396: PetscCheck(!x || (void *)x == sr->invecs[sr->numopsend], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
397: PetscCheck(sr->reducetype[sr->numopsend] == PETSC_SR_REDUCE_SUM, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecDotEnd() on a reduction started with VecNormBegin()");
398: *result = sr->gvalues[sr->numopsend++];
400: /*
401: We are finished getting all the results so reset to no outstanding requests
402: */
403: if (sr->numopsend == sr->numopsbegin) {
404: sr->state = STATE_BEGIN;
405: sr->numopsend = 0;
406: sr->numopsbegin = 0;
407: sr->mix = PETSC_FALSE;
408: }
409: PetscFunctionReturn(PETSC_SUCCESS);
410: }
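/*
   A sketch combining a sum-type and a max-type reduction in one batch
   (illustrative only). Because one queued operation is a SUM and the other a
   MAX, the communication completes through the mixed MPIU_SCALAR_INT /
   PetscSplitReduction_Op path set up earlier in this file.

     PetscScalar dp;
     PetscReal   mx;

     PetscCall(VecDotBegin(x, y, &dp));
     PetscCall(VecNormBegin(x, NORM_MAX, &mx));
     PetscCall(VecDotEnd(x, y, &dp));
     PetscCall(VecNormEnd(x, NORM_MAX, &mx));
*/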
412: /*@
413: VecTDotBegin - Starts a split phase transpose dot product computation.
415: Input Parameters:
416: + x - the first vector
417: . y - the second vector
418: - result - where the result will go (can be `NULL`)
420: Level: advanced
422: Notes:
423: Each call to `VecTDotBegin()` should be paired with a call to `VecTDotEnd()`.
425: .seealso: `VecTDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
426: `VecDotBegin()`, `VecDotEnd()`, `PetscCommSplitReductionBegin()`
427: @*/
428: PetscErrorCode VecTDotBegin(Vec x, Vec y, PetscScalar *result)
429: {
430: PetscSplitReduction *sr;
431: MPI_Comm comm;
433: PetscFunctionBegin;
434: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
435: if (PetscDefined(HAVE_THREADSAFETY)) {
436: PetscCheck(result, comm, PETSC_ERR_ARG_NULL, "result cannot be NULL when configuring --with-threadsafety");
437: PetscCall(VecTDot(x, y, result));
438: PetscFunctionReturn(PETSC_SUCCESS);
439: }
440: PetscCall(PetscSplitReductionGet(comm, &sr));
441: PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
442: if (sr->numopsbegin >= sr->maxops) PetscCall(PetscSplitReductionExtend(sr));
443: sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
444: sr->invecs[sr->numopsbegin] = (void *)x;
445: PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
446: PetscUseTypeMethod(x, tdot_local, y, sr->lvalues + sr->numopsbegin++);
447: PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
448: PetscFunctionReturn(PETSC_SUCCESS);
449: }
451: /*@
452: VecTDotEnd - Ends a split phase transpose dot product computation.
454: Input Parameters:
455: + x - the first vector (can be `NULL`)
456: . y - the second vector (can be `NULL`)
457: - result - where the result will go
459: Level: advanced
461: Notes:
462: Each call to `VecTDotBegin()` should be paired with a call to `VecTDotEnd()`.
464: .seealso: `VecTDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
465: `VecDotBegin()`, `VecDotEnd()`
466: @*/
467: PetscErrorCode VecTDotEnd(Vec x, Vec y, PetscScalar *result)
468: {
469: PetscFunctionBegin;
470: /*
471: TDotEnd() is the same as DotEnd() so reuse the code
472: */
473: PetscCall(VecDotEnd(x, y, result));
474: PetscFunctionReturn(PETSC_SUCCESS);
475: }
477: /* -------------------------------------------------------------------------*/
479: /*@
480: VecNormBegin - Starts a split phase norm computation.
482: Input Parameters:
483: + x - the first vector
484: . ntype - norm type, one of `NORM_1`, `NORM_2`, `NORM_MAX`, `NORM_1_AND_2`
485: - result - where the result will go (can be `NULL`)
487: Level: advanced
489: Notes:
490: Each call to `VecNormBegin()` should be paired with a call to `VecNormEnd()`.
492: .seealso: `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`, `VecDotBegin()`, `VecDotEnd()`, `PetscCommSplitReductionBegin()`
493: @*/
494: PetscErrorCode VecNormBegin(Vec x, NormType ntype, PetscReal *result)
495: {
496: PetscSplitReduction *sr;
497: PetscReal lresult[2];
498: MPI_Comm comm;
500: PetscFunctionBegin;
502: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
503: if (PetscDefined(HAVE_THREADSAFETY)) {
504: PetscCheck(result, comm, PETSC_ERR_ARG_NULL, "result cannot be NULL when configuring --with-threadsafety");
505: PetscCall(PetscObjectStateIncrease((PetscObject)x)); // increase PetscObjectState to invalidate cached norms
506: PetscCall(VecNorm(x, ntype, result));
507: PetscFunctionReturn(PETSC_SUCCESS);
508: }
509: PetscCall(PetscSplitReductionGet(comm, &sr));
510: PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
511: if (sr->numopsbegin >= sr->maxops || (sr->numopsbegin == sr->maxops - 1 && ntype == NORM_1_AND_2)) PetscCall(PetscSplitReductionExtend(sr));
513: sr->invecs[sr->numopsbegin] = (void *)x;
514: PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
515: PetscUseTypeMethod(x, norm_local, ntype, lresult);
516: PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
517: if (ntype == NORM_2) lresult[0] = lresult[0] * lresult[0];
518: if (ntype == NORM_1_AND_2) lresult[1] = lresult[1] * lresult[1];
519: if (ntype == NORM_MAX) sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_MAX;
520: else sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
521: sr->lvalues[sr->numopsbegin++] = lresult[0];
522: if (ntype == NORM_1_AND_2) {
523: sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
524: sr->lvalues[sr->numopsbegin++] = lresult[1];
525: }
526: PetscFunctionReturn(PETSC_SUCCESS);
527: }
529: /*@
530: VecNormEnd - Ends a split phase norm computation.
532: Input Parameters:
533: + x - the first vector
534: . ntype - norm type, one of `NORM_1`, `NORM_2`, `NORM_MAX`, `NORM_1_AND_2`
535: - result - where the result will go
537: Level: advanced
539: Notes:
540: Each call to `VecNormBegin()` should be paired with a call to `VecNormEnd()`.
542: The `x` vector is not allowed to be `NULL`; otherwise the vector would not have its norm value correctly cached.
544: .seealso: `VecNormBegin()`, `VecNorm()`, `VecDot()`, `VecMDot()`, `VecDotBegin()`, `VecDotEnd()`, `PetscCommSplitReductionBegin()`
545: @*/
546: PetscErrorCode VecNormEnd(Vec x, NormType ntype, PetscReal *result)
547: {
548: PetscSplitReduction *sr;
549: MPI_Comm comm;
551: PetscFunctionBegin;
552: if (PetscDefined(HAVE_THREADSAFETY)) PetscFunctionReturn(PETSC_SUCCESS);
554: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
555: PetscCall(PetscSplitReductionGet(comm, &sr));
556: PetscCall(PetscSplitReductionEnd(sr));
558: PetscCheck(sr->numopsend < sr->numopsbegin, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() more times than VecxxxBegin()");
559: PetscCheck((void *)x == sr->invecs[sr->numopsend], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
560: PetscCheck(sr->reducetype[sr->numopsend] == PETSC_SR_REDUCE_MAX || ntype != NORM_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecNormEnd(,NORM_MAX,) on a reduction started with VecDotBegin() or NORM_1 or NORM_2");
561: result[0] = PetscRealPart(sr->gvalues[sr->numopsend++]);
563: if (ntype == NORM_2) result[0] = PetscSqrtReal(result[0]);
564: else if (ntype == NORM_1_AND_2) {
565: result[1] = PetscRealPart(sr->gvalues[sr->numopsend++]);
566: result[1] = PetscSqrtReal(result[1]);
567: }
568: if (ntype != NORM_1_AND_2) PetscCall(PetscObjectComposedDataSetReal((PetscObject)x, NormIds[ntype], result[0]));
570: if (sr->numopsend == sr->numopsbegin) {
571: sr->state = STATE_BEGIN;
572: sr->numopsend = 0;
573: sr->numopsbegin = 0;
574: }
575: PetscFunctionReturn(PETSC_SUCCESS);
576: }
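/*
   A sketch of the NORM_1_AND_2 case (illustrative only): one Begin/End pair
   returns both norms and internally occupies two reduction slots, which is why
   VecNormBegin() extends the queue early for this norm type.

     PetscReal norms[2];   (norms[0] receives the 1-norm, norms[1] the 2-norm)

     PetscCall(VecNormBegin(x, NORM_1_AND_2, norms));
     PetscCall(VecNormEnd(x, NORM_1_AND_2, norms));
*/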
578: /*
579: Possibly add
581: PetscReductionSumBegin/End()
582: PetscReductionMaxBegin/End()
583: PetscReductionMinBegin/End()
584: or, more like MPI, a single function taking a flag for the Op? The first option seems better.
585: */
587: /*@
588: VecMDotBegin - Starts a split phase multiple dot product computation.
590: Input Parameters:
591: + x - the first vector
592: . nv - number of vectors
593: . y - array of vectors
594: - result - where the result will go (can be `NULL`)
596: Level: advanced
598: Notes:
599: Each call to `VecMDotBegin()` should be paired with a call to `VecMDotEnd()`.
601: .seealso: `VecMDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
602: `VecTDotBegin()`, `VecTDotEnd()`, `VecMTDotBegin()`, `VecMTDotEnd()`, `PetscCommSplitReductionBegin()`
603: @*/
604: PetscErrorCode VecMDotBegin(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
605: {
606: PetscSplitReduction *sr;
607: MPI_Comm comm;
608: PetscInt i;
610: PetscFunctionBegin;
611: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
612: if (PetscDefined(HAVE_THREADSAFETY)) {
613: PetscCheck(result, comm, PETSC_ERR_ARG_NULL, "result cannot be NULL when configuring --with-threadsafety");
614: PetscCall(VecMDot(x, nv, y, result));
615: PetscFunctionReturn(PETSC_SUCCESS);
616: }
617: PetscCall(PetscSplitReductionGet(comm, &sr));
618: PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
619: for (i = 0; i < nv; i++) {
620: if (sr->numopsbegin + i >= sr->maxops) PetscCall(PetscSplitReductionExtend(sr));
621: sr->reducetype[sr->numopsbegin + i] = PETSC_SR_REDUCE_SUM;
622: sr->invecs[sr->numopsbegin + i] = (void *)x;
623: }
624: PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
625: PetscUseTypeMethod(x, mdot_local, nv, y, sr->lvalues + sr->numopsbegin);
626: PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
627: sr->numopsbegin += nv;
628: PetscFunctionReturn(PETSC_SUCCESS);
629: }
631: /*@
632: VecMDotEnd - Ends a split phase multiple dot product computation.
634: Input Parameters:
635: + x - the first vector (can be `NULL`)
636: . nv - number of vectors
637: - y - array of vectors (can be `NULL`)
639: Output Parameter:
640: . result - where the result will go
642: Level: advanced
644: Notes:
645: Each call to `VecMDotBegin()` should be paired with a call to `VecMDotEnd()`.
647: .seealso: `VecMDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
648: `VecTDotBegin()`, `VecTDotEnd()`, `VecMTDotBegin()`, `VecMTDotEnd()`, `PetscCommSplitReductionBegin()`
649: @*/
650: PetscErrorCode VecMDotEnd(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
651: {
652: PetscSplitReduction *sr;
653: MPI_Comm comm;
654: PetscInt i;
656: PetscFunctionBegin;
657: if (PetscDefined(HAVE_THREADSAFETY)) PetscFunctionReturn(PETSC_SUCCESS);
658: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
659: PetscCall(PetscSplitReductionGet(comm, &sr));
660: PetscCall(PetscSplitReductionEnd(sr));
662: PetscCheck(sr->numopsend < sr->numopsbegin, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() more times than VecxxxBegin()");
663: PetscCheck(!x || (void *)x == sr->invecs[sr->numopsend], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
664: PetscCheck(sr->reducetype[sr->numopsend] == PETSC_SR_REDUCE_SUM, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecDotEnd() on a reduction started with VecNormBegin()");
665: for (i = 0; i < nv; i++) result[i] = sr->gvalues[sr->numopsend++];
667: /*
668: We are finished getting all the results so reset to no outstanding requests
669: */
670: if (sr->numopsend == sr->numopsbegin) {
671: sr->state = STATE_BEGIN;
672: sr->numopsend = 0;
673: sr->numopsbegin = 0;
674: }
675: PetscFunctionReturn(PETSC_SUCCESS);
676: }
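/*
   A sketch of the multiple dot product interface (illustrative only; x and the
   nv = 3 vectors in y[] are assumed to be assembled and to share a communicator).
   All nv partial results travel in the same combined reduction.

     PetscScalar results[3];

     PetscCall(VecMDotBegin(x, 3, y, results));
     PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)x)));
     (... other local work ...)
     PetscCall(VecMDotEnd(x, 3, y, results));
*/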
678: /*@
679: VecMTDotBegin - Starts a split phase transpose multiple dot product computation.
681: Input Parameters:
682: + x - the first vector
683: . nv - number of vectors
684: . y - array of vectors
685: - result - where the result will go (can be `NULL`)
687: Level: advanced
689: Notes:
690: Each call to `VecMTDotBegin()` should be paired with a call to `VecMTDotEnd()`.
692: .seealso: `VecMTDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
693: `VecDotBegin()`, `VecDotEnd()`, `VecMDotBegin()`, `VecMDotEnd()`, `PetscCommSplitReductionBegin()`
694: @*/
695: PetscErrorCode VecMTDotBegin(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
696: {
697: PetscSplitReduction *sr;
698: MPI_Comm comm;
699: PetscInt i;
701: PetscFunctionBegin;
702: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
703: if (PetscDefined(HAVE_THREADSAFETY)) {
704: PetscCheck(result, comm, PETSC_ERR_ARG_NULL, "result cannot be NULL when configuring --with-threadsafety");
705: PetscCall(VecMTDot(x, nv, y, result));
706: PetscFunctionReturn(PETSC_SUCCESS);
707: }
708: PetscCall(PetscSplitReductionGet(comm, &sr));
709: PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
710: for (i = 0; i < nv; i++) {
711: if (sr->numopsbegin + i >= sr->maxops) PetscCall(PetscSplitReductionExtend(sr));
712: sr->reducetype[sr->numopsbegin + i] = PETSC_SR_REDUCE_SUM;
713: sr->invecs[sr->numopsbegin + i] = (void *)x;
714: }
715: PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
716: PetscUseTypeMethod(x, mtdot_local, nv, y, sr->lvalues + sr->numopsbegin);
717: PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
718: sr->numopsbegin += nv;
719: PetscFunctionReturn(PETSC_SUCCESS);
720: }
722: /*@
723: VecMTDotEnd - Ends a split phase transpose multiple dot product computation.
725: Input Parameters:
726: + x - the first vector (can be `NULL`)
727: . nv - number of vectors
728: - y - array of vectors (can be `NULL`)
730: Output Parameter:
731: . result - where the result will go
733: Level: advanced
735: Notes:
736: Each call to `VecMTDotBegin()` should be paired with a call to `VecMTDotEnd()`.
738: .seealso: `VecMTDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
739: `VecDotBegin()`, `VecDotEnd()`, `VecMDotBegin()`, `VecMDotEnd()`, `PetscCommSplitReductionBegin()`
740: @*/
741: PetscErrorCode VecMTDotEnd(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
742: {
743: PetscFunctionBegin;
744: /*
745: MTDotEnd() is the same as MDotEnd() so reuse the code
746: */
747: PetscCall(VecMDotEnd(x, nv, y, result));
748: PetscFunctionReturn(PETSC_SUCCESS);
749: }