Actual source code: comb.c

  1: /*
  2:       Split phase global vector reductions with support for combining the
  3:    communication portion of several operations. Using MPI-1.1 support only

  5:       The idea for this and much of the initial code is contributed by
  6:    Victor Eijkhout.

  8:        Usage:
  9:              VecDotBegin(Vec,Vec,PetscScalar *);
 10:              VecNormBegin(Vec,NormType,PetscReal *);
 11:              ....
 12:              VecDotEnd(Vec,Vec,PetscScalar *);
 13:              VecNormEnd(Vec,NormType,PetscReal *);

 15:        Limitations:
 16:          - The order of the xxxEnd() functions MUST be in the same order
 17:            as the xxxBegin(). There is extensive error checking to try to
 18:            insure that the user calls the routines in the correct order
 19: */

 21: #include <petsc/private/vecimpl.h>

 23: static PetscMPIInt MPIU_Iallreduce(void *sendbuf, void *recvbuf, PetscMPIInt count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request)
 24: {
 25:   PetscMPIInt err;

 27: #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
 28:   err = MPI_Iallreduce(sendbuf, recvbuf, count, datatype, op, comm, request);
 29: #else
 30:   err      = MPIU_Allreduce(sendbuf, recvbuf, count, datatype, op, comm);
 31:   *request = MPI_REQUEST_NULL;
 32: #endif
 33:   return err;
 34: }

 36: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *);

 38: /*
 39:    PetscSplitReductionCreate - Creates a data structure to contain the queued information.
 40: */
 41: static PetscErrorCode PetscSplitReductionCreate(MPI_Comm comm, PetscSplitReduction **sr)
 42: {
 43:   PetscFunctionBegin;
 44:   PetscCall(PetscNew(sr));
 45:   (*sr)->numopsbegin = 0;
 46:   (*sr)->numopsend   = 0;
 47:   (*sr)->state       = STATE_BEGIN;
 48: #define MAXOPS 32
 49:   (*sr)->maxops = MAXOPS;
 50:   PetscCall(PetscMalloc6(MAXOPS, &(*sr)->lvalues, MAXOPS, &(*sr)->gvalues, MAXOPS, &(*sr)->invecs, MAXOPS, &(*sr)->reducetype, MAXOPS, &(*sr)->lvalues_mix, MAXOPS, &(*sr)->gvalues_mix));
 51: #undef MAXOPS
 52:   (*sr)->comm    = comm;
 53:   (*sr)->request = MPI_REQUEST_NULL;
 54:   (*sr)->mix     = PETSC_FALSE;
 55:   (*sr)->async   = PETSC_FALSE;
 56: #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
 57:   (*sr)->async = PETSC_TRUE; /* Enable by default */
 58: #endif
 59:   /* always check for option; so that tests that run on systems without support don't warn about unhandled options */
 60:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-splitreduction_async", &(*sr)->async, NULL));
 61:   PetscFunctionReturn(PETSC_SUCCESS);
 62: }

 64: /*
 65:        This function is the MPI reduction operation used when there is
 66:    a combination of sums and max in the reduction. The call below to
 67:    MPI_Op_create() converts the function PetscSplitReduction_Local() to the
 68:    MPI operator PetscSplitReduction_Op.
 69: */
 70: MPI_Op PetscSplitReduction_Op = 0;

 72: PETSC_EXTERN void MPIAPI PetscSplitReduction_Local(void *in, void *out, PetscMPIInt *cnt, MPI_Datatype *datatype)
 73: {
 74:   struct PetscScalarInt {
 75:     PetscScalar v;
 76:     PetscInt    i;
 77:   };
 78:   struct PetscScalarInt *xin  = (struct PetscScalarInt *)in;
 79:   struct PetscScalarInt *xout = (struct PetscScalarInt *)out;
 80:   PetscInt               i, count = *cnt;

 82:   PetscFunctionBegin;
 83:   if (*datatype != MPIU_SCALAR_INT) {
 84:     PetscCallAbort(MPI_COMM_SELF, (*PetscErrorPrintf)("Can only handle MPIU_SCALAR_INT data types"));
 85:     PETSCABORT(MPI_COMM_SELF, PETSC_ERR_ARG_WRONG);
 86:   }
 87:   for (i = 0; i < count; i++) {
 88:     if (xin[i].i == PETSC_SR_REDUCE_SUM) xout[i].v += xin[i].v;
 89:     else if (xin[i].i == PETSC_SR_REDUCE_MAX) xout[i].v = PetscMax(PetscRealPart(xout[i].v), PetscRealPart(xin[i].v));
 90:     else if (xin[i].i == PETSC_SR_REDUCE_MIN) xout[i].v = PetscMin(PetscRealPart(xout[i].v), PetscRealPart(xin[i].v));
 91:     else {
 92:       PetscCallAbort(MPI_COMM_SELF, (*PetscErrorPrintf)("Reduction type input is not PETSC_SR_REDUCE_SUM, PETSC_SR_REDUCE_MAX, or PETSC_SR_REDUCE_MIN"));
 93:       PETSCABORT(MPI_COMM_SELF, PETSC_ERR_ARG_WRONG);
 94:     }
 95:   }
 96:   PetscFunctionReturnVoid();
 97: }

 99: /*@
100:   PetscCommSplitReductionBegin - Begin an asynchronous split-mode reduction

102:   Collective but not synchronizing

104:   Input Parameter:
105: . comm - communicator on which split reduction has been queued

107:   Level: advanced

109:   Note:
110:   Calling this function is optional when using split-mode reduction. On supporting hardware,
111:   calling this after all VecXxxBegin() allows the reduction to make asynchronous progress
112:   before the result is needed (in VecXxxEnd()).

114: .seealso: `VecNormBegin()`, `VecNormEnd()`, `VecDotBegin()`, `VecDotEnd()`, `VecTDotBegin()`, `VecTDotEnd()`, `VecMDotBegin()`, `VecMDotEnd()`, `VecMTDotBegin()`, `VecMTDotEnd()`
115: @*/
116: PetscErrorCode PetscCommSplitReductionBegin(MPI_Comm comm)
117: {
118:   PetscSplitReduction *sr;

120:   PetscFunctionBegin;
121:   if (PetscDefined(HAVE_THREADSAFETY)) PetscFunctionReturn(PETSC_SUCCESS);
122:   PetscCall(PetscSplitReductionGet(comm, &sr));
123:   PetscCheck(sr->numopsend <= 0, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot call this after VecxxxEnd() has been called");
124:   if (sr->async) { /* Bad reuse, setup code copied from PetscSplitReductionApply(). */
125:     PetscMPIInt           numops     = sr->numopsbegin;
126:     PetscSRReductionType *reducetype = sr->reducetype;
127:     PetscScalar          *lvalues = sr->lvalues, *gvalues = sr->gvalues;
128:     PetscInt              sum_flg = 0, max_flg = 0, min_flg = 0;
129:     MPI_Comm              comm = sr->comm;
130:     PetscMPIInt           size, cmul = sizeof(PetscScalar) / sizeof(PetscReal);

132:     PetscCall(PetscLogEventBegin(VEC_ReduceBegin, 0, 0, 0, 0));
133:     PetscCallMPI(MPI_Comm_size(sr->comm, &size));
134:     if (size == 1) {
135:       PetscCall(PetscArraycpy(gvalues, lvalues, numops));
136:     } else {
137:       /* determine if all reductions are sum, max, or min */
138:       for (PetscMPIInt i = 0; i < numops; i++) {
139:         if (reducetype[i] == PETSC_SR_REDUCE_MAX) max_flg = 1;
140:         else if (reducetype[i] == PETSC_SR_REDUCE_SUM) sum_flg = 1;
141:         else if (reducetype[i] == PETSC_SR_REDUCE_MIN) min_flg = 1;
142:         else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
143:       }
144:       PetscCheck(sum_flg + max_flg + min_flg <= 1 || !sr->mix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
145:       if (sum_flg + max_flg + min_flg > 1) {
146:         sr->mix = PETSC_TRUE;
147:         for (PetscMPIInt i = 0; i < numops; i++) {
148:           sr->lvalues_mix[i].v = lvalues[i];
149:           sr->lvalues_mix[i].i = reducetype[i];
150:         }
151:         PetscCallMPI(MPIU_Iallreduce(sr->lvalues_mix, sr->gvalues_mix, numops, MPIU_SCALAR_INT, PetscSplitReduction_Op, comm, &sr->request));
152:       } else if (max_flg) { /* Compute max of real and imag parts separately, presumably only the real part is used */
153:         PetscCallMPI(MPIU_Iallreduce(lvalues, gvalues, cmul * numops, MPIU_REAL, MPIU_MAX, comm, &sr->request));
154:       } else if (min_flg) {
155:         PetscCallMPI(MPIU_Iallreduce(lvalues, gvalues, cmul * numops, MPIU_REAL, MPIU_MIN, comm, &sr->request));
156:       } else {
157:         PetscCallMPI(MPIU_Iallreduce(lvalues, gvalues, numops, MPIU_SCALAR, MPIU_SUM, comm, &sr->request));
158:       }
159:     }
160:     sr->state     = STATE_PENDING;
161:     sr->numopsend = 0;
162:     PetscCall(PetscLogEventEnd(VEC_ReduceBegin, 0, 0, 0, 0));
163:   } else {
164:     PetscCall(PetscSplitReductionApply(sr));
165:   }
166:   PetscFunctionReturn(PETSC_SUCCESS);
167: }

169: PetscErrorCode PetscSplitReductionEnd(PetscSplitReduction *sr)
170: {
171:   PetscFunctionBegin;
172:   switch (sr->state) {
173:   case STATE_BEGIN: /* We are doing synchronous communication and this is the first call to VecXxxEnd() so do the communication */
174:     PetscCall(PetscSplitReductionApply(sr));
175:     break;
176:   case STATE_PENDING:
177:     /* We are doing asynchronous-mode communication and this is the first VecXxxEnd() so wait for comm to complete */
178:     PetscCall(PetscLogEventBegin(VEC_ReduceEnd, 0, 0, 0, 0));
179:     if (sr->request != MPI_REQUEST_NULL) PetscCallMPI(MPI_Wait(&sr->request, MPI_STATUS_IGNORE));
180:     sr->state = STATE_END;
181:     if (sr->mix) {
182:       for (PetscMPIInt i = 0; i < sr->numopsbegin; i++) sr->gvalues[i] = sr->gvalues_mix[i].v;
183:       sr->mix = PETSC_FALSE;
184:     }
185:     PetscCall(PetscLogEventEnd(VEC_ReduceEnd, 0, 0, 0, 0));
186:     break;
187:   default:
188:     break; /* everything is already done */
189:   }
190:   PetscFunctionReturn(PETSC_SUCCESS);
191: }

193: /*
194:    PetscSplitReductionApply - Actually do the communication required for a split phase reduction
195: */
196: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *sr)
197: {
198:   PetscMPIInt           numops     = sr->numopsbegin;
199:   PetscSRReductionType *reducetype = sr->reducetype;
200:   PetscScalar          *lvalues = sr->lvalues, *gvalues = sr->gvalues;
201:   PetscInt              sum_flg = 0, max_flg = 0, min_flg = 0;
202:   MPI_Comm              comm = sr->comm;
203:   PetscMPIInt           size, cmul = sizeof(PetscScalar) / sizeof(PetscReal);

205:   PetscFunctionBegin;
206:   PetscCheck(sr->numopsend <= 0, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot call this after VecxxxEnd() has been called");
207:   PetscCall(PetscLogEventBegin(VEC_ReduceCommunication, 0, 0, 0, 0));
208:   PetscCallMPI(MPI_Comm_size(sr->comm, &size));
209:   if (size == 1) {
210:     PetscCall(PetscArraycpy(gvalues, lvalues, numops));
211:   } else {
212:     /* determine if all reductions are sum, max, or min */
213:     for (PetscMPIInt i = 0; i < numops; i++) {
214:       if (reducetype[i] == PETSC_SR_REDUCE_MAX) max_flg = 1;
215:       else if (reducetype[i] == PETSC_SR_REDUCE_SUM) sum_flg = 1;
216:       else if (reducetype[i] == PETSC_SR_REDUCE_MIN) min_flg = 1;
217:       else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
218:     }
219:     if (sum_flg + max_flg + min_flg > 1) {
220:       PetscCheck(!sr->mix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
221:       for (PetscMPIInt i = 0; i < numops; i++) {
222:         sr->lvalues_mix[i].v = lvalues[i];
223:         sr->lvalues_mix[i].i = reducetype[i];
224:       }
225:       PetscCallMPI(MPIU_Allreduce(sr->lvalues_mix, sr->gvalues_mix, numops, MPIU_SCALAR_INT, PetscSplitReduction_Op, comm));
226:       for (PetscMPIInt i = 0; i < numops; i++) sr->gvalues[i] = sr->gvalues_mix[i].v;
227:     } else if (max_flg) { /* Compute max of real and imag parts separately, presumably only the real part is used */
228:       PetscCallMPI(MPIU_Allreduce(lvalues, gvalues, cmul * numops, MPIU_REAL, MPIU_MAX, comm));
229:     } else if (min_flg) {
230:       PetscCallMPI(MPIU_Allreduce(lvalues, gvalues, cmul * numops, MPIU_REAL, MPIU_MIN, comm));
231:     } else {
232:       PetscCallMPI(MPIU_Allreduce(lvalues, gvalues, numops, MPIU_SCALAR, MPIU_SUM, comm));
233:     }
234:   }
235:   sr->state     = STATE_END;
236:   sr->numopsend = 0;
237:   PetscCall(PetscLogEventEnd(VEC_ReduceCommunication, 0, 0, 0, 0));
238:   PetscFunctionReturn(PETSC_SUCCESS);
239: }

241: /*
242:    PetscSplitReductionExtend - Double the amount of space (slots) allocated for a split reduction object.
243: */
244: PetscErrorCode PetscSplitReductionExtend(PetscSplitReduction *sr)
245: {
246:   struct PetscScalarInt {
247:     PetscScalar v;
248:     PetscInt    i;
249:   };
250:   PetscMPIInt            maxops     = sr->maxops;
251:   PetscSRReductionType  *reducetype = sr->reducetype;
252:   PetscScalar           *lvalues = sr->lvalues, *gvalues = sr->gvalues;
253:   struct PetscScalarInt *lvalues_mix = (struct PetscScalarInt *)sr->lvalues_mix;
254:   struct PetscScalarInt *gvalues_mix = (struct PetscScalarInt *)sr->gvalues_mix;
255:   void                 **invecs      = sr->invecs;

257:   PetscFunctionBegin;
258:   sr->maxops = 2 * maxops;
259:   PetscCall(PetscMalloc6(2 * maxops, &sr->lvalues, 2 * maxops, &sr->gvalues, 2 * maxops, &sr->reducetype, 2 * maxops, &sr->invecs, 2 * maxops, &sr->lvalues_mix, 2 * maxops, &sr->gvalues_mix));
260:   PetscCall(PetscArraycpy(sr->lvalues, lvalues, maxops));
261:   PetscCall(PetscArraycpy(sr->gvalues, gvalues, maxops));
262:   PetscCall(PetscArraycpy(sr->reducetype, reducetype, maxops));
263:   PetscCall(PetscArraycpy(sr->invecs, invecs, maxops));
264:   PetscCall(PetscArraycpy(sr->lvalues_mix, lvalues_mix, maxops));
265:   PetscCall(PetscArraycpy(sr->gvalues_mix, gvalues_mix, maxops));
266:   PetscCall(PetscFree6(lvalues, gvalues, reducetype, invecs, lvalues_mix, gvalues_mix));
267:   PetscFunctionReturn(PETSC_SUCCESS);
268: }

270: static PetscErrorCode PetscSplitReductionDestroy(PetscSplitReduction *sr)
271: {
272:   PetscFunctionBegin;
273:   PetscCall(PetscFree6(sr->lvalues, sr->gvalues, sr->reducetype, sr->invecs, sr->lvalues_mix, sr->gvalues_mix));
274:   PetscCall(PetscFree(sr));
275:   PetscFunctionReturn(PETSC_SUCCESS);
276: }

278: PetscMPIInt Petsc_Reduction_keyval = MPI_KEYVAL_INVALID;

280: /*
281:    Private routine to delete internal storage when a communicator is freed.
282:   This is called by MPI, not by users.

284:   The binding for the first argument changed from MPI 1.0 to 1.1; in 1.0
285:   it was MPI_Comm *comm.
286: */
287: static PetscMPIInt MPIAPI Petsc_DelReduction(MPI_Comm comm, PETSC_UNUSED PetscMPIInt keyval, void *attr_val, PETSC_UNUSED void *extra_state)
288: {
289:   PetscFunctionBegin;
290:   PetscCallReturnMPI(PetscInfo(0, "Deleting reduction data in an MPI_Comm %ld\n", (long)comm));
291:   PetscCallReturnMPI(PetscSplitReductionDestroy((PetscSplitReduction *)attr_val));
292:   PetscFunctionReturn(PETSC_SUCCESS);
293: }

295: /*
296:      PetscSplitReductionGet - Gets the split reduction object from a
297:         PETSc vector, creates if it does not exit.

299: */
300: PetscErrorCode PetscSplitReductionGet(MPI_Comm comm, PetscSplitReduction **sr)
301: {
302:   PetscMPIInt flag;

304:   PetscFunctionBegin;
305:   PetscCheck(!PetscDefined(HAVE_THREADSAFETY), comm, PETSC_ERR_SUP, "PetscSplitReductionGet() is not thread-safe");
306:   if (Petsc_Reduction_keyval == MPI_KEYVAL_INVALID) {
307:     /*
308:        The calling sequence of the 2nd argument to this function changed
309:        between MPI Standard 1.0 and the revisions 1.1 Here we match the
310:        new standard, if you are using an MPI implementation that uses
311:        the older version you will get a warning message about the next line;
312:        it is only a warning message and should do no harm.
313:     */
314:     PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_DelReduction, &Petsc_Reduction_keyval, NULL));
315:   }
316:   PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Reduction_keyval, (void **)sr, &flag));
317:   if (!flag) { /* doesn't exist yet so create it and put it in */
318:     PetscCall(PetscSplitReductionCreate(comm, sr));
319:     PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_Reduction_keyval, *sr));
320:     PetscCall(PetscInfo(0, "Putting reduction data in an MPI_Comm %ld\n", (long)comm));
321:   }
322:   PetscFunctionReturn(PETSC_SUCCESS);
323: }

325: /* ----------------------------------------------------------------------------------------------------*/

327: /*@
328:   VecDotBegin - Starts a split phase dot product computation.

330:   Input Parameters:
331: + x      - the first vector
332: . y      - the second vector
333: - result - where the result will go (can be `NULL`)

335:   Level: advanced

337:   Notes:
338:   Each call to `VecDotBegin()` should be paired with a call to `VecDotEnd()`.

340: .seealso: `VecDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
341:           `VecTDotBegin()`, `VecTDotEnd()`, `PetscCommSplitReductionBegin()`
342: @*/
343: PetscErrorCode VecDotBegin(Vec x, Vec y, PetscScalar *result)
344: {
345:   PetscSplitReduction *sr;
346:   MPI_Comm             comm;

348:   PetscFunctionBegin;
351:   PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
352:   if (PetscDefined(HAVE_THREADSAFETY)) {
353:     PetscCheck(result, comm, PETSC_ERR_ARG_NULL, "result cannot be NULL when configuring --with-threadsafety");
354:     PetscCall(VecDot(x, y, result));
355:     PetscFunctionReturn(PETSC_SUCCESS);
356:   }
357:   PetscCall(PetscSplitReductionGet(comm, &sr));
358:   PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
359:   if (sr->numopsbegin >= sr->maxops) PetscCall(PetscSplitReductionExtend(sr));
360:   sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
361:   sr->invecs[sr->numopsbegin]     = (void *)x;
362:   PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
363:   PetscUseTypeMethod(x, dot_local, y, sr->lvalues + sr->numopsbegin++);
364:   PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
365:   PetscFunctionReturn(PETSC_SUCCESS);
366: }

368: /*@
369:   VecDotEnd - Ends a split phase dot product computation.

371:   Input Parameters:
372: + x      - the first vector (can be `NULL`)
373: . y      - the second vector (can be `NULL`)
374: - result - where the result will go

376:   Level: advanced

378:   Notes:
379:   Each call to `VecDotBegin()` should be paired with a call to `VecDotEnd()`.

381: .seealso: `VecDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
382:           `VecTDotBegin()`, `VecTDotEnd()`, `PetscCommSplitReductionBegin()`
383: @*/
384: PetscErrorCode VecDotEnd(Vec x, Vec y, PetscScalar *result)
385: {
386:   PetscSplitReduction *sr;
387:   MPI_Comm             comm;

389:   PetscFunctionBegin;
390:   if (PetscDefined(HAVE_THREADSAFETY)) PetscFunctionReturn(PETSC_SUCCESS);
391:   PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
392:   PetscCall(PetscSplitReductionGet(comm, &sr));
393:   PetscCall(PetscSplitReductionEnd(sr));

395:   PetscCheck(sr->numopsend < sr->numopsbegin, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() more times then VecxxxBegin()");
396:   PetscCheck(!x || (void *)x == sr->invecs[sr->numopsend], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
397:   PetscCheck(sr->reducetype[sr->numopsend] == PETSC_SR_REDUCE_SUM, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecDotEnd() on a reduction started with VecNormBegin()");
398:   *result = sr->gvalues[sr->numopsend++];

400:   /*
401:      We are finished getting all the results so reset to no outstanding requests
402:   */
403:   if (sr->numopsend == sr->numopsbegin) {
404:     sr->state       = STATE_BEGIN;
405:     sr->numopsend   = 0;
406:     sr->numopsbegin = 0;
407:     sr->mix         = PETSC_FALSE;
408:   }
409:   PetscFunctionReturn(PETSC_SUCCESS);
410: }

412: /*@
413:   VecTDotBegin - Starts a split phase transpose dot product computation.

415:   Input Parameters:
416: + x      - the first vector
417: . y      - the second vector
418: - result - where the result will go (can be `NULL`)

420:   Level: advanced

422:   Notes:
423:   Each call to `VecTDotBegin()` should be paired with a call to `VecTDotEnd()`.

425: .seealso: `VecTDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
426:           `VecDotBegin()`, `VecDotEnd()`, `PetscCommSplitReductionBegin()`
427: @*/
428: PetscErrorCode VecTDotBegin(Vec x, Vec y, PetscScalar *result)
429: {
430:   PetscSplitReduction *sr;
431:   MPI_Comm             comm;

433:   PetscFunctionBegin;
434:   PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
435:   if (PetscDefined(HAVE_THREADSAFETY)) {
436:     PetscCheck(result, comm, PETSC_ERR_ARG_NULL, "result cannot be NULL when configuring --with-threadsafety");
437:     PetscCall(VecTDot(x, y, result));
438:     PetscFunctionReturn(PETSC_SUCCESS);
439:   }
440:   PetscCall(PetscSplitReductionGet(comm, &sr));
441:   PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
442:   if (sr->numopsbegin >= sr->maxops) PetscCall(PetscSplitReductionExtend(sr));
443:   sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
444:   sr->invecs[sr->numopsbegin]     = (void *)x;
445:   PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
446:   PetscUseTypeMethod(x, tdot_local, y, sr->lvalues + sr->numopsbegin++);
447:   PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
448:   PetscFunctionReturn(PETSC_SUCCESS);
449: }

451: /*@
452:   VecTDotEnd - Ends a split phase transpose dot product computation.

454:   Input Parameters:
455: + x      - the first vector (can be `NULL`)
456: . y      - the second vector (can be `NULL`)
457: - result - where the result will go

459:   Level: advanced

461:   Notes:
462:   Each call to `VecTDotBegin()` should be paired with a call to `VecTDotEnd()`.

464: .seealso: `VecTDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
465:           `VecDotBegin()`, `VecDotEnd()`
466: @*/
467: PetscErrorCode VecTDotEnd(Vec x, Vec y, PetscScalar *result)
468: {
469:   PetscFunctionBegin;
470:   /*
471:       TDotEnd() is the same as DotEnd() so reuse the code
472:   */
473:   PetscCall(VecDotEnd(x, y, result));
474:   PetscFunctionReturn(PETSC_SUCCESS);
475: }

477: /* -------------------------------------------------------------------------*/

479: /*@
480:   VecNormBegin - Starts a split phase norm computation.

482:   Input Parameters:
483: + x      - the first vector
484: . ntype  - norm type, one of `NORM_1`, `NORM_2`, `NORM_MAX`, `NORM_1_AND_2`
485: - result - where the result will go (can be `NULL`)

487:   Level: advanced

489:   Notes:
490:   Each call to `VecNormBegin()` should be paired with a call to `VecNormEnd()`.

492: .seealso: `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`, `VecDotBegin()`, `VecDotEnd()`, `PetscCommSplitReductionBegin()`
493: @*/
494: PetscErrorCode VecNormBegin(Vec x, NormType ntype, PetscReal *result)
495: {
496:   PetscSplitReduction *sr;
497:   PetscReal            lresult[2];
498:   MPI_Comm             comm;

500:   PetscFunctionBegin;
502:   PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
503:   if (PetscDefined(HAVE_THREADSAFETY)) {
504:     PetscCheck(result, comm, PETSC_ERR_ARG_NULL, "result cannot be NULL when configuring --with-threadsafety");
505:     PetscCall(PetscObjectStateIncrease((PetscObject)x)); // increase PetscObjectState to invalidate cached norms
506:     PetscCall(VecNorm(x, ntype, result));
507:     PetscFunctionReturn(PETSC_SUCCESS);
508:   }
509:   PetscCall(PetscSplitReductionGet(comm, &sr));
510:   PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
511:   if (sr->numopsbegin >= sr->maxops || (sr->numopsbegin == sr->maxops - 1 && ntype == NORM_1_AND_2)) PetscCall(PetscSplitReductionExtend(sr));

513:   sr->invecs[sr->numopsbegin] = (void *)x;
514:   PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
515:   PetscUseTypeMethod(x, norm_local, ntype, lresult);
516:   PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
517:   if (ntype == NORM_2) lresult[0] = lresult[0] * lresult[0];
518:   if (ntype == NORM_1_AND_2) lresult[1] = lresult[1] * lresult[1];
519:   if (ntype == NORM_MAX) sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_MAX;
520:   else sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
521:   sr->lvalues[sr->numopsbegin++] = lresult[0];
522:   if (ntype == NORM_1_AND_2) {
523:     sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
524:     sr->lvalues[sr->numopsbegin++]  = lresult[1];
525:   }
526:   PetscFunctionReturn(PETSC_SUCCESS);
527: }

529: /*@
530:   VecNormEnd - Ends a split phase norm computation.

532:   Input Parameters:
533: + x      - the first vector
534: . ntype  - norm type, one of `NORM_1`, `NORM_2`, `NORM_MAX`, `NORM_1_AND_2`
535: - result - where the result will go

537:   Level: advanced

539:   Notes:
540:   Each call to `VecNormBegin()` should be paired with a call to `VecNormEnd()`.

542:   The `x` vector is not allowed to be `NULL`, otherwise the vector would not have its correctly cached norm value

544: .seealso: `VecNormBegin()`, `VecNorm()`, `VecDot()`, `VecMDot()`, `VecDotBegin()`, `VecDotEnd()`, `PetscCommSplitReductionBegin()`
545: @*/
546: PetscErrorCode VecNormEnd(Vec x, NormType ntype, PetscReal *result)
547: {
548:   PetscSplitReduction *sr;
549:   MPI_Comm             comm;

551:   PetscFunctionBegin;
552:   if (PetscDefined(HAVE_THREADSAFETY)) PetscFunctionReturn(PETSC_SUCCESS);
554:   PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
555:   PetscCall(PetscSplitReductionGet(comm, &sr));
556:   PetscCall(PetscSplitReductionEnd(sr));

558:   PetscCheck(sr->numopsend < sr->numopsbegin, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() more times then VecxxxBegin()");
559:   PetscCheck((void *)x == sr->invecs[sr->numopsend], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
560:   PetscCheck(sr->reducetype[sr->numopsend] == PETSC_SR_REDUCE_MAX || ntype != NORM_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecNormEnd(,NORM_MAX,) on a reduction started with VecDotBegin() or NORM_1 or NORM_2");
561:   result[0] = PetscRealPart(sr->gvalues[sr->numopsend++]);

563:   if (ntype == NORM_2) result[0] = PetscSqrtReal(result[0]);
564:   else if (ntype == NORM_1_AND_2) {
565:     result[1] = PetscRealPart(sr->gvalues[sr->numopsend++]);
566:     result[1] = PetscSqrtReal(result[1]);
567:   }
568:   if (ntype != NORM_1_AND_2) PetscCall(PetscObjectComposedDataSetReal((PetscObject)x, NormIds[ntype], result[0]));

570:   if (sr->numopsend == sr->numopsbegin) {
571:     sr->state       = STATE_BEGIN;
572:     sr->numopsend   = 0;
573:     sr->numopsbegin = 0;
574:   }
575:   PetscFunctionReturn(PETSC_SUCCESS);
576: }

578: /*
579:    Possibly add

581:      PetscReductionSumBegin/End()
582:      PetscReductionMaxBegin/End()
583:      PetscReductionMinBegin/End()
584:    or have more like MPI with a single function with flag for Op? Like first better
585: */

587: /*@
588:   VecMDotBegin - Starts a split phase multiple dot product computation.

590:   Input Parameters:
591: + x      - the first vector
592: . nv     - number of vectors
593: . y      - array of vectors
594: - result - where the result will go (can be `NULL`)

596:   Level: advanced

598:   Notes:
599:   Each call to `VecMDotBegin()` should be paired with a call to `VecMDotEnd()`.

601: .seealso: `VecMDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
602:           `VecTDotBegin()`, `VecTDotEnd()`, `VecMTDotBegin()`, `VecMTDotEnd()`, `PetscCommSplitReductionBegin()`
603: @*/
604: PetscErrorCode VecMDotBegin(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
605: {
606:   PetscSplitReduction *sr;
607:   MPI_Comm             comm;
608:   PetscInt             i;

610:   PetscFunctionBegin;
611:   PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
612:   if (PetscDefined(HAVE_THREADSAFETY)) {
613:     PetscCheck(result, comm, PETSC_ERR_ARG_NULL, "result cannot be NULL when configuring --with-threadsafety");
614:     PetscCall(VecMDot(x, nv, y, result));
615:     PetscFunctionReturn(PETSC_SUCCESS);
616:   }
617:   PetscCall(PetscSplitReductionGet(comm, &sr));
618:   PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
619:   for (i = 0; i < nv; i++) {
620:     if (sr->numopsbegin + i >= sr->maxops) PetscCall(PetscSplitReductionExtend(sr));
621:     sr->reducetype[sr->numopsbegin + i] = PETSC_SR_REDUCE_SUM;
622:     sr->invecs[sr->numopsbegin + i]     = (void *)x;
623:   }
624:   PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
625:   PetscUseTypeMethod(x, mdot_local, nv, y, sr->lvalues + sr->numopsbegin);
626:   PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
627:   sr->numopsbegin += nv;
628:   PetscFunctionReturn(PETSC_SUCCESS);
629: }

631: /*@
632:   VecMDotEnd - Ends a split phase multiple dot product computation.

634:   Input Parameters:
635: + x  - the first vector (can be `NULL`)
636: . nv - number of vectors
637: - y  - array of vectors (can be `NULL`)

639:   Output Parameter:
640: . result - where the result will go

642:   Level: advanced

644:   Notes:
645:   Each call to `VecMDotBegin()` should be paired with a call to `VecMDotEnd()`.

647: .seealso: `VecMDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
648:           `VecTDotBegin()`, `VecTDotEnd()`, `VecMTDotBegin()`, `VecMTDotEnd()`, `PetscCommSplitReductionBegin()`
649: @*/
650: PetscErrorCode VecMDotEnd(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
651: {
652:   PetscSplitReduction *sr;
653:   MPI_Comm             comm;
654:   PetscInt             i;

656:   PetscFunctionBegin;
657:   if (PetscDefined(HAVE_THREADSAFETY)) PetscFunctionReturn(PETSC_SUCCESS);
658:   PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
659:   PetscCall(PetscSplitReductionGet(comm, &sr));
660:   PetscCall(PetscSplitReductionEnd(sr));

662:   PetscCheck(sr->numopsend < sr->numopsbegin, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() more times then VecxxxBegin()");
663:   PetscCheck(!x || (void *)x == sr->invecs[sr->numopsend], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
664:   PetscCheck(sr->reducetype[sr->numopsend] == PETSC_SR_REDUCE_SUM, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecDotEnd() on a reduction started with VecNormBegin()");
665:   for (i = 0; i < nv; i++) result[i] = sr->gvalues[sr->numopsend++];

667:   /*
668:      We are finished getting all the results so reset to no outstanding requests
669:   */
670:   if (sr->numopsend == sr->numopsbegin) {
671:     sr->state       = STATE_BEGIN;
672:     sr->numopsend   = 0;
673:     sr->numopsbegin = 0;
674:   }
675:   PetscFunctionReturn(PETSC_SUCCESS);
676: }

678: /*@
679:   VecMTDotBegin - Starts a split phase transpose multiple dot product computation.

681:   Input Parameters:
682: + x      - the first vector
683: . nv     - number of vectors
684: . y      - array of  vectors
685: - result - where the result will go (can be `NULL`)

687:   Level: advanced

689:   Notes:
690:   Each call to `VecMTDotBegin()` should be paired with a call to `VecMTDotEnd()`.

692: .seealso: `VecMTDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
693:           `VecDotBegin()`, `VecDotEnd()`, `VecMDotBegin()`, `VecMDotEnd()`, `PetscCommSplitReductionBegin()`
694: @*/
695: PetscErrorCode VecMTDotBegin(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
696: {
697:   PetscSplitReduction *sr;
698:   MPI_Comm             comm;
699:   PetscInt             i;

701:   PetscFunctionBegin;
702:   PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
703:   if (PetscDefined(HAVE_THREADSAFETY)) {
704:     PetscCheck(result, comm, PETSC_ERR_ARG_NULL, "result cannot be NULL when configuring --with-threadsafety");
705:     PetscCall(VecMTDot(x, nv, y, result));
706:     PetscFunctionReturn(PETSC_SUCCESS);
707:   }
708:   PetscCall(PetscSplitReductionGet(comm, &sr));
709:   PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
710:   for (i = 0; i < nv; i++) {
711:     if (sr->numopsbegin + i >= sr->maxops) PetscCall(PetscSplitReductionExtend(sr));
712:     sr->reducetype[sr->numopsbegin + i] = PETSC_SR_REDUCE_SUM;
713:     sr->invecs[sr->numopsbegin + i]     = (void *)x;
714:   }
715:   PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
716:   PetscUseTypeMethod(x, mtdot_local, nv, y, sr->lvalues + sr->numopsbegin);
717:   PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
718:   sr->numopsbegin += nv;
719:   PetscFunctionReturn(PETSC_SUCCESS);
720: }

722: /*@
723:   VecMTDotEnd - Ends a split phase transpose multiple dot product computation.

725:   Input Parameters:
726: + x  - the first vector (can be `NULL`)
727: . nv - number of vectors
728: - y  - array of  vectors (can be `NULL`)

730:   Output Parameter:
731: . result - where the result will go

733:   Level: advanced

735:   Notes:
736:   Each call to `VecTDotBegin()` should be paired with a call to `VecTDotEnd()`.

738: .seealso: `VecMTDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
739:           `VecDotBegin()`, `VecDotEnd()`, `VecMDotBegin()`, `VecMDotEnd()`, `PetscCommSplitReductionBegin()`
740: @*/
741: PetscErrorCode VecMTDotEnd(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
742: {
743:   PetscFunctionBegin;
744:   /*
745:       MTDotEnd() is the same as MDotEnd() so reuse the code
746:   */
747:   PetscCall(VecMDotEnd(x, nv, y, result));
748:   PetscFunctionReturn(PETSC_SUCCESS);
749: }