Actual source code: comb.c

  1: /*
  2:       Split phase global vector reductions with support for combining the
  3:    communication portion of several operations. Using MPI-1.1 support only

  5:       The idea for this and much of the initial code is contributed by
  6:    Victor Eijkhout.

  8:        Usage:
  9:              VecDotBegin(Vec,Vec,PetscScalar *);
 10:              VecNormBegin(Vec,NormType,PetscReal *);
 11:              ....
 12:              VecDotEnd(Vec,Vec,PetscScalar *);
 13:              VecNormEnd(Vec,NormType,PetscReal *);

 15:        Limitations:
 16:          - The order of the xxxEnd() functions MUST be in the same order
 17:            as the xxxBegin(). There is extensive error checking to try to
 18:            insure that the user calls the routines in the correct order
 19: */

 21: #include <petsc/private/vecimpl.h>

 23: static PetscMPIInt MPIU_Iallreduce(void *sendbuf, void *recvbuf, PetscMPIInt count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request)
 24: {
 25:   PetscMPIInt err;

 27: #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
 28:   err = MPI_Iallreduce(sendbuf, recvbuf, count, datatype, op, comm, request);
 29: #else
 30:   err      = MPIU_Allreduce(sendbuf, recvbuf, count, datatype, op, comm);
 31:   *request = MPI_REQUEST_NULL;
 32: #endif
 33:   return err;
 34: }

 36: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *);

 38: /*
 39:    PetscSplitReductionCreate - Creates a data structure to contain the queued information.
 40: */
 41: static PetscErrorCode PetscSplitReductionCreate(MPI_Comm comm, PetscSplitReduction **sr)
 42: {
 43:   PetscFunctionBegin;
 44:   PetscCall(PetscNew(sr));
 45:   (*sr)->numopsbegin = 0;
 46:   (*sr)->numopsend   = 0;
 47:   (*sr)->state       = STATE_BEGIN;
 48: #define MAXOPS 32
 49:   (*sr)->maxops = MAXOPS;
 50:   PetscCall(PetscMalloc6(MAXOPS, &(*sr)->lvalues, MAXOPS, &(*sr)->gvalues, MAXOPS, &(*sr)->invecs, MAXOPS, &(*sr)->reducetype, MAXOPS, &(*sr)->lvalues_mix, MAXOPS, &(*sr)->gvalues_mix));
 51: #undef MAXOPS
 52:   (*sr)->comm    = comm;
 53:   (*sr)->request = MPI_REQUEST_NULL;
 54:   (*sr)->mix     = PETSC_FALSE;
 55:   (*sr)->async   = PetscDefined(HAVE_MPI_NONBLOCKING_COLLECTIVES) ? PETSC_TRUE : PETSC_FALSE; /* Enable by default */
 56:   /* always check for option; so that tests that run on systems without support don't warn about unhandled options */
 57:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-splitreduction_async", &(*sr)->async, NULL));
 58:   PetscFunctionReturn(PETSC_SUCCESS);
 59: }

 61: /*
 62:        This function is the MPI reduction operation used when there is
 63:    a combination of sums and max in the reduction. The call below to
 64:    MPI_Op_create() converts the function PetscSplitReduction_Local() to the
 65:    MPI operator PetscSplitReduction_Op.
 66: */
 67: MPI_Op PetscSplitReduction_Op = 0;

 69: PETSC_EXTERN void MPIAPI PetscSplitReduction_Local(void *in, void *out, PetscMPIInt *cnt, MPI_Datatype *datatype)
 70: {
 71:   struct PetscScalarInt {
 72:     PetscScalar v;
 73:     PetscInt    i;
 74:   };
 75:   struct PetscScalarInt *xin  = (struct PetscScalarInt *)in;
 76:   struct PetscScalarInt *xout = (struct PetscScalarInt *)out;
 77:   PetscInt               i, count = *cnt;

 79:   PetscFunctionBegin;
 80:   if (*datatype != MPIU_SCALAR_INT) {
 81:     PetscCallAbort(MPI_COMM_SELF, (*PetscErrorPrintf)("Can only handle MPIU_SCALAR_INT data types"));
 82:     PETSCABORT(MPI_COMM_SELF, PETSC_ERR_ARG_WRONG);
 83:   }
 84:   for (i = 0; i < count; i++) {
 85:     if (xin[i].i == PETSC_SR_REDUCE_SUM) xout[i].v += xin[i].v;
 86:     else if (xin[i].i == PETSC_SR_REDUCE_MAX) xout[i].v = PetscMax(PetscRealPart(xout[i].v), PetscRealPart(xin[i].v));
 87:     else if (xin[i].i == PETSC_SR_REDUCE_MIN) xout[i].v = PetscMin(PetscRealPart(xout[i].v), PetscRealPart(xin[i].v));
 88:     else {
 89:       PetscCallAbort(MPI_COMM_SELF, (*PetscErrorPrintf)("Reduction type input is not PETSC_SR_REDUCE_SUM, PETSC_SR_REDUCE_MAX, or PETSC_SR_REDUCE_MIN"));
 90:       PETSCABORT(MPI_COMM_SELF, PETSC_ERR_ARG_WRONG);
 91:     }
 92:   }
 93:   PetscFunctionReturnVoid();
 94: }

 96: /*@
 97:   PetscCommSplitReductionBegin - Begin an asynchronous split-mode reduction

 99:   Collective but not synchronizing

101:   Input Parameter:
102: . comm - communicator on which split reduction has been queued

104:   Level: advanced

106:   Note:
107:   Calling this function is optional when using split-mode reduction. On supporting hardware,
108:   calling this after all VecXxxBegin() allows the reduction to make asynchronous progress
109:   before the result is needed (in VecXxxEnd()).

111: .seealso: `VecNormBegin()`, `VecNormEnd()`, `VecDotBegin()`, `VecDotEnd()`, `VecTDotBegin()`, `VecTDotEnd()`, `VecMDotBegin()`, `VecMDotEnd()`, `VecMTDotBegin()`, `VecMTDotEnd()`
112: @*/
113: PetscErrorCode PetscCommSplitReductionBegin(MPI_Comm comm)
114: {
115:   PetscSplitReduction *sr;

117:   PetscFunctionBegin;
118:   if (PetscDefined(HAVE_THREADSAFETY)) PetscFunctionReturn(PETSC_SUCCESS);
119:   PetscCall(PetscSplitReductionGet(comm, &sr));
120:   PetscCheck(sr->numopsend <= 0, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot call this after VecxxxEnd() has been called");
121:   if (sr->async) { /* Bad reuse, setup code copied from PetscSplitReductionApply(). */
122:     PetscMPIInt           numops     = sr->numopsbegin;
123:     PetscSRReductionType *reducetype = sr->reducetype;
124:     PetscScalar          *lvalues = sr->lvalues, *gvalues = sr->gvalues;
125:     PetscInt              sum_flg = 0, max_flg = 0, min_flg = 0;
126:     MPI_Comm              comm = sr->comm;
127:     PetscMPIInt           size, cmul = sizeof(PetscScalar) / sizeof(PetscReal);

129:     PetscCall(PetscLogEventBegin(VEC_ReduceBegin, 0, 0, 0, 0));
130:     PetscCallMPI(MPI_Comm_size(sr->comm, &size));
131:     if (size == 1) {
132:       PetscCall(PetscArraycpy(gvalues, lvalues, numops));
133:     } else {
134:       /* determine if all reductions are sum, max, or min */
135:       for (PetscMPIInt i = 0; i < numops; i++) {
136:         if (reducetype[i] == PETSC_SR_REDUCE_MAX) max_flg = 1;
137:         else if (reducetype[i] == PETSC_SR_REDUCE_SUM) sum_flg = 1;
138:         else if (reducetype[i] == PETSC_SR_REDUCE_MIN) min_flg = 1;
139:         else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
140:       }
141:       PetscCheck(sum_flg + max_flg + min_flg <= 1 || !sr->mix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
142:       if (sum_flg + max_flg + min_flg > 1) {
143:         sr->mix = PETSC_TRUE;
144:         for (PetscMPIInt i = 0; i < numops; i++) {
145:           sr->lvalues_mix[i].v = lvalues[i];
146:           sr->lvalues_mix[i].i = reducetype[i];
147:         }
148:         PetscCallMPI(MPIU_Iallreduce(sr->lvalues_mix, sr->gvalues_mix, numops, MPIU_SCALAR_INT, PetscSplitReduction_Op, comm, &sr->request));
149:       } else if (max_flg) { /* Compute max of real and imag parts separately, presumably only the real part is used */
150:         PetscCallMPI(MPIU_Iallreduce(lvalues, gvalues, cmul * numops, MPIU_REAL, MPIU_MAX, comm, &sr->request));
151:       } else if (min_flg) {
152:         PetscCallMPI(MPIU_Iallreduce(lvalues, gvalues, cmul * numops, MPIU_REAL, MPIU_MIN, comm, &sr->request));
153:       } else {
154:         PetscCallMPI(MPIU_Iallreduce(lvalues, gvalues, numops, MPIU_SCALAR, MPIU_SUM, comm, &sr->request));
155:       }
156:     }
157:     sr->state     = STATE_PENDING;
158:     sr->numopsend = 0;
159:     PetscCall(PetscLogEventEnd(VEC_ReduceBegin, 0, 0, 0, 0));
160:   } else {
161:     PetscCall(PetscSplitReductionApply(sr));
162:   }
163:   PetscFunctionReturn(PETSC_SUCCESS);
164: }

166: PetscErrorCode PetscSplitReductionEnd(PetscSplitReduction *sr)
167: {
168:   PetscFunctionBegin;
169:   switch (sr->state) {
170:   case STATE_BEGIN: /* We are doing synchronous communication and this is the first call to VecXxxEnd() so do the communication */
171:     PetscCall(PetscSplitReductionApply(sr));
172:     break;
173:   case STATE_PENDING:
174:     /* We are doing asynchronous-mode communication and this is the first VecXxxEnd() so wait for comm to complete */
175:     PetscCall(PetscLogEventBegin(VEC_ReduceEnd, 0, 0, 0, 0));
176:     if (sr->request != MPI_REQUEST_NULL) PetscCallMPI(MPI_Wait(&sr->request, MPI_STATUS_IGNORE));
177:     sr->state = STATE_END;
178:     if (sr->mix) {
179:       for (PetscMPIInt i = 0; i < sr->numopsbegin; i++) sr->gvalues[i] = sr->gvalues_mix[i].v;
180:       sr->mix = PETSC_FALSE;
181:     }
182:     PetscCall(PetscLogEventEnd(VEC_ReduceEnd, 0, 0, 0, 0));
183:     break;
184:   default:
185:     break; /* everything is already done */
186:   }
187:   PetscFunctionReturn(PETSC_SUCCESS);
188: }

190: /*
191:    PetscSplitReductionApply - Actually do the communication required for a split phase reduction
192: */
193: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *sr)
194: {
195:   PetscMPIInt           numops     = sr->numopsbegin;
196:   PetscSRReductionType *reducetype = sr->reducetype;
197:   PetscScalar          *lvalues = sr->lvalues, *gvalues = sr->gvalues;
198:   PetscInt              sum_flg = 0, max_flg = 0, min_flg = 0;
199:   MPI_Comm              comm = sr->comm;
200:   PetscMPIInt           size, cmul = sizeof(PetscScalar) / sizeof(PetscReal);

202:   PetscFunctionBegin;
203:   PetscCheck(sr->numopsend <= 0, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot call this after VecxxxEnd() has been called");
204:   PetscCall(PetscLogEventBegin(VEC_ReduceCommunication, 0, 0, 0, 0));
205:   PetscCallMPI(MPI_Comm_size(sr->comm, &size));
206:   if (size == 1) {
207:     PetscCall(PetscArraycpy(gvalues, lvalues, numops));
208:   } else {
209:     /* determine if all reductions are sum, max, or min */
210:     for (PetscMPIInt i = 0; i < numops; i++) {
211:       if (reducetype[i] == PETSC_SR_REDUCE_MAX) max_flg = 1;
212:       else if (reducetype[i] == PETSC_SR_REDUCE_SUM) sum_flg = 1;
213:       else if (reducetype[i] == PETSC_SR_REDUCE_MIN) min_flg = 1;
214:       else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
215:     }
216:     if (sum_flg + max_flg + min_flg > 1) {
217:       PetscCheck(!sr->mix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
218:       for (PetscMPIInt i = 0; i < numops; i++) {
219:         sr->lvalues_mix[i].v = lvalues[i];
220:         sr->lvalues_mix[i].i = reducetype[i];
221:       }
222:       PetscCallMPI(MPIU_Allreduce(sr->lvalues_mix, sr->gvalues_mix, numops, MPIU_SCALAR_INT, PetscSplitReduction_Op, comm));
223:       for (PetscMPIInt i = 0; i < numops; i++) sr->gvalues[i] = sr->gvalues_mix[i].v;
224:     } else if (max_flg) { /* Compute max of real and imag parts separately, presumably only the real part is used */
225:       PetscCallMPI(MPIU_Allreduce(lvalues, gvalues, cmul * numops, MPIU_REAL, MPIU_MAX, comm));
226:     } else if (min_flg) {
227:       PetscCallMPI(MPIU_Allreduce(lvalues, gvalues, cmul * numops, MPIU_REAL, MPIU_MIN, comm));
228:     } else {
229:       PetscCallMPI(MPIU_Allreduce(lvalues, gvalues, numops, MPIU_SCALAR, MPIU_SUM, comm));
230:     }
231:   }
232:   sr->state     = STATE_END;
233:   sr->numopsend = 0;
234:   PetscCall(PetscLogEventEnd(VEC_ReduceCommunication, 0, 0, 0, 0));
235:   PetscFunctionReturn(PETSC_SUCCESS);
236: }

238: /*
239:    PetscSplitReductionExtend - Double the amount of space (slots) allocated for a split reduction object.
240: */
241: PetscErrorCode PetscSplitReductionExtend(PetscSplitReduction *sr)
242: {
243:   struct PetscScalarInt {
244:     PetscScalar v;
245:     PetscInt    i;
246:   };
247:   PetscMPIInt            maxops     = sr->maxops;
248:   PetscSRReductionType  *reducetype = sr->reducetype;
249:   PetscScalar           *lvalues = sr->lvalues, *gvalues = sr->gvalues;
250:   struct PetscScalarInt *lvalues_mix = (struct PetscScalarInt *)sr->lvalues_mix;
251:   struct PetscScalarInt *gvalues_mix = (struct PetscScalarInt *)sr->gvalues_mix;
252:   void                 **invecs      = sr->invecs;

254:   PetscFunctionBegin;
255:   sr->maxops = 2 * maxops;
256:   PetscCall(PetscMalloc6(2 * maxops, &sr->lvalues, 2 * maxops, &sr->gvalues, 2 * maxops, &sr->reducetype, 2 * maxops, &sr->invecs, 2 * maxops, &sr->lvalues_mix, 2 * maxops, &sr->gvalues_mix));
257:   PetscCall(PetscArraycpy(sr->lvalues, lvalues, maxops));
258:   PetscCall(PetscArraycpy(sr->gvalues, gvalues, maxops));
259:   PetscCall(PetscArraycpy(sr->reducetype, reducetype, maxops));
260:   PetscCall(PetscArraycpy(sr->invecs, invecs, maxops));
261:   PetscCall(PetscArraycpy(sr->lvalues_mix, lvalues_mix, maxops));
262:   PetscCall(PetscArraycpy(sr->gvalues_mix, gvalues_mix, maxops));
263:   PetscCall(PetscFree6(lvalues, gvalues, reducetype, invecs, lvalues_mix, gvalues_mix));
264:   PetscFunctionReturn(PETSC_SUCCESS);
265: }

267: static PetscErrorCode PetscSplitReductionDestroy(PetscSplitReduction *sr)
268: {
269:   PetscFunctionBegin;
270:   PetscCall(PetscFree6(sr->lvalues, sr->gvalues, sr->reducetype, sr->invecs, sr->lvalues_mix, sr->gvalues_mix));
271:   PetscCall(PetscFree(sr));
272:   PetscFunctionReturn(PETSC_SUCCESS);
273: }

275: PetscMPIInt Petsc_Reduction_keyval = MPI_KEYVAL_INVALID;

277: /*
278:    Private routine to delete internal storage when a communicator is freed.
279:   This is called by MPI, not by users.

281:   The binding for the first argument changed from MPI 1.0 to 1.1; in 1.0
282:   it was MPI_Comm *comm.
283: */
284: static PetscMPIInt MPIAPI Petsc_DelReduction(MPI_Comm comm, PETSC_UNUSED PetscMPIInt keyval, void *attr_val, PETSC_UNUSED void *extra_state)
285: {
286:   PetscFunctionBegin;
287:   PetscCallReturnMPI(PetscInfo(0, "Deleting reduction data in an MPI_Comm %ld\n", (long)comm));
288:   PetscCallReturnMPI(PetscSplitReductionDestroy((PetscSplitReduction *)attr_val));
289:   PetscFunctionReturn(PETSC_SUCCESS);
290: }

292: /*
293:      PetscSplitReductionGet - Gets the split reduction object from a
294:         PETSc vector, creates if it does not exit.

296: */
297: PetscErrorCode PetscSplitReductionGet(MPI_Comm comm, PetscSplitReduction **sr)
298: {
299:   PetscMPIInt iflg;

301:   PetscFunctionBegin;
302:   PetscCheck(!PetscDefined(HAVE_THREADSAFETY), comm, PETSC_ERR_SUP, "PetscSplitReductionGet() is not thread-safe");
303:   if (Petsc_Reduction_keyval == MPI_KEYVAL_INVALID) {
304:     /*
305:        The calling sequence of the 2nd argument to this function changed
306:        between MPI Standard 1.0 and the revisions 1.1 Here we match the
307:        new standard, if you are using an MPI implementation that uses
308:        the older version you will get a warning message about the next line;
309:        it is only a warning message and should do no harm.
310:     */
311:     PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_DelReduction, &Petsc_Reduction_keyval, NULL));
312:   }
313:   PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Reduction_keyval, (void **)sr, &iflg));
314:   if (!iflg) { /* doesn't exist yet so create it and put it in */
315:     PetscCall(PetscSplitReductionCreate(comm, sr));
316:     PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_Reduction_keyval, *sr));
317:     PetscCall(PetscInfo(0, "Putting reduction data in an MPI_Comm %ld\n", (long)comm));
318:   }
319:   PetscFunctionReturn(PETSC_SUCCESS);
320: }

322: /*@
323:   VecDotBegin - Starts a split phase dot product computation.

325:   Input Parameters:
326: + x      - the first vector
327: . y      - the second vector
328: - result - where the result will go (can be `NULL`)

330:   Level: advanced

332:   Notes:
333:   Each call to `VecDotBegin()` should be paired with a call to `VecDotEnd()`.

335: .seealso: `VecDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
336:           `VecTDotBegin()`, `VecTDotEnd()`, `PetscCommSplitReductionBegin()`
337: @*/
338: PetscErrorCode VecDotBegin(Vec x, Vec y, PetscScalar *result)
339: {
340:   PetscSplitReduction *sr;
341:   MPI_Comm             comm;

343:   PetscFunctionBegin;
346:   PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
347:   if (PetscDefined(HAVE_THREADSAFETY)) {
348:     PetscCheck(result, comm, PETSC_ERR_ARG_NULL, "result cannot be NULL when configuring --with-threadsafety");
349:     PetscCall(VecDot(x, y, result));
350:     PetscFunctionReturn(PETSC_SUCCESS);
351:   }
352:   PetscCall(PetscSplitReductionGet(comm, &sr));
353:   PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
354:   if (sr->numopsbegin >= sr->maxops) PetscCall(PetscSplitReductionExtend(sr));
355:   sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
356:   sr->invecs[sr->numopsbegin]     = (void *)x;
357:   PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
358:   PetscUseTypeMethod(x, dot_local, y, sr->lvalues + sr->numopsbegin++);
359:   PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
360:   PetscFunctionReturn(PETSC_SUCCESS);
361: }

363: /*@
364:   VecDotEnd - Ends a split phase dot product computation.

366:   Input Parameters:
367: + x      - the first vector (can be `NULL`)
368: . y      - the second vector (can be `NULL`)
369: - result - where the result will go

371:   Level: advanced

373:   Notes:
374:   Each call to `VecDotBegin()` should be paired with a call to `VecDotEnd()`.

376: .seealso: `VecDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
377:           `VecTDotBegin()`, `VecTDotEnd()`, `PetscCommSplitReductionBegin()`
378: @*/
379: PetscErrorCode VecDotEnd(Vec x, Vec y, PetscScalar *result)
380: {
381:   PetscSplitReduction *sr;
382:   MPI_Comm             comm;

384:   PetscFunctionBegin;
385:   if (PetscDefined(HAVE_THREADSAFETY)) PetscFunctionReturn(PETSC_SUCCESS);
386:   PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
387:   PetscCall(PetscSplitReductionGet(comm, &sr));
388:   PetscCall(PetscSplitReductionEnd(sr));

390:   PetscCheck(sr->numopsend < sr->numopsbegin, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() more times then VecxxxBegin()");
391:   PetscCheck(!x || (void *)x == sr->invecs[sr->numopsend], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
392:   PetscCheck(sr->reducetype[sr->numopsend] == PETSC_SR_REDUCE_SUM, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecDotEnd() on a reduction started with VecNormBegin()");
393:   *result = sr->gvalues[sr->numopsend++];

395:   /*
396:      We are finished getting all the results so reset to no outstanding requests
397:   */
398:   if (sr->numopsend == sr->numopsbegin) {
399:     sr->state       = STATE_BEGIN;
400:     sr->numopsend   = 0;
401:     sr->numopsbegin = 0;
402:     sr->mix         = PETSC_FALSE;
403:   }
404:   PetscFunctionReturn(PETSC_SUCCESS);
405: }

407: /*@
408:   VecTDotBegin - Starts a split phase transpose dot product computation.

410:   Input Parameters:
411: + x      - the first vector
412: . y      - the second vector
413: - result - where the result will go (can be `NULL`)

415:   Level: advanced

417:   Notes:
418:   Each call to `VecTDotBegin()` should be paired with a call to `VecTDotEnd()`.

420: .seealso: `VecTDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
421:           `VecDotBegin()`, `VecDotEnd()`, `PetscCommSplitReductionBegin()`
422: @*/
423: PetscErrorCode VecTDotBegin(Vec x, Vec y, PetscScalar *result)
424: {
425:   PetscSplitReduction *sr;
426:   MPI_Comm             comm;

428:   PetscFunctionBegin;
429:   PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
430:   if (PetscDefined(HAVE_THREADSAFETY)) {
431:     PetscCheck(result, comm, PETSC_ERR_ARG_NULL, "result cannot be NULL when configuring --with-threadsafety");
432:     PetscCall(VecTDot(x, y, result));
433:     PetscFunctionReturn(PETSC_SUCCESS);
434:   }
435:   PetscCall(PetscSplitReductionGet(comm, &sr));
436:   PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
437:   if (sr->numopsbegin >= sr->maxops) PetscCall(PetscSplitReductionExtend(sr));
438:   sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
439:   sr->invecs[sr->numopsbegin]     = (void *)x;
440:   PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
441:   PetscUseTypeMethod(x, tdot_local, y, sr->lvalues + sr->numopsbegin++);
442:   PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
443:   PetscFunctionReturn(PETSC_SUCCESS);
444: }

446: /*@
447:   VecTDotEnd - Ends a split phase transpose dot product computation.

449:   Input Parameters:
450: + x      - the first vector (can be `NULL`)
451: . y      - the second vector (can be `NULL`)
452: - result - where the result will go

454:   Level: advanced

456:   Notes:
457:   Each call to `VecTDotBegin()` should be paired with a call to `VecTDotEnd()`.

459: .seealso: `VecTDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
460:           `VecDotBegin()`, `VecDotEnd()`
461: @*/
462: PetscErrorCode VecTDotEnd(Vec x, Vec y, PetscScalar *result)
463: {
464:   PetscFunctionBegin;
465:   /*
466:       TDotEnd() is the same as DotEnd() so reuse the code
467:   */
468:   PetscCall(VecDotEnd(x, y, result));
469:   PetscFunctionReturn(PETSC_SUCCESS);
470: }

472: /*@
473:   VecNormBegin - Starts a split phase norm computation.

475:   Input Parameters:
476: + x      - the first vector
477: . ntype  - norm type, one of `NORM_1`, `NORM_2`, `NORM_MAX`, `NORM_1_AND_2`
478: - result - where the result will go (can be `NULL`)

480:   Level: advanced

482:   Notes:
483:   Each call to `VecNormBegin()` should be paired with a call to `VecNormEnd()`.

485: .seealso: `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`, `VecDotBegin()`, `VecDotEnd()`, `PetscCommSplitReductionBegin()`
486: @*/
487: PetscErrorCode VecNormBegin(Vec x, NormType ntype, PetscReal *result)
488: {
489:   PetscSplitReduction *sr;
490:   PetscReal            lresult[2];
491:   MPI_Comm             comm;

493:   PetscFunctionBegin;
495:   PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
496:   if (PetscDefined(HAVE_THREADSAFETY)) {
497:     PetscCheck(result, comm, PETSC_ERR_ARG_NULL, "result cannot be NULL when configuring --with-threadsafety");
498:     PetscCall(PetscObjectStateIncrease((PetscObject)x)); // increase PetscObjectState to invalidate cached norms
499:     PetscCall(VecNorm(x, ntype, result));
500:     PetscFunctionReturn(PETSC_SUCCESS);
501:   }
502:   PetscCall(PetscSplitReductionGet(comm, &sr));
503:   PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
504:   if (sr->numopsbegin >= sr->maxops || (sr->numopsbegin == sr->maxops - 1 && ntype == NORM_1_AND_2)) PetscCall(PetscSplitReductionExtend(sr));

506:   sr->invecs[sr->numopsbegin] = (void *)x;
507:   PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
508:   PetscUseTypeMethod(x, norm_local, ntype, lresult);
509:   PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
510:   if (ntype == NORM_2) lresult[0] = lresult[0] * lresult[0];
511:   if (ntype == NORM_1_AND_2) lresult[1] = lresult[1] * lresult[1];
512:   if (ntype == NORM_MAX) sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_MAX;
513:   else sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
514:   sr->lvalues[sr->numopsbegin++] = lresult[0];
515:   if (ntype == NORM_1_AND_2) {
516:     sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
517:     sr->lvalues[sr->numopsbegin++]  = lresult[1];
518:   }
519:   PetscFunctionReturn(PETSC_SUCCESS);
520: }

522: /*@
523:   VecNormEnd - Ends a split phase norm computation.

525:   Input Parameters:
526: + x      - the first vector
527: . ntype  - norm type, one of `NORM_1`, `NORM_2`, `NORM_MAX`, `NORM_1_AND_2`
528: - result - where the result will go

530:   Level: advanced

532:   Notes:
533:   Each call to `VecNormBegin()` should be paired with a call to `VecNormEnd()`.

535:   The `x` vector is not allowed to be `NULL`, otherwise the vector would not have its correctly cached norm value

537: .seealso: `VecNormBegin()`, `VecNorm()`, `VecDot()`, `VecMDot()`, `VecDotBegin()`, `VecDotEnd()`, `PetscCommSplitReductionBegin()`
538: @*/
539: PetscErrorCode VecNormEnd(Vec x, NormType ntype, PetscReal *result)
540: {
541:   PetscSplitReduction *sr;
542:   MPI_Comm             comm;

544:   PetscFunctionBegin;
545:   if (PetscDefined(HAVE_THREADSAFETY)) PetscFunctionReturn(PETSC_SUCCESS);
547:   PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
548:   PetscCall(PetscSplitReductionGet(comm, &sr));
549:   PetscCall(PetscSplitReductionEnd(sr));

551:   PetscCheck(sr->numopsend < sr->numopsbegin, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() more times then VecxxxBegin()");
552:   PetscCheck((void *)x == sr->invecs[sr->numopsend], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
553:   PetscCheck(sr->reducetype[sr->numopsend] == PETSC_SR_REDUCE_MAX || ntype != NORM_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecNormEnd(,NORM_MAX,) on a reduction started with VecDotBegin() or NORM_1 or NORM_2");
554:   result[0] = PetscRealPart(sr->gvalues[sr->numopsend++]);

556:   if (ntype == NORM_2) result[0] = PetscSqrtReal(result[0]);
557:   else if (ntype == NORM_1_AND_2) {
558:     result[1] = PetscRealPart(sr->gvalues[sr->numopsend++]);
559:     result[1] = PetscSqrtReal(result[1]);
560:   }
561:   if (ntype != NORM_1_AND_2) PetscCall(PetscObjectComposedDataSetReal((PetscObject)x, NormIds[ntype], result[0]));

563:   if (sr->numopsend == sr->numopsbegin) {
564:     sr->state       = STATE_BEGIN;
565:     sr->numopsend   = 0;
566:     sr->numopsbegin = 0;
567:   }
568:   PetscFunctionReturn(PETSC_SUCCESS);
569: }

571: /*
572:    Possibly add

574:      PetscReductionSumBegin/End()
575:      PetscReductionMaxBegin/End()
576:      PetscReductionMinBegin/End()
577:    or have more like MPI with a single function with flag for Op? Like first better
578: */

580: /*@
581:   VecMDotBegin - Starts a split phase multiple dot product computation.

583:   Input Parameters:
584: + x      - the first vector
585: . nv     - number of vectors
586: . y      - array of vectors
587: - result - where the result will go (can be `NULL`)

589:   Level: advanced

591:   Notes:
592:   Each call to `VecMDotBegin()` should be paired with a call to `VecMDotEnd()`.

594: .seealso: `VecMDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
595:           `VecTDotBegin()`, `VecTDotEnd()`, `VecMTDotBegin()`, `VecMTDotEnd()`, `PetscCommSplitReductionBegin()`
596: @*/
597: PetscErrorCode VecMDotBegin(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
598: {
599:   PetscSplitReduction *sr;
600:   MPI_Comm             comm;
601:   PetscInt             i;

603:   PetscFunctionBegin;
604:   PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
605:   if (PetscDefined(HAVE_THREADSAFETY)) {
606:     PetscCheck(result, comm, PETSC_ERR_ARG_NULL, "result cannot be NULL when configuring --with-threadsafety");
607:     PetscCall(VecMDot(x, nv, y, result));
608:     PetscFunctionReturn(PETSC_SUCCESS);
609:   }
610:   PetscCall(PetscSplitReductionGet(comm, &sr));
611:   PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
612:   for (i = 0; i < nv; i++) {
613:     if (sr->numopsbegin + i >= sr->maxops) PetscCall(PetscSplitReductionExtend(sr));
614:     sr->reducetype[sr->numopsbegin + i] = PETSC_SR_REDUCE_SUM;
615:     sr->invecs[sr->numopsbegin + i]     = (void *)x;
616:   }
617:   PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
618:   PetscUseTypeMethod(x, mdot_local, nv, y, sr->lvalues + sr->numopsbegin);
619:   PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
620:   sr->numopsbegin += nv;
621:   PetscFunctionReturn(PETSC_SUCCESS);
622: }

624: /*@
625:   VecMDotEnd - Ends a split phase multiple dot product computation.

627:   Input Parameters:
628: + x  - the first vector (can be `NULL`)
629: . nv - number of vectors
630: - y  - array of vectors (can be `NULL`)

632:   Output Parameter:
633: . result - where the result will go

635:   Level: advanced

637:   Notes:
638:   Each call to `VecMDotBegin()` should be paired with a call to `VecMDotEnd()`.

640: .seealso: `VecMDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
641:           `VecTDotBegin()`, `VecTDotEnd()`, `VecMTDotBegin()`, `VecMTDotEnd()`, `PetscCommSplitReductionBegin()`
642: @*/
643: PetscErrorCode VecMDotEnd(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
644: {
645:   PetscSplitReduction *sr;
646:   MPI_Comm             comm;
647:   PetscInt             i;

649:   PetscFunctionBegin;
650:   if (PetscDefined(HAVE_THREADSAFETY)) PetscFunctionReturn(PETSC_SUCCESS);
651:   PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
652:   PetscCall(PetscSplitReductionGet(comm, &sr));
653:   PetscCall(PetscSplitReductionEnd(sr));

655:   PetscCheck(sr->numopsend < sr->numopsbegin, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() more times then VecxxxBegin()");
656:   PetscCheck(!x || (void *)x == sr->invecs[sr->numopsend], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
657:   PetscCheck(sr->reducetype[sr->numopsend] == PETSC_SR_REDUCE_SUM, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecDotEnd() on a reduction started with VecNormBegin()");
658:   for (i = 0; i < nv; i++) result[i] = sr->gvalues[sr->numopsend++];

660:   /*
661:      We are finished getting all the results so reset to no outstanding requests
662:   */
663:   if (sr->numopsend == sr->numopsbegin) {
664:     sr->state       = STATE_BEGIN;
665:     sr->numopsend   = 0;
666:     sr->numopsbegin = 0;
667:   }
668:   PetscFunctionReturn(PETSC_SUCCESS);
669: }

671: /*@
672:   VecMTDotBegin - Starts a split phase transpose multiple dot product computation.

674:   Input Parameters:
675: + x      - the first vector
676: . nv     - number of vectors
677: . y      - array of  vectors
678: - result - where the result will go (can be `NULL`)

680:   Level: advanced

682:   Notes:
683:   Each call to `VecMTDotBegin()` should be paired with a call to `VecMTDotEnd()`.

685: .seealso: `VecMTDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
686:           `VecDotBegin()`, `VecDotEnd()`, `VecMDotBegin()`, `VecMDotEnd()`, `PetscCommSplitReductionBegin()`
687: @*/
688: PetscErrorCode VecMTDotBegin(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
689: {
690:   PetscSplitReduction *sr;
691:   MPI_Comm             comm;
692:   PetscInt             i;

694:   PetscFunctionBegin;
695:   PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
696:   if (PetscDefined(HAVE_THREADSAFETY)) {
697:     PetscCheck(result, comm, PETSC_ERR_ARG_NULL, "result cannot be NULL when configuring --with-threadsafety");
698:     PetscCall(VecMTDot(x, nv, y, result));
699:     PetscFunctionReturn(PETSC_SUCCESS);
700:   }
701:   PetscCall(PetscSplitReductionGet(comm, &sr));
702:   PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
703:   for (i = 0; i < nv; i++) {
704:     if (sr->numopsbegin + i >= sr->maxops) PetscCall(PetscSplitReductionExtend(sr));
705:     sr->reducetype[sr->numopsbegin + i] = PETSC_SR_REDUCE_SUM;
706:     sr->invecs[sr->numopsbegin + i]     = (void *)x;
707:   }
708:   PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
709:   PetscUseTypeMethod(x, mtdot_local, nv, y, sr->lvalues + sr->numopsbegin);
710:   PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
711:   sr->numopsbegin += nv;
712:   PetscFunctionReturn(PETSC_SUCCESS);
713: }

715: /*@
716:   VecMTDotEnd - Ends a split phase transpose multiple dot product computation.

718:   Input Parameters:
719: + x  - the first vector (can be `NULL`)
720: . nv - number of vectors
721: - y  - array of  vectors (can be `NULL`)

723:   Output Parameter:
724: . result - where the result will go

726:   Level: advanced

728:   Notes:
729:   Each call to `VecTDotBegin()` should be paired with a call to `VecTDotEnd()`.

731: .seealso: `VecMTDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
732:           `VecDotBegin()`, `VecDotEnd()`, `VecMDotBegin()`, `VecMDotEnd()`, `PetscCommSplitReductionBegin()`
733: @*/
734: PetscErrorCode VecMTDotEnd(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
735: {
736:   PetscFunctionBegin;
737:   /*
738:       MTDotEnd() is the same as MDotEnd() so reuse the code
739:   */
740:   PetscCall(VecMDotEnd(x, nv, y, result));
741:   PetscFunctionReturn(PETSC_SUCCESS);
742: }