Actual source code: tagm.c

#include <petsc/private/petscimpl.h>
#include <petsc/private/hashmapobj.h>
#include <petsc/private/garbagecollector.h>

/*
   A simple way to manage tags inside a communicator.

   It uses MPI attributes to determine if a new communicator
   is needed and to store the available tags.
*/

/*@
  PetscObjectGetNewTag - Gets a unique new tag from a PETSc object. All
  processors that share the object MUST call this routine EXACTLY the same
  number of times. This tag should only be used with the current object's
  communicator; do NOT use it with any other MPI communicator.

  Collective

  Input Parameter:
. obj - the PETSc object; this must be cast with a (`PetscObject`), for example,
         `PetscObjectGetNewTag`((`PetscObject`)mat, &tag);

  Output Parameter:
. tag - the new tag

  Level: developer

  Note:
  This tag is needed if one is writing MPI communication code involving message passing and needs
  unique MPI tags to ensure the messages are associated with this specific object.

.seealso: `PetscCommGetNewTag()`
@*/
PetscErrorCode PetscObjectGetNewTag(PetscObject obj, PetscMPIInt *tag)
{
  PetscFunctionBegin;
  PetscCall(PetscCommGetNewTag(obj->comm, tag));
  PetscFunctionReturn(PETSC_SUCCESS);
}
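
/*
  Illustrative usage sketch, not part of tagm.c: draw a fresh tag from an object's
  communicator for a hand-written message from rank 0 to rank 1. The function name
  ExampleObjectTagPing is hypothetical.
*/
PETSC_UNUSED static PetscErrorCode ExampleObjectTagPing(PetscObject obj)
{
  MPI_Comm    comm;
  PetscMPIInt tag, rank, size;
  PetscScalar val = 1.0;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm(obj, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscObjectGetNewTag(obj, &tag)); /* collective: every rank sharing obj must call */
  if (size > 1) {
    if (rank == 0) PetscCallMPI(MPI_Send(&val, 1, MPIU_SCALAR, 1, tag, comm));
    else if (rank == 1) PetscCallMPI(MPI_Recv(&val, 1, MPIU_SCALAR, 0, tag, comm, MPI_STATUS_IGNORE));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}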

/*@
  PetscCommGetNewTag - Gets a unique new tag from a PETSc communicator

  Collective

  Input Parameter:
. comm - the MPI communicator

  Output Parameter:
. tag - the new tag

  Level: developer

  Notes:
  All processors that share the communicator MUST call this routine EXACTLY the same number of
  times. This tag should only be used with this communicator; do NOT use it
  with any other MPI communicator.

.seealso: `PetscObjectGetNewTag()`, `PetscCommDuplicate()`
@*/
PetscErrorCode PetscCommGetNewTag(MPI_Comm comm, PetscMPIInt *tag)
{
  PetscCommCounter *counter;
  PetscMPIInt      *maxval, flg;

  PetscFunctionBegin;
  PetscAssertPointer(tag, 2);

  PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Counter_keyval, &counter, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Bad MPI communicator supplied; must be a PETSc communicator");

  if (counter->tag < 1) {
    PetscCall(PetscInfo(NULL, "Out of tags for object, starting to recycle. Comm reference count %" PetscInt_FMT "\n", counter->refcount));
    PetscCallMPI(MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &maxval, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_LIB, "MPI error: MPI_Comm_get_attr() is not returning an MPI_TAG_UB");
    counter->tag = *maxval - 128; /* hope that any still active tags were issued right at the beginning of the run */
  }

  *tag = counter->tag--;
  if (PetscDefined(USE_DEBUG)) {
    /*
     Hanging here means that some processes have called PetscCommGetNewTag() and others have not.
     */
    PetscCallMPI(MPI_Barrier(comm));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
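
/*
  Illustrative sketch, not part of tagm.c: draw a fresh tag from a PETSc communicator
  (for example one produced by PetscCommDuplicate()) for a ring exchange, so the
  messages cannot collide with any other traffic on the communicator. The function
  name ExampleRingShift is hypothetical.
*/
PETSC_UNUSED static PetscErrorCode ExampleRingShift(MPI_Comm pcomm, PetscInt myval, PetscInt *leftval)
{
  PetscMPIInt tag, rank, size, left, right;
  MPI_Request req;

  PetscFunctionBegin;
  PetscCall(PetscCommGetNewTag(pcomm, &tag)); /* collective: all ranks must call the same number of times */
  PetscCallMPI(MPI_Comm_rank(pcomm, &rank));
  PetscCallMPI(MPI_Comm_size(pcomm, &size));
  left  = (rank + size - 1) % size;
  right = (rank + 1) % size;
  PetscCallMPI(MPI_Isend(&myval, 1, MPIU_INT, right, tag, pcomm, &req));
  PetscCallMPI(MPI_Recv(leftval, 1, MPIU_INT, left, tag, pcomm, MPI_STATUS_IGNORE));
  PetscCallMPI(MPI_Wait(&req, MPI_STATUS_IGNORE));
  PetscFunctionReturn(PETSC_SUCCESS);
}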

/*@C
  PetscCommGetComm - get a new MPI communicator from a PETSc communicator that can be passed off to another package

  Collective

  Input Parameter:
. comm_in - Input communicator

  Output Parameter:
. comm_out - Output communicator

  Level: developer

  Notes:
  Use `PetscCommRestoreComm()` to return the communicator when the external package no longer needs it.

  In certain MPI implementations `MPI_Comm_free()` does not actually release the communicator, so one can run out of available
  MPI communicators, causing mysterious crashes after the code has run for a long time. This routine allows reusing previously
  obtained MPI communicators that are no longer needed.

.seealso: `PetscObjectGetNewTag()`, `PetscCommGetNewTag()`, `PetscCommDestroy()`, `PetscCommRestoreComm()`
@*/
PetscErrorCode PetscCommGetComm(MPI_Comm comm_in, MPI_Comm *comm_out)
{
  PetscCommCounter *counter;
  PetscMPIInt       flg;

  PetscFunctionBegin;
  PetscCall(PetscSpinlockLock(&PetscCommSpinLock));
  PetscCallMPI(MPI_Comm_get_attr(comm_in, Petsc_Counter_keyval, &counter, &flg));
  PetscCheck(flg, comm_in, PETSC_ERR_ARG_WRONGSTATE, "Requires a PETSc communicator as input, do not use something like MPI_COMM_WORLD");

  if (counter->comms) {
    struct PetscCommStash *pcomms = counter->comms;

    *comm_out      = pcomms->comm;
    counter->comms = pcomms->next;
    PetscCall(PetscFree(pcomms));
    PetscCall(PetscInfo(NULL, "Reusing a communicator %ld %ld\n", (long)comm_in, (long)*comm_out));
  } else {
    PetscCallMPI(MPI_Comm_dup(comm_in, comm_out));
  }
  PetscCall(PetscSpinlockUnlock(&PetscCommSpinLock));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscCommRestoreComm - restores an MPI communicator that was obtained with `PetscCommGetComm()`

  Collective

  Input Parameters:
+ comm_in  - Input communicator
- comm_out - communicator being restored; set to `MPI_COMM_NULL` on return

  Level: developer

.seealso: `PetscObjectGetNewTag()`, `PetscCommGetNewTag()`, `PetscCommGetComm()`, `PetscCommDestroy()`
@*/
PetscErrorCode PetscCommRestoreComm(MPI_Comm comm_in, MPI_Comm *comm_out)
{
  PetscCommCounter      *counter;
  PetscMPIInt            flg;
  struct PetscCommStash *pcomms, *ncomm;

  PetscFunctionBegin;
  PetscCall(PetscSpinlockLock(&PetscCommSpinLock));
  PetscCallMPI(MPI_Comm_get_attr(comm_in, Petsc_Counter_keyval, &counter, &flg));
  PetscCheck(flg, comm_in, PETSC_ERR_ARG_WRONGSTATE, "Requires a PETSc communicator as input, do not use something like MPI_COMM_WORLD");

  PetscCall(PetscMalloc(sizeof(struct PetscCommStash), &ncomm));
  ncomm->comm = *comm_out;
  ncomm->next = NULL;
  /* append the restored communicator to the end of the stash list */
  pcomms      = counter->comms;
  while (pcomms && pcomms->next) pcomms = pcomms->next;
  if (pcomms) {
    pcomms->next = ncomm;
  } else {
    counter->comms = ncomm;
  }
  *comm_out = MPI_COMM_NULL;
  PetscCall(PetscSpinlockUnlock(&PetscCommSpinLock));
  PetscFunctionReturn(PETSC_SUCCESS);
}
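
/*
  Illustrative sketch, not part of tagm.c: check out a communicator for an external
  package and return it afterwards so it can be reused instead of re-duplicated.
  ExampleExternalPackageCall and the commented-out ExternalPackageSolve() are
  hypothetical stand-ins for a real third-party interface.
*/
PETSC_UNUSED static PetscErrorCode ExampleExternalPackageCall(MPI_Comm pcomm)
{
  MPI_Comm ecomm;

  PetscFunctionBegin;
  PetscCall(PetscCommGetComm(pcomm, &ecomm)); /* reuses a stashed communicator if one is available */
  /* ... hand ecomm to the external package here, e.g. a hypothetical ExternalPackageSolve(ecomm) ... */
  PetscCall(PetscCommRestoreComm(pcomm, &ecomm)); /* stashes ecomm for later reuse and sets it to MPI_COMM_NULL */
  PetscFunctionReturn(PETSC_SUCCESS);
}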

/*@C
  PetscCommDuplicate - Duplicates the communicator only if it is not already a PETSc communicator.

  Collective

  Input Parameter:
. comm_in - Input communicator

  Output Parameters:
+ comm_out  - Output communicator.  May be `comm_in`.
- first_tag - Tag available that has not already been used with this communicator (you may pass in `NULL` if you do not need a tag)

  Level: developer

  Note:
  PETSc communicators are just regular MPI communicators that keep track of which
  tags have been used to prevent tag conflicts. If you pass a non-PETSc communicator into
  a PETSc creation routine it will attach a private communicator for use in the object's communication.
  The internal `MPI_Comm` is used to perform all the MPI calls for PETSc; the outer `MPI_Comm` is the
  user's and is not used by PETSc.

.seealso: `PetscObjectGetNewTag()`, `PetscCommGetNewTag()`, `PetscCommDestroy()`
@*/
PetscErrorCode PetscCommDuplicate(MPI_Comm comm_in, MPI_Comm *comm_out, PetscMPIInt *first_tag)
{
  PetscInt64       *cidx;
  PetscCommCounter *counter;
  PetscMPIInt      *maxval, flg;

  PetscFunctionBegin;
  PetscCall(PetscSpinlockLock(&PetscCommSpinLock));
  PetscCallMPI(MPI_Comm_get_attr(comm_in, Petsc_Counter_keyval, &counter, &flg));

  if (!flg) { /* this is NOT a PETSc comm */
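    /*
       MPI attribute values are passed as void*, while an MPI_Comm may be an integer (as in MPICH)
       or a pointer (as in Open MPI); the union below moves a communicator through the attribute
       interface portably, without relying on either representation.
    */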
    union
    {
      MPI_Comm comm;
      void    *ptr;
    } ucomm;
    /* check if this communicator has a PETSc communicator embedded in it */
    PetscCallMPI(MPI_Comm_get_attr(comm_in, Petsc_InnerComm_keyval, &ucomm, &flg));
    if (!flg) {
      /* This communicator is not yet known to this system, so we duplicate it and make an internal communicator */
      PetscCallMPI(MPI_Comm_dup(comm_in, comm_out));
      PetscCallMPI(MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &maxval, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_LIB, "MPI error: MPI_Comm_get_attr() is not returning an MPI_TAG_UB");
      PetscCall(PetscNew(&counter)); /* all fields of counter are zeroed */
      counter->tag = *maxval;
      PetscCallMPI(MPI_Comm_set_attr(*comm_out, Petsc_Counter_keyval, counter));
      /* Add an object creation index to the communicator */
      PetscCall(PetscNew(&cidx));
      PetscCallMPI(MPI_Comm_set_attr(*comm_out, Petsc_CreationIdx_keyval, cidx));
      PetscCall(PetscInfo(NULL, "Duplicating a communicator %ld %ld max tags = %d\n", (long)comm_in, (long)*comm_out, *maxval));

      /* save the PETSc communicator inside the user communicator, so we can get it next time */
      ucomm.comm = *comm_out; /* ONLY the comm part of the union is significant. */
      PetscCallMPI(MPI_Comm_set_attr(comm_in, Petsc_InnerComm_keyval, ucomm.ptr));
      ucomm.comm = comm_in;
      PetscCallMPI(MPI_Comm_set_attr(*comm_out, Petsc_OuterComm_keyval, ucomm.ptr));
    } else {
      /* pull out the inner MPI_Comm and hand it back to the caller */
      *comm_out = ucomm.comm;
      PetscCallMPI(MPI_Comm_get_attr(*comm_out, Petsc_Counter_keyval, &counter, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Inner PETSc communicator does not have its tag/name counter attribute set");
      PetscCall(PetscInfo(NULL, "Using internal PETSc communicator %ld %ld\n", (long)comm_in, (long)*comm_out));
    }
  } else *comm_out = comm_in;

  if (PetscDefined(USE_DEBUG)) {
    /*
     Hanging here means that some processes have called PetscCommDuplicate() and others have not.
     This likely means that a subset of processes in an MPI_Comm have attempted to create a PetscObject!
     ALL processes that share a communicator MUST share all objects created on that communicator.
     */
    PetscCallMPI(MPI_Barrier(comm_in));
  }

  if (counter->tag < 1) {
    PetscCall(PetscInfo(NULL, "Out of tags for object, starting to recycle. Comm reference count %" PetscInt_FMT "\n", counter->refcount));
    PetscCallMPI(MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &maxval, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_LIB, "MPI error: MPI_Comm_get_attr() is not returning an MPI_TAG_UB");
    counter->tag = *maxval - 128; /* hope that any still active tags were issued right at the beginning of the run */
  }

  if (first_tag) *first_tag = counter->tag--;

  counter->refcount++; /* number of references to this comm */
  PetscCall(PetscSpinlockUnlock(&PetscCommSpinLock));
  PetscFunctionReturn(PETSC_SUCCESS);
}
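
/*
  Illustrative sketch, not part of tagm.c: repeated duplication of the same user
  communicator returns the same inner PETSc communicator and hands out fresh,
  decreasing tags (assuming the tags have not wrapped around). Each
  PetscCommDuplicate() must be matched by a PetscCommDestroy().
*/
PETSC_UNUSED static PetscErrorCode ExampleCommDuplicateTwice(MPI_Comm user)
{
  MPI_Comm    c1, c2;
  PetscMPIInt t1, t2;

  PetscFunctionBegin;
  PetscCall(PetscCommDuplicate(user, &c1, &t1)); /* first call may MPI_Comm_dup() and attach the counter */
  PetscCall(PetscCommDuplicate(user, &c2, &t2)); /* second call finds the attached inner communicator */
  PetscCheck(c1 == c2, user, PETSC_ERR_PLIB, "Expected the same inner communicator");
  PetscCheck(t2 == t1 - 1, user, PETSC_ERR_PLIB, "Tags are handed out counting down");
  PetscCall(PetscCommDestroy(&c2)); /* decrements the reference count */
  PetscCall(PetscCommDestroy(&c1)); /* reference count reaches zero, the inner comm may be freed */
  PetscFunctionReturn(PETSC_SUCCESS);
}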

/*@C
  PetscCommDestroy - Frees a communicator obtained with `PetscCommDuplicate()`.

  Collective

  Input Parameter:
. comm - the communicator to free

  Level: developer

  Notes:
  Sets `comm` to `MPI_COMM_NULL`

  The communicator is reference counted, so it is only truly removed from the system when its reference count drops to zero.

.seealso: `PetscCommDuplicate()`
@*/
PetscErrorCode PetscCommDestroy(MPI_Comm *comm)
{
  PetscInt64       *cidx;
  PetscCommCounter *counter;
  PetscMPIInt       flg;
  PetscGarbage      garbage;
  MPI_Comm          icomm = *comm, ocomm;
  union
  {
    MPI_Comm comm;
    void    *ptr;
  } ucomm;

  PetscFunctionBegin;
  if (*comm == MPI_COMM_NULL) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscSpinlockLock(&PetscCommSpinLock));
  PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_Counter_keyval, &counter, &flg));
  if (!flg) { /* not a PETSc comm, check if it has an inner comm */
    PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_InnerComm_keyval, &ucomm, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "MPI_Comm does not have a tag/name counter nor an inner MPI_Comm");
    icomm = ucomm.comm;
    PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_Counter_keyval, &counter, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Inner MPI_Comm does not have the expected tag/name counter, problem with corrupted memory");
  }
  counter->refcount--;
  if (!counter->refcount) {
    /* if the MPI_Comm has an outer comm then remove the reference to the inner MPI_Comm from the outer MPI_Comm */
    PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_OuterComm_keyval, &ucomm, &flg));
    if (flg) {
      ocomm = ucomm.comm;
      PetscCallMPI(MPI_Comm_get_attr(ocomm, Petsc_InnerComm_keyval, &ucomm, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Outer MPI_Comm %ld does not have the expected reference to inner comm %ld, problem with corrupted memory", (long int)ocomm, (long int)icomm);
      PetscCallMPI(MPI_Comm_delete_attr(ocomm, Petsc_InnerComm_keyval));
    }

    /* Remove the object creation index on the communicator */
    PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_CreationIdx_keyval, &cidx, &flg));
    if (flg) {
      PetscCall(PetscFree(cidx));
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "MPI_Comm does not have an object creation index");

    /* Remove the garbage hashmap set up by garbage collection */
    PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_Garbage_HMap_keyval, &garbage, &flg));
    if (flg) {
      PetscInt entries = 0;
      PetscCall(PetscHMapObjGetSize(garbage.map, &entries));
      if (entries > 0) PetscCall(PetscGarbageCleanup(icomm));
      PetscCall(PetscHMapObjDestroy(&garbage.map));
    }

    PetscCall(PetscInfo(NULL, "Deleting PETSc MPI_Comm %ld\n", (long)icomm));
    PetscCallMPI(MPI_Comm_free(&icomm));
  }
  *comm = MPI_COMM_NULL;
  PetscCall(PetscSpinlockUnlock(&PetscCommSpinLock));
  PetscFunctionReturn(PETSC_SUCCESS);
}
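
/*
  Illustrative sketch, not part of tagm.c: PetscCommDestroy() accepts either the inner
  PETSc communicator returned by PetscCommDuplicate() or the outer user communicator;
  given the outer one it follows the Petsc_InnerComm_keyval attribute to find the inner
  communicator whose reference count it decrements. ExampleDestroyViaOuterComm is a
  hypothetical name.
*/
PETSC_UNUSED static PetscErrorCode ExampleDestroyViaOuterComm(MPI_Comm user)
{
  MPI_Comm    inner, outer = user;
  PetscMPIInt tag;

  PetscFunctionBegin;
  PetscCall(PetscCommDuplicate(user, &inner, &tag));
  PetscCall(PetscCommDestroy(&outer)); /* decrements the count of inner; outer is set to MPI_COMM_NULL */
  /* inner must not be used past this point if its reference count reached zero above */
  PetscFunctionReturn(PETSC_SUCCESS);
}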

/*@C
  PetscObjectsListGetGlobalNumbering - computes a global numbering
  of `PetscObject`s living on subcommunicators of a given communicator.

  Collective

  Input Parameters:
+ comm    - the `MPI_Comm`
. len     - local length of `objlist`
- objlist - a list of PETSc objects living on subcomms of `comm` and containing this comm's rank
              (subcomm ordering is assumed to be deadlock-free)

  Output Parameters:
+ count     - global number of distinct subcommunicators on `objlist` (may be > `len`)
- numbering - global numbers of `objlist` entries (allocated by the user)

  Level: developer

  Note:
  This is needed when PETSc is used with certain languages that do garbage collection to manage object life cycles.

.seealso: `PetscCommDuplicate()`, `PetscObjectDestroy()`
@*/
PetscErrorCode PetscObjectsListGetGlobalNumbering(MPI_Comm comm, PetscInt len, PetscObject *objlist, PetscInt *count, PetscInt *numbering)
{
  PetscInt    i, roots, offset;
  PetscMPIInt size, rank;

  PetscFunctionBegin;
  PetscAssertPointer(objlist, 3);
  if (!count && !numbering) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  roots = 0;
  for (i = 0; i < len; ++i) {
    PetscMPIInt srank;
    PetscCallMPI(MPI_Comm_rank(objlist[i]->comm, &srank));
    /* Am I the root of the i-th subcomm? */
    if (!srank) ++roots;
  }
  if (count) {
    /* Obtain the sum of all roots -- the global number of distinct subcomms. */
    PetscCallMPI(MPIU_Allreduce(&roots, count, 1, MPIU_INT, MPI_SUM, comm));
  }
  if (numbering) {
    /* Introduce a global numbering for subcomms, initially known only by subcomm roots. */
    /*
      At each subcomm root, number all of the subcomms it owns locally
      and make the numbering global by calculating the shift among all of the roots.
      The roots are ordered using the comm ordering.
    */
    PetscCallMPI(MPI_Scan(&roots, &offset, 1, MPIU_INT, MPI_SUM, comm));
    offset -= roots;
    /* Now we are ready to broadcast global subcomm numbers within each subcomm. */
    /*
      This is where the deadlock-free ordering of the subcomms is assumed:
      the broadcast is collective on each subcomm.
    */
    roots = 0;
    for (i = 0; i < len; ++i) {
      PetscMPIInt srank;
      numbering[i] = offset + roots; /* only meaningful if !srank. */

      PetscCallMPI(MPI_Comm_rank(objlist[i]->comm, &srank));
      PetscCallMPI(MPI_Bcast(numbering + i, 1, MPIU_INT, 0, objlist[i]->comm));
      if (!srank) ++roots;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
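
/*
  Illustrative sketch, not part of tagm.c (assumes petscvec.h is available): number two
  objects, one living on PETSC_COMM_SELF on every rank and one shared by the whole comm.
  Since every rank is the root of its own PETSC_COMM_SELF, count comes back as size + 1.
  ExampleGlobalNumbering is a hypothetical name.
*/
#include <petscvec.h> /* needed only for this sketch */
PETSC_UNUSED static PetscErrorCode ExampleGlobalNumbering(MPI_Comm comm)
{
  Vec      objs[2];
  PetscInt numbering[2], count;

  PetscFunctionBegin;
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, 1, &objs[0]));    /* a distinct object on every rank */
  PetscCall(VecCreateMPI(comm, PETSC_DECIDE, 4, &objs[1])); /* one object shared by all ranks */
  PetscCall(PetscObjectsListGetGlobalNumbering(comm, 2, (PetscObject *)objs, &count, numbering));
  /* numbering[0] differs across ranks; numbering[1] is identical on all ranks */
  PetscCall(VecDestroy(&objs[0]));
  PetscCall(VecDestroy(&objs[1]));
  PetscFunctionReturn(PETSC_SUCCESS);
}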