Actual source code: sfwindow.c

  1: #include <petsc/private/sfimpl.h>

  3: typedef struct _n_PetscSFDataLink *PetscSFDataLink;
  4: typedef struct _n_PetscSFWinLink  *PetscSFWinLink;

  6: typedef struct {
  7:   PetscSFWindowSyncType   sync;   /* FENCE, LOCK, or ACTIVE synchronization */
  8:   PetscSFDataLink         link;   /* List of MPI data types, lazily constructed for each data type */
  9:   PetscSFWinLink          wins;   /* List of active windows */
 10:   PetscSFWindowFlavorType flavor; /* Current PETSCSF_WINDOW_FLAVOR_ */
 11:   PetscSF                 dynsf;
 12:   MPI_Info                info;
 13:   MPI_Comm                window_comm;
 14:   PetscBool               is_empty;
 15:   PetscMPIInt            *wcommranks;
 16: } PetscSF_Window;

 18: struct _n_PetscSFDataLink {
 19:   MPI_Datatype    unit;
 20:   MPI_Datatype   *mine;
 21:   MPI_Datatype   *remote;
 22:   PetscSFDataLink next;
 23: };

 25: struct _n_PetscSFWinLink {
 26:   PetscBool               inuse;
 27:   MPI_Aint                bytes;
 28:   void                   *addr;
 29:   void                   *rootdata;
 30:   void                   *leafdata;
 31:   MPI_Win                 win;
 32:   MPI_Request            *reqs;
 33:   PetscSFWindowFlavorType flavor;
 34:   MPI_Aint               *dyn_target_addr;
 35:   PetscBool               epoch;
 36:   PetscBool               persistent;
 37:   PetscSFWinLink          next;
 38: };

 40: const char *const PetscSFWindowSyncTypes[]   = {"FENCE", "LOCK", "ACTIVE", "PetscSFWindowSyncType", "PETSCSF_WINDOW_SYNC_", NULL};
 41: const char *const PetscSFWindowFlavorTypes[] = {"CREATE", "DYNAMIC", "ALLOCATE", "SHARED", "PetscSFWindowFlavorType", "PETSCSF_WINDOW_FLAVOR_", NULL};

 43: /* Built-in MPI_Ops act elementwise inside MPI_Accumulate, whereas the custom MPIU_ ops PETSc needs for composite types inside collectives (MPI_Allreduce) cannot be used in one-sided accumulation; translate to the built-in equivalent where one exists */
 44: static PetscErrorCode PetscSFWindowOpTranslate(MPI_Op *op)
 45: {
 46:   PetscFunctionBegin;
 47:   if (*op == MPIU_SUM) *op = MPI_SUM;
 48:   else if (*op == MPIU_MAX) *op = MPI_MAX;
 49:   else if (*op == MPIU_MIN) *op = MPI_MIN;
 50:   PetscFunctionReturn(PETSC_SUCCESS);
 51: }
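
/* A minimal sketch of how this translation is used in the reduce path below
   (mine/remote/disp/win are placeholders for the variables in the implementations
   later in this file):

     MPI_Op op = MPIU_SUM;                      // PETSc reduction op, possibly user-defined
     PetscCall(PetscSFWindowOpTranslate(&op));  // becomes MPI_SUM when a built-in equivalent exists
     PetscCallMPI(MPI_Accumulate(leafdata, 1, mine, rank, disp, 1, remote, op, win));
*/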

 53: /*
 54:    PetscSFWindowGetDataTypes - gets composite local and remote data types for each rank

 56:    Not Collective

 58:    Input Parameters:
 59: +  sf - star forest of type `PETSCSFWINDOW`
 60: -  unit - data type for each node

 62:    Output Parameters:
 63: +  localtypes - types describing part of local leaf buffer referencing each remote rank
 64: -  remotetypes - types describing part of remote root buffer referenced for each remote rank

 66:    Level: developer

 68: .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetGraph()`, `PetscSFView()`
 69: */
 70: static PetscErrorCode PetscSFWindowGetDataTypes(PetscSF sf, MPI_Datatype unit, const MPI_Datatype **localtypes, const MPI_Datatype **remotetypes)
 71: {
 72:   PetscSF_Window *w = (PetscSF_Window *)sf->data;
 73:   PetscSFDataLink link;
 74:   PetscMPIInt     nranks;
 75:   const PetscInt *roffset;

 77:   PetscFunctionBegin;
 78:   /* Look for types in cache */
 79:   for (link = w->link; link; link = link->next) {
 80:     PetscBool match;

 82:     PetscCall(MPIPetsc_Type_compare(unit, link->unit, &match));
 83:     if (match) {
 84:       *localtypes  = link->mine;
 85:       *remotetypes = link->remote;
 86:       PetscFunctionReturn(PETSC_SUCCESS);
 87:     }
 88:   }

 90:   /* Create new composite types for each send rank */
 91:   PetscCall(PetscSFGetRootRanks(sf, &nranks, NULL, &roffset, NULL, NULL));
 92:   PetscCall(PetscNew(&link));
 93:   PetscCallMPI(MPI_Type_dup(unit, &link->unit));
 94:   PetscCall(PetscMalloc2(nranks, &link->mine, nranks, &link->remote));
 95:   for (PetscMPIInt i = 0; i < nranks; i++) {
 96:     PetscMPIInt  rcount;
 97:     PetscMPIInt *rmine, *rremote;

 99:     PetscCall(PetscMPIIntCast(roffset[i + 1] - roffset[i], &rcount));
100: #if !defined(PETSC_USE_64BIT_INDICES)
101:     rmine   = sf->rmine + sf->roffset[i];
102:     rremote = sf->rremote + sf->roffset[i];
103: #else
104:     PetscCall(PetscMalloc2(rcount, &rmine, rcount, &rremote));
105:     for (PetscInt j = 0; j < rcount; j++) {
106:       PetscCall(PetscMPIIntCast(sf->rmine[sf->roffset[i] + j], &rmine[j]));
107:       PetscCall(PetscMPIIntCast(sf->rremote[sf->roffset[i] + j], &rremote[j]));
108:     }
109: #endif

111:     PetscCallMPI(MPI_Type_create_indexed_block(rcount, 1, rmine, link->unit, &link->mine[i]));
112:     PetscCallMPI(MPI_Type_create_indexed_block(rcount, 1, rremote, link->unit, &link->remote[i]));
113: #if defined(PETSC_USE_64BIT_INDICES)
114:     PetscCall(PetscFree2(rmine, rremote));
115: #endif
116:     PetscCallMPI(MPI_Type_commit(&link->mine[i]));
117:     PetscCallMPI(MPI_Type_commit(&link->remote[i]));
118:   }
119:   link->next = w->link;
120:   w->link    = link;

122:   *localtypes  = link->mine;
123:   *remotetypes = link->remote;
124:   PetscFunctionReturn(PETSC_SUCCESS);
125: }
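
/* A sketch with hypothetical indices of what the cached types represent: if the leaf
   entries {2,5,7} on this process reference root entries {0,1,4} on rank r, the loop
   above builds

     MPI_Type_create_indexed_block(3, 1, (int[]){2, 5, 7}, unit, &mine);   // selects entries of the local leaf buffer
     MPI_Type_create_indexed_block(3, 1, (int[]){0, 1, 4}, unit, &remote); // selects entries of the root buffer on rank r

   so a single MPI_Get(leafdata, 1, mine, r, disp, 1, remote, win) moves all data
   exchanged with that rank in one RMA call. */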

127: /*@
128:   PetscSFWindowSetFlavorType - Set flavor type for `MPI_Win` creation

130:   Logically Collective

132:   Input Parameters:
133: + sf     - star forest for communication of type `PETSCSFWINDOW`
134: - flavor - flavor type

136:   Options Database Key:
137: . -sf_window_flavor <flavor> - sets the flavor type CREATE, DYNAMIC, ALLOCATE, or SHARED (see `PetscSFWindowFlavorType`)

139:   Level: advanced

141:   Notes:
142:   Window reuse follows these rules\:
143: .vb
144:      PETSCSF_WINDOW_FLAVOR_CREATE: creates a new window every time, uses MPI_Win_create

146:      PETSCSF_WINDOW_FLAVOR_DYNAMIC: uses MPI_Win_create_dynamic/MPI_Win_attach and tries to reuse windows by comparing the root array. Intended to be used on repeated applications of the same SF, e.g.
147:        PetscSFRegisterPersistent(sf,rootdata1,leafdata);
148:        for i=1 to K
149:          PetscSFOperationBegin(sf,rootdata1,leafdata);
150:          PetscSFOperationEnd(sf,rootdata1,leafdata);
151:          ...
152:          PetscSFOperationBegin(sf,rootdata1,leafdata);
153:          PetscSFOperationEnd(sf,rootdata1,leafdata);
154:        endfor
155:        PetscSFDeregisterPersistent(sf,rootdata1,leafdata);

157:        The following pattern will instead raise an error
158:          PetscSFOperationBegin(sf,rootdata1,leafdata);
159:          PetscSFOperationEnd(sf,rootdata1,leafdata);
160:          PetscSFOperationBegin(sf,rank ? rootdata1 : rootdata2,leafdata);
161:          PetscSFOperationEnd(sf,rank ? rootdata1 : rootdata2,leafdata);

163:      PETSCSF_WINDOW_FLAVOR_ALLOCATE: uses MPI_Win_allocate, reuses any pre-existing window that fits the data and is not in use

165:      PETSCSF_WINDOW_FLAVOR_SHARED: uses MPI_Win_allocate_shared, with the same reuse policy as PETSCSF_WINDOW_FLAVOR_ALLOCATE
166: .ve
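
  Example Usage:
  A minimal sketch of the persistent DYNAMIC pattern (rootdata and leafdata assumed allocated to match the SF graph)\:
.vb
  PetscSFSetType(sf, PETSCSFWINDOW);
  PetscSFWindowSetFlavorType(sf, PETSCSF_WINDOW_FLAVOR_DYNAMIC);
  PetscSFRegisterPersistent(sf, MPIU_SCALAR, rootdata, leafdata);
  for (PetscInt i = 0; i < niter; i++) { // niter is a placeholder iteration count
    PetscSFBcastBegin(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE);
    PetscSFBcastEnd(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE);
  }
  PetscSFDeregisterPersistent(sf, MPIU_SCALAR, rootdata, leafdata);
.ve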

168: .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowGetFlavorType()`
169: @*/
170: PetscErrorCode PetscSFWindowSetFlavorType(PetscSF sf, PetscSFWindowFlavorType flavor)
171: {
172:   PetscFunctionBegin;
175:   PetscTryMethod(sf, "PetscSFWindowSetFlavorType_C", (PetscSF, PetscSFWindowFlavorType), (sf, flavor));
176:   PetscFunctionReturn(PETSC_SUCCESS);
177: }

179: static PetscErrorCode PetscSFWindowSetFlavorType_Window(PetscSF sf, PetscSFWindowFlavorType flavor)
180: {
181:   PetscSF_Window *w = (PetscSF_Window *)sf->data;

183:   PetscFunctionBegin;
184:   w->flavor = flavor;
185:   PetscFunctionReturn(PETSC_SUCCESS);
186: }

188: /*@
189:   PetscSFWindowGetFlavorType - Get `PETSCSFWINDOW` flavor type for `PetscSF` communication

191:   Logically Collective

193:   Input Parameter:
194: . sf - star forest for communication of type `PETSCSFWINDOW`

196:   Output Parameter:
197: . flavor - flavor type

199:   Level: advanced

201: .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowSetFlavorType()`
202: @*/
203: PetscErrorCode PetscSFWindowGetFlavorType(PetscSF sf, PetscSFWindowFlavorType *flavor)
204: {
205:   PetscFunctionBegin;
207:   PetscAssertPointer(flavor, 2);
208:   PetscUseMethod(sf, "PetscSFWindowGetFlavorType_C", (PetscSF, PetscSFWindowFlavorType *), (sf, flavor));
209:   PetscFunctionReturn(PETSC_SUCCESS);
210: }

212: static PetscErrorCode PetscSFWindowGetFlavorType_Window(PetscSF sf, PetscSFWindowFlavorType *flavor)
213: {
214:   PetscSF_Window *w = (PetscSF_Window *)sf->data;

216:   PetscFunctionBegin;
217:   *flavor = w->flavor;
218:   PetscFunctionReturn(PETSC_SUCCESS);
219: }

221: /*@
222:   PetscSFWindowSetSyncType - Set synchronization type for `PetscSF` communication of type `PETSCSFWINDOW`

224:   Logically Collective

226:   Input Parameters:
227: + sf   - star forest for communication
228: - sync - synchronization type

230:   Options Database Key:
231: . -sf_window_sync <sync> - sets the synchronization type FENCE, LOCK, or ACTIVE (see `PetscSFWindowSyncType`)

233:   Level: advanced
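
  Example Usage:
  A minimal sketch selecting LOCK synchronization programmatically, equivalent to the option -sf_window_sync lock\:
.vb
  PetscSFSetType(sf, PETSCSFWINDOW);
  PetscSFWindowSetSyncType(sf, PETSCSF_WINDOW_SYNC_LOCK);
.ve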

235: .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowGetSyncType()`, `PetscSFWindowSyncType`
236: @*/
237: PetscErrorCode PetscSFWindowSetSyncType(PetscSF sf, PetscSFWindowSyncType sync)
238: {
239:   PetscFunctionBegin;
242:   PetscTryMethod(sf, "PetscSFWindowSetSyncType_C", (PetscSF, PetscSFWindowSyncType), (sf, sync));
243:   PetscFunctionReturn(PETSC_SUCCESS);
244: }

246: static PetscErrorCode PetscSFWindowSetSyncType_Window(PetscSF sf, PetscSFWindowSyncType sync)
247: {
248:   PetscSF_Window *w = (PetscSF_Window *)sf->data;

250:   PetscFunctionBegin;
251:   w->sync = sync;
252:   PetscFunctionReturn(PETSC_SUCCESS);
253: }

255: /*@
256:   PetscSFWindowGetSyncType - Get synchronization type for `PetscSF` communication of type `PETSCSFWINDOW`

258:   Logically Collective

260:   Input Parameter:
261: . sf - star forest for communication

263:   Output Parameter:
264: . sync - synchronization type

266:   Level: advanced

268: .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowSetSyncType()`, `PetscSFWindowSyncType`
269: @*/
270: PetscErrorCode PetscSFWindowGetSyncType(PetscSF sf, PetscSFWindowSyncType *sync)
271: {
272:   PetscFunctionBegin;
274:   PetscAssertPointer(sync, 2);
275:   PetscUseMethod(sf, "PetscSFWindowGetSyncType_C", (PetscSF, PetscSFWindowSyncType *), (sf, sync));
276:   PetscFunctionReturn(PETSC_SUCCESS);
277: }

279: static PetscErrorCode PetscSFWindowGetSyncType_Window(PetscSF sf, PetscSFWindowSyncType *sync)
280: {
281:   PetscSF_Window *w = (PetscSF_Window *)sf->data;

283:   PetscFunctionBegin;
284:   *sync = w->sync;
285:   PetscFunctionReturn(PETSC_SUCCESS);
286: }

288: /*@C
289:   PetscSFWindowSetInfo - Set the `MPI_Info` handle that will be used for subsequent windows allocation

291:   Logically Collective

293:   Input Parameters:
294: + sf   - star forest for communication
295: - info - `MPI_Info` handle

297:   Level: advanced

299:   Note:
300:   The info handle is duplicated with a call to `MPI_Info_dup()` unless info = `MPI_INFO_NULL`.
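
  Example Usage:
  A minimal sketch passing an RMA hint; "no_locks" is a standard MPI window info key\:
.vb
  MPI_Info info;

  MPI_Info_create(&info);
  MPI_Info_set(info, "no_locks", "true");
  PetscSFWindowSetInfo(sf, info);
  MPI_Info_free(&info); // safe, since the handle is duplicated
.ve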

302: .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowGetInfo()`
303: @*/
304: PetscErrorCode PetscSFWindowSetInfo(PetscSF sf, MPI_Info info)
305: {
306:   PetscFunctionBegin;
308:   PetscTryMethod(sf, "PetscSFWindowSetInfo_C", (PetscSF, MPI_Info), (sf, info));
309:   PetscFunctionReturn(PETSC_SUCCESS);
310: }

312: static PetscErrorCode PetscSFWindowSetInfo_Window(PetscSF sf, MPI_Info info)
313: {
314:   PetscSF_Window *w = (PetscSF_Window *)sf->data;

316:   PetscFunctionBegin;
317:   if (w->info != MPI_INFO_NULL) PetscCallMPI(MPI_Info_free(&w->info));
318:   if (info != MPI_INFO_NULL) PetscCallMPI(MPI_Info_dup(info, &w->info));
319:   PetscFunctionReturn(PETSC_SUCCESS);
320: }

322: /*@C
323:   PetscSFWindowGetInfo - Get the `MPI_Info` handle used for windows allocation

325:   Logically Collective

327:   Input Parameter:
328: . sf - star forest for communication

330:   Output Parameter:
331: . info - `MPI_Info` handle

333:   Level: advanced

335:   Note:
336:   If `PetscSFWindowSetInfo()` has not been called, this returns `MPI_INFO_NULL`

338: .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowSetInfo()`
339: @*/
340: PetscErrorCode PetscSFWindowGetInfo(PetscSF sf, MPI_Info *info)
341: {
342:   PetscFunctionBegin;
344:   PetscAssertPointer(info, 2);
345:   PetscUseMethod(sf, "PetscSFWindowGetInfo_C", (PetscSF, MPI_Info *), (sf, info));
346:   PetscFunctionReturn(PETSC_SUCCESS);
347: }

349: static PetscErrorCode PetscSFWindowGetInfo_Window(PetscSF sf, MPI_Info *info)
350: {
351:   PetscSF_Window *w = (PetscSF_Window *)sf->data;

353:   PetscFunctionBegin;
354:   *info = w->info;
355:   PetscFunctionReturn(PETSC_SUCCESS);
356: }

358: static PetscErrorCode PetscSFWindowCreateDynamicSF(PetscSF sf, PetscSF *dynsf)
359: {
360:   PetscSFNode *remotes;

362:   PetscFunctionBegin;
363:   PetscCall(PetscMalloc1(sf->nranks, &remotes));
364:   for (PetscInt i = 0; i < sf->nranks; i++) {
365:     remotes[i].rank  = sf->ranks[i];
366:     remotes[i].index = 0;
367:   }
368:   PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, dynsf));
369:   PetscCall(PetscSFSetType(*dynsf, PETSCSFBASIC)); /* break recursion */
370:   PetscCall(PetscSFSetGraph(*dynsf, 1, sf->nranks, NULL, PETSC_OWN_POINTER, remotes, PETSC_OWN_POINTER));
371:   PetscFunctionReturn(PETSC_SUCCESS);
372: }

374: static PetscErrorCode PetscSFWindowAttach(PetscSF sf, PetscSFWinLink link, void *rootdata, size_t wsize)
375: {
376:   PetscFunctionBegin;
377: #if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)
378:   {
379:     PetscSF_Window *w = (PetscSF_Window *)sf->data;
380:     MPI_Comm        wcomm;
381:     MPI_Aint        winaddr;
382:     void           *addr = rootdata;
383:     PetscMPIInt     nranks;
384:     // some Open MPI versions do not support MPI_Win_attach(win,NULL,0);
385:     wcomm = w->window_comm;
386:     if (addr != NULL) PetscCallMPI(MPI_Win_attach(link->win, addr, wsize));
387:     link->addr = addr;
388:     PetscCheck(w->dynsf, wcomm, PETSC_ERR_ORDER, "Must call PetscSFSetUp()");
389:     PetscCall(PetscSFGetRootRanks(w->dynsf, &nranks, NULL, NULL, NULL, NULL));
390:     PetscCallMPI(MPI_Get_address(addr, &winaddr));
391:     if (!link->dyn_target_addr) PetscCall(PetscMalloc1(nranks, &link->dyn_target_addr));
392:     PetscCall(PetscSFBcastBegin(w->dynsf, MPI_AINT, &winaddr, link->dyn_target_addr, MPI_REPLACE));
393:     PetscCall(PetscSFBcastEnd(w->dynsf, MPI_AINT, &winaddr, link->dyn_target_addr, MPI_REPLACE));
394:   }
395: #else
396:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "dynamic windows not supported");
397: #endif
398:   PetscFunctionReturn(PETSC_SUCCESS);
399: }
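
/* Sketch: with the DYNAMIC flavor each target exposes a different base address, so the
   addresses gathered in link->dyn_target_addr above serve as the target displacements
   for subsequent RMA calls, e.g.

     MPI_Aint tdp = link->dyn_target_addr[i];
     PetscCallMPI(MPI_Get(leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], win));

   as done in the Bcast/Reduce implementations below. */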

401: /*
402:    PetscSFGetWindow - Get a window for use with a given data type

404:    Collective

406:    Input Parameters:
407: +  sf - star forest
408: .  unit - data type
409: .  rootdata - array to be sent
410: .  leafdata - only used to help uniquely identify windows
411: .  sync - type of synchronization `PetscSFWindowSyncType`
412: .  epoch - `PETSC_TRUE` to acquire the window and start an epoch, `PETSC_FALSE` to just acquire the window
413: .  fenceassert - assert parameter for call to `MPI_Win_fence()`, if sync == `PETSCSF_WINDOW_SYNC_FENCE`
414: .  postassert - assert parameter for call to `MPI_Win_post()`, if sync == `PETSCSF_WINDOW_SYNC_ACTIVE`
415: -  startassert - assert parameter for call to `MPI_Win_start()`, if sync == `PETSCSF_WINDOW_SYNC_ACTIVE`

417:    Output Parameters:
418: +  target_disp - target displacement argument for RMA calls (significant for `PETSCSF_WINDOW_FLAVOR_DYNAMIC` only)
419: .  reqs - array of requests (significant for sync == `PETSCSF_WINDOW_SYNC_LOCK` only)
420: -  win - window

422:    Level: developer

424: .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFGetRootRanks()`, `PetscSFWindowGetDataTypes()`
425: */

427: static PetscErrorCode PetscSFGetWindow(PetscSF sf, MPI_Datatype unit, void *rootdata, void *leafdata, PetscSFWindowSyncType sync, PetscBool epoch, PetscMPIInt fenceassert, PetscMPIInt postassert, PetscMPIInt startassert, const MPI_Aint **target_disp, MPI_Request **reqs, MPI_Win *win)
428: {
429:   PetscSF_Window *w = (PetscSF_Window *)sf->data;
430:   MPI_Aint        bytes;
431:   PetscSFWinLink  link;
432:   PetscBool       reuse = PETSC_FALSE, update = PETSC_FALSE;
433:   MPI_Aint        wsize;
434:   MPI_Comm        wcomm;
435:   PetscBool       is_empty;

437:   PetscFunctionBegin;
438:   PetscCall(PetscSFGetDatatypeSize_Internal(PetscObjectComm((PetscObject)sf), unit, &bytes));
439:   wsize    = (MPI_Aint)(bytes * sf->nroots);
440:   wcomm    = w->window_comm;
441:   is_empty = w->is_empty;
442:   if (is_empty) {
443:     if (target_disp) *target_disp = NULL;
444:     if (reqs) *reqs = NULL;
445:     *win = MPI_WIN_NULL;
446:     PetscFunctionReturn(PETSC_SUCCESS);
447:   }
448:   if (w->flavor != PETSCSF_WINDOW_FLAVOR_CREATE) reuse = PETSC_TRUE;
449:   if (PetscDefined(HAVE_MPI_FEATURE_DYNAMIC_WINDOW) && w->flavor == PETSCSF_WINDOW_FLAVOR_DYNAMIC) {
450:     // first search for a persistent window
451:     for (link = w->wins; reuse && link; link = link->next) {
452:       PetscBool match;

454:       if (!link->persistent) continue;
455:       match = (link->flavor == w->flavor && link->rootdata == rootdata && link->leafdata == leafdata) ? PETSC_TRUE : PETSC_FALSE;
456:       if (PetscDefined(USE_DEBUG)) {
457:         PetscInt matches[2];
458:         PetscInt all_matches[2];

460:         matches[0] = match ? 1 : 0;
461:         matches[1] = match ? -1 : 0;
462:         PetscCallMPI(MPIU_Allreduce(matches, all_matches, 2, MPIU_INT, MPI_MAX, wcomm));
463:         all_matches[1] = -all_matches[1];
464:         PetscCheck(all_matches[0] == all_matches[1], wcomm, PETSC_ERR_ARG_INCOMP,
465:                    "Inconsistent use across MPI processes of persistent leaf and root data registered with PetscSFRegisterPersistent().\n"
466:                    "Either the persistent data was changed on a subset of processes (which is not allowed),\n"
467:                    "or persistent data was not deregistered with PetscSFDeregisterPersistent() before being deallocated");
468:       }
469:       if (match) {
470:         PetscCheck(!link->inuse, wcomm, PETSC_ERR_ARG_WRONGSTATE, "Communication already in progress on persistent root and leaf data");
471:         PetscCheck(!epoch || !link->epoch, wcomm, PETSC_ERR_ARG_WRONGSTATE, "Communication epoch already open for window");
472:         PetscCheck(bytes == link->bytes, wcomm, PETSC_ERR_ARG_WRONGSTATE, "Wrong data type for persistent root and leaf data");
473:         *win = link->win;
474:         goto found;
475:       }
476:     }
477:   }
478:   for (link = w->wins; reuse && link; link = link->next) {
479:     if (w->flavor != link->flavor) continue;
480:     /* an existing window can be used (1) if it is not in use, (2) if we are
481:        not asking to start an epoch or it does not have an already started
482:        epoch, and (3) if it is the right size */
483:     if (!link->inuse && (!epoch || !link->epoch) && bytes == (MPI_Aint)link->bytes) {
484:       if (w->flavor == PETSCSF_WINDOW_FLAVOR_DYNAMIC) {
485:         PetscCall(PetscSFWindowAttach(sf, link, rootdata, wsize));
486:       } else {
487:         update = PETSC_TRUE;
488:       }
489:       link->rootdata = rootdata;
490:       link->leafdata = leafdata;
491:       PetscCall(PetscInfo(sf, "Reusing window %" PETSC_INTPTR_T_FMT " of flavor %d for comm %" PETSC_INTPTR_T_FMT "\n", (PETSC_INTPTR_T)link->win, link->flavor, (PETSC_INTPTR_T)wcomm));
492:       *win = link->win;
493:       goto found;
494:     }
495:   }

497:   PetscCall(PetscNew(&link));
498:   link->bytes           = bytes;
499:   link->next            = w->wins;
500:   link->flavor          = w->flavor;
501:   link->dyn_target_addr = NULL;
502:   link->reqs            = NULL;
503:   w->wins               = link;
504:   link->rootdata        = rootdata;
505:   link->leafdata        = leafdata;
506:   if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
507:     PetscCall(PetscMalloc1(sf->nranks, &link->reqs));
508:     for (PetscMPIInt i = 0; i < sf->nranks; i++) link->reqs[i] = MPI_REQUEST_NULL;
509:   }
510:   switch (w->flavor) {
511:   case PETSCSF_WINDOW_FLAVOR_CREATE:
512:     PetscCallMPI(MPI_Win_create(rootdata, wsize, (PetscMPIInt)bytes, w->info, wcomm, &link->win));
513:     link->addr = rootdata;
514:     break;
515: #if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)
516:   case PETSCSF_WINDOW_FLAVOR_DYNAMIC:
517:     PetscCallMPI(MPI_Win_create_dynamic(w->info, wcomm, &link->win));
518:     PetscCall(PetscSFWindowAttach(sf, link, rootdata, wsize));
519:     break;
520:   case PETSCSF_WINDOW_FLAVOR_ALLOCATE:
521:     PetscCallMPI(MPI_Win_allocate(wsize, (PetscMPIInt)bytes, w->info, wcomm, &link->addr, &link->win));
522:     update = PETSC_TRUE;
523:     break;
524: #endif
525: #if defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
526:   case PETSCSF_WINDOW_FLAVOR_SHARED:
527:     PetscCallMPI(MPI_Win_allocate_shared(wsize, (PetscMPIInt)bytes, w->info, wcomm, &link->addr, &link->win));
528:     update = PETSC_TRUE;
529:     break;
530: #endif
531:   default:
532:     SETERRQ(wcomm, PETSC_ERR_SUP, "No support for flavor %s", PetscSFWindowFlavorTypes[w->flavor]);
533:   }
534:   PetscCall(PetscInfo(sf, "New window %" PETSC_INTPTR_T_FMT " of flavor %d for comm %" PETSC_INTPTR_T_FMT "\n", (PETSC_INTPTR_T)link->win, link->flavor, (PETSC_INTPTR_T)wcomm));
535:   *win = link->win;

537: found:

539:   if (target_disp) *target_disp = link->dyn_target_addr;
540:   if (reqs) *reqs = link->reqs;
541:   if (update) { /* locks are needed only for the MPI "separate" memory model; the fence guarantees memory synchronization */
542:     PetscMPIInt rank;

544:     PetscCallMPI(MPI_Comm_rank(wcomm, &rank));
545:     if (sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, MPI_MODE_NOCHECK, *win));
546:     PetscCall(PetscMemcpy(link->addr, rootdata, sf->nroots * bytes));
547:     if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
548:       PetscCallMPI(MPI_Win_unlock(rank, *win));
549:       PetscCallMPI(MPI_Win_fence(0, *win));
550:     }
551:   }
552:   link->inuse = PETSC_TRUE;
553:   link->epoch = epoch;
554:   if (epoch) {
555:     switch (sync) {
556:     case PETSCSF_WINDOW_SYNC_FENCE:
557:       PetscCallMPI(MPI_Win_fence(fenceassert, *win));
558:       break;
559:     case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */
560:       break;
561:     case PETSCSF_WINDOW_SYNC_ACTIVE: {
562:       MPI_Group   ingroup, outgroup;
563:       PetscMPIInt isize, osize;

565:       /* Open MPI 4.0.2 with btl=vader does not like calling
566:          - MPI_Win_complete when ogroup is empty
567:          - MPI_Win_wait when igroup is empty
568:          So, we do not even issue the corresponding start and post calls
569:          The MPI standard (Sec. 11.5.2 of MPI 3.1) only requires that
570:          start(outgroup) has a matching post(ingroup)
571:          and this is guaranteed by PetscSF
572:       */
573:       PetscCall(PetscSFGetGroups(sf, &ingroup, &outgroup));
574:       PetscCallMPI(MPI_Group_size(ingroup, &isize));
575:       PetscCallMPI(MPI_Group_size(outgroup, &osize));
576:       if (isize) PetscCallMPI(MPI_Win_post(ingroup, postassert, *win));
577:       if (osize) PetscCallMPI(MPI_Win_start(outgroup, startassert, *win));
578:     } break;
579:     default:
580:       SETERRQ(wcomm, PETSC_ERR_PLIB, "Unknown synchronization type");
581:     }
582:   }
583:   PetscFunctionReturn(PETSC_SUCCESS);
584: }
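
/* A minimal sketch of the window life cycle implemented by the helpers in this file
   (assert arguments abbreviated):

     PetscSFGetWindow(sf, unit, rootdata, leafdata, sync, PETSC_TRUE, ..., &target_disp, &reqs, &win); // in *Begin()
     ... one-sided calls (MPI_Get/MPI_Accumulate) targeting win ...
     PetscSFFindWindow(sf, unit, rootdata, leafdata, &win, &reqs);                                     // in *End()
     PetscSFRestoreWindow(sf, unit, rootdata, sync, PETSC_TRUE, ..., update, &win);
*/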

586: /*
587:    PetscSFFindWindow - Finds a window that is already in use

589:    Not Collective

591:    Input Parameters:
592: +  sf - star forest
593: .  unit - data type
594: .  rootdata - array with which the window is associated
595: -  leafdata - only used to help uniquely identify windows

597:    Output Parameters:
598: +  win - window
599: -  reqs - outstanding requests associated with the window

601:    Level: developer

603: .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
604: */
605: static PetscErrorCode PetscSFFindWindow(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata, MPI_Win *win, MPI_Request **reqs)
606: {
607:   PetscSF_Window *w = (PetscSF_Window *)sf->data;
608:   PetscSFWinLink  link;
609:   PetscBool       is_empty;
610:   MPI_Aint        bytes;

612:   PetscFunctionBegin;
613:   PetscCall(PetscSFGetDatatypeSize_Internal(PetscObjectComm((PetscObject)sf), unit, &bytes));
614:   *win     = MPI_WIN_NULL;
615:   is_empty = w->is_empty;
616:   if (is_empty) {
617:     *reqs = NULL;
618:     *win  = MPI_WIN_NULL;
619:     PetscFunctionReturn(PETSC_SUCCESS);
620:   }
621:   for (link = w->wins; link; link = link->next) {
622:     if (rootdata == link->rootdata && leafdata == link->leafdata && bytes == link->bytes) {
623:       PetscCall(PetscInfo(sf, "Window %" PETSC_INTPTR_T_FMT " of flavor %d for comm %" PETSC_INTPTR_T_FMT "\n", (PETSC_INTPTR_T)link->win, link->flavor, (PETSC_INTPTR_T)w->window_comm));
624:       *win  = link->win;
625:       *reqs = link->reqs;
626:       PetscFunctionReturn(PETSC_SUCCESS);
627:     }
628:   }
629:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Requested window not in use");
630: }

632: /*
633:    PetscSFRestoreWindow - Restores a window obtained with `PetscSFGetWindow()`

635:    Collective

637:    Input Parameters:
638: +  sf - star forest
639: .  unit - data type
640: .  array - array associated with window
641: .  sync - type of synchronization `PetscSFWindowSyncType`
642: .  epoch - close an epoch, must match argument to `PetscSFGetWindow()`
      .  fenceassert - assert parameter for call to `MPI_Win_fence()`, if sync == `PETSCSF_WINDOW_SYNC_FENCE`
 643: .  update - if the caller's array has to be updated from the local window memory
644: -  win - window

646:    Level: developer

648: .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFFindWindow()`
649: */
650: static PetscErrorCode PetscSFRestoreWindow(PetscSF sf, MPI_Datatype unit, void *array, PetscSFWindowSyncType sync, PetscBool epoch, PetscMPIInt fenceassert, PetscBool update, MPI_Win *win)
651: {
652:   PetscSF_Window         *w = (PetscSF_Window *)sf->data;
653:   PetscSFWinLink         *p, link;
654:   PetscBool               reuse = PETSC_FALSE;
655:   PetscSFWindowFlavorType flavor;
656:   void                   *laddr;
657:   MPI_Aint                bytes;
658:   MPI_Comm                wcomm;

660:   PetscFunctionBegin;
661:   if (*win == MPI_WIN_NULL) PetscFunctionReturn(PETSC_SUCCESS);
662:   wcomm = w->window_comm;
663:   for (p = &w->wins; *p; p = &(*p)->next) {
664:     link = *p;
665:     if (*win == link->win) {
666:       PetscCheck(array == link->rootdata, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Matched window, but not array");
667:       if (epoch != link->epoch) {
668:         PetscCheck(!epoch, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "No epoch to end");
669:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Restoring window without ending epoch");
670:       }
671:       laddr  = link->addr;
672:       flavor = link->flavor;
673:       bytes  = link->bytes;
674:       if (flavor != PETSCSF_WINDOW_FLAVOR_CREATE) reuse = PETSC_TRUE;
675:       else {
676:         *p     = link->next;
677:         update = PETSC_FALSE;
678:       } /* remove from list */
679:       goto found;
680:     }
681:   }
682:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Requested window not in use");

684: found:
685:   PetscCall(PetscInfo(sf, "Window %" PETSC_INTPTR_T_FMT " of flavor %d for comm %" PETSC_INTPTR_T_FMT "\n", (PETSC_INTPTR_T)link->win, link->flavor, (PETSC_INTPTR_T)wcomm));
686:   if (epoch) {
687:     switch (sync) {
688:     case PETSCSF_WINDOW_SYNC_FENCE:
689:       PetscCallMPI(MPI_Win_fence(fenceassert, *win));
690:       break;
691:     case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */
692:       break;
693:     case PETSCSF_WINDOW_SYNC_ACTIVE: {
694:       MPI_Group   ingroup, outgroup;
695:       PetscMPIInt isize, osize;

697:       /* Open MPI 4.0.2 with btl=vader does not like calling
698:          - MPI_Win_complete when ogroup is empty
699:          - MPI_Win_wait when igroup is empty
700:          The MPI standard (Sec. 11.5.2 of MPI 3.1) only requires that
701:          - each process that issues a call to MPI_Win_start issues a call to MPI_Win_complete
702:          - each process that issues a call to MPI_Win_post issues a call to MPI_Win_wait
703:       */
704:       PetscCall(PetscSFGetGroups(sf, &ingroup, &outgroup));
705:       PetscCallMPI(MPI_Group_size(ingroup, &isize));
706:       PetscCallMPI(MPI_Group_size(outgroup, &osize));
707:       if (osize) PetscCallMPI(MPI_Win_complete(*win));
708:       if (isize) PetscCallMPI(MPI_Win_wait(*win));
709:     } break;
710:     default:
711:       SETERRQ(wcomm, PETSC_ERR_PLIB, "Unknown synchronization type");
712:     }
713:   }
714: #if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)
715:   if (link->flavor == PETSCSF_WINDOW_FLAVOR_DYNAMIC && !link->persistent) {
716:     if (link->addr != NULL) PetscCallMPI(MPI_Win_detach(link->win, link->addr));
717:     link->addr = NULL;
718:   }
719: #endif
720:   if (update) {
721:     if (sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_fence(MPI_MODE_NOPUT | MPI_MODE_NOSUCCEED, *win));
722:     PetscCall(PetscMemcpy(array, laddr, sf->nroots * bytes));
723:   }
724:   link->epoch = PETSC_FALSE;
725:   link->inuse = PETSC_FALSE;
726:   if (!link->persistent) {
727:     link->rootdata = NULL;
728:     link->leafdata = NULL;
729:   }
730:   if (!reuse) {
731:     PetscCall(PetscFree(link->dyn_target_addr));
732:     PetscCall(PetscFree(link->reqs));
733:     PetscCallMPI(MPI_Win_free(&link->win));
734:     PetscCall(PetscFree(link));
735:     *win = MPI_WIN_NULL;
736:   }
737:   PetscFunctionReturn(PETSC_SUCCESS);
738: }

740: static PetscErrorCode PetscSFSetUp_Window(PetscSF sf)
741: {
742:   PetscSF_Window *w = (PetscSF_Window *)sf->data;
743:   MPI_Group       ingroup, outgroup;
744:   MPI_Comm        comm;

746:   PetscFunctionBegin;
747:   PetscCall(PetscSFSetUpRanks(sf, MPI_GROUP_EMPTY));
748:   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
749:   if (w->window_comm == MPI_COMM_NULL) {
750:     PetscInt    nroots, nleaves, nranks;
751:     PetscBool   has_empty;
752:     PetscMPIInt wcommrank;
753:     PetscSF     dynsf_full = NULL;

755:     if (w->flavor == PETSCSF_WINDOW_FLAVOR_DYNAMIC) PetscCall(PetscSFWindowCreateDynamicSF(sf, &dynsf_full));

757:     PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, NULL, NULL));
758:     has_empty = (nroots == 0 && nleaves == 0) ? PETSC_TRUE : PETSC_FALSE;
759:     nranks    = sf->nranks;
760:     PetscCall(PetscMalloc1(nranks, &w->wcommranks));
761:     w->is_empty = has_empty;
762:     PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE, &has_empty, 1, MPIU_BOOL, MPI_LOR, comm));
763:     if (has_empty) {
764:       PetscMPIInt  rank;
765:       MPI_Comm     raw_comm;
766:       PetscSFNode *remotes;

768:       PetscCallMPI(MPI_Comm_rank(comm, &rank));
769:       PetscCallMPI(MPI_Comm_split(comm, w->is_empty ? 1 : 0, rank, &raw_comm));
770:       PetscCall(PetscCommDuplicate(raw_comm, &w->window_comm, NULL));
771:       PetscCallMPI(MPI_Comm_free(&raw_comm));

773:       PetscCallMPI(MPI_Comm_rank(w->window_comm, &wcommrank));
774:       if (!dynsf_full) PetscCall(PetscSFWindowCreateDynamicSF(sf, &dynsf_full));
775:       PetscCall(PetscSFBcastBegin(dynsf_full, MPI_INT, &wcommrank, w->wcommranks, MPI_REPLACE));
776:       PetscCall(PetscSFBcastEnd(dynsf_full, MPI_INT, &wcommrank, w->wcommranks, MPI_REPLACE));

778:       if (w->flavor == PETSCSF_WINDOW_FLAVOR_DYNAMIC) {
779:         PetscCall(PetscSFCreate(w->window_comm, &w->dynsf));
780:         PetscCall(PetscSFSetType(w->dynsf, PETSCSFBASIC)); /* break recursion */
781:         PetscCall(PetscMalloc1(sf->nranks, &remotes));
782:         for (PetscInt i = 0; i < sf->nranks; i++) {
783:           remotes[i].rank  = w->wcommranks[i];
784:           remotes[i].index = 0;
785:         }
786:         PetscCall(PetscSFSetGraph(w->dynsf, 1, sf->nranks, NULL, PETSC_OWN_POINTER, remotes, PETSC_OWN_POINTER));
787:       }
788:     } else {
789:       PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)sf), &w->window_comm, NULL));
790:       PetscCall(PetscArraycpy(w->wcommranks, sf->ranks, nranks));
791:       PetscCall(PetscObjectReference((PetscObject)dynsf_full));
792:       w->dynsf = dynsf_full;
793:     }
794:     if (w->dynsf) PetscCall(PetscSFSetUp(w->dynsf));
795:     PetscCall(PetscSFDestroy(&dynsf_full));
796:   }
797:   switch (w->sync) {
798:   case PETSCSF_WINDOW_SYNC_ACTIVE:
799:     PetscCall(PetscSFGetGroups(sf, &ingroup, &outgroup));
800:   default:
801:     break;
802:   }
803:   PetscFunctionReturn(PETSC_SUCCESS);
804: }

806: static PetscErrorCode PetscSFSetFromOptions_Window(PetscSF sf, PetscOptionItems *PetscOptionsObject)
807: {
808:   PetscSF_Window         *w      = (PetscSF_Window *)sf->data;
809:   PetscSFWindowFlavorType flavor = w->flavor;

811:   PetscFunctionBegin;
812:   PetscOptionsHeadBegin(PetscOptionsObject, "PetscSF Window options");
813:   PetscCall(PetscOptionsEnum("-sf_window_sync", "synchronization type to use for PetscSF Window communication", "PetscSFWindowSetSyncType", PetscSFWindowSyncTypes, (PetscEnum)w->sync, (PetscEnum *)&w->sync, NULL));
814:   PetscCall(PetscOptionsEnum("-sf_window_flavor", "flavor to use for PetscSF Window creation", "PetscSFWindowSetFlavorType", PetscSFWindowFlavorTypes, (PetscEnum)flavor, (PetscEnum *)&flavor, NULL));
815:   PetscCall(PetscSFWindowSetFlavorType(sf, flavor));
816:   PetscOptionsHeadEnd();
817:   PetscFunctionReturn(PETSC_SUCCESS);
818: }

820: static PetscErrorCode PetscSFReset_Window(PetscSF sf)
821: {
822:   PetscSF_Window *w = (PetscSF_Window *)sf->data;
823:   PetscSFDataLink link, next;
824:   PetscSFWinLink  wlink, wnext;
825:   PetscInt        i;
826:   MPI_Comm        wcomm;
827:   PetscBool       is_empty;

829:   PetscFunctionBegin;
830:   for (link = w->link; link; link = next) {
831:     next = link->next;
832:     PetscCallMPI(MPI_Type_free(&link->unit));
833:     for (i = 0; i < sf->nranks; i++) {
834:       PetscCallMPI(MPI_Type_free(&link->mine[i]));
835:       PetscCallMPI(MPI_Type_free(&link->remote[i]));
836:     }
837:     PetscCall(PetscFree2(link->mine, link->remote));
838:     PetscCall(PetscFree(link));
839:   }
840:   w->link  = NULL;
841:   wcomm    = w->window_comm;
842:   is_empty = w->is_empty;
843:   if (!is_empty) {
844:     for (wlink = w->wins; wlink; wlink = wnext) {
845:       wnext = wlink->next;
846:       PetscCheck(!wlink->inuse, wcomm, PETSC_ERR_ARG_WRONGSTATE, "Window still in use with address %p", (void *)wlink->addr);
847:       PetscCall(PetscFree(wlink->dyn_target_addr));
848:       PetscCall(PetscFree(wlink->reqs));
849:       PetscCallMPI(MPI_Win_free(&wlink->win));
850:       PetscCall(PetscFree(wlink));
851:     }
852:   }
853:   w->wins = NULL;
854:   PetscCall(PetscSFDestroy(&w->dynsf));
855:   if (w->info != MPI_INFO_NULL) PetscCallMPI(MPI_Info_free(&w->info));
856:   PetscCall(PetscCommDestroy(&w->window_comm));
857:   PetscCall(PetscFree(w->wcommranks));
858:   PetscFunctionReturn(PETSC_SUCCESS);
859: }

861: static PetscErrorCode PetscSFRegisterPersistent_Window(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata)
862: {
863:   PetscSF_Window *w = (PetscSF_Window *)sf->data;
864:   MPI_Aint        bytes, wsize;
865:   PetscBool       is_empty;
866:   PetscSFWinLink  link;

868:   PetscFunctionBegin;
869:   PetscCall(PetscSFSetUp(sf));
870:   if (w->flavor != PETSCSF_WINDOW_FLAVOR_DYNAMIC) PetscFunctionReturn(PETSC_SUCCESS);
871:   PetscCall(PetscSFGetDatatypeSize_Internal(PetscObjectComm((PetscObject)sf), unit, &bytes));
872:   wsize    = (MPI_Aint)(bytes * sf->nroots);
873:   is_empty = w->is_empty;
874:   if (is_empty) PetscFunctionReturn(PETSC_SUCCESS);
875:   PetscCall(PetscNew(&link));
876:   link->flavor = w->flavor;
877:   link->next   = w->wins;
878: #if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)
879:   {
880:     MPI_Comm wcomm = w->window_comm;
881:     PetscCallMPI(MPI_Win_create_dynamic(w->info, wcomm, &link->win));
882:   }
883: #endif
884:   PetscCall(PetscSFWindowAttach(sf, link, (void *)rootdata, wsize));
885:   link->rootdata   = (void *)rootdata;
886:   link->leafdata   = (void *)leafdata;
887:   link->bytes      = bytes;
888:   link->epoch      = PETSC_FALSE;
889:   link->inuse      = PETSC_FALSE;
890:   link->persistent = PETSC_TRUE;
891:   w->wins          = link;
892:   if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {
893:     PetscInt i;

895:     PetscCall(PetscMalloc1(sf->nranks, &link->reqs));
896:     for (i = 0; i < sf->nranks; i++) link->reqs[i] = MPI_REQUEST_NULL;
897:   }
898:   PetscFunctionReturn(PETSC_SUCCESS);
899: }

901: static PetscErrorCode PetscSFDeregisterPersistent_Window(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata)
902: {
903:   PetscSF_Window *w = (PetscSF_Window *)sf->data;
904:   MPI_Aint        bytes;
905:   MPI_Comm        wcomm;
906:   PetscBool       is_empty;
907:   PetscSFWinLink *p;

909:   PetscFunctionBegin;
910:   PetscCall(PetscSFSetUp(sf));
911:   if (w->flavor != PETSCSF_WINDOW_FLAVOR_DYNAMIC) PetscFunctionReturn(PETSC_SUCCESS);
912:   PetscCall(PetscSFGetDatatypeSize_Internal(PetscObjectComm((PetscObject)sf), unit, &bytes));
913:   wcomm    = w->window_comm;
914:   is_empty = w->is_empty;
915:   if (is_empty) PetscFunctionReturn(PETSC_SUCCESS);
916:   for (p = &w->wins; *p; p = &(*p)->next) {
917:     PetscSFWinLink link = *p;
918:     if (link->flavor == w->flavor && link->persistent && link->rootdata == rootdata && link->leafdata == leafdata && link->bytes == bytes) {
919:       PetscCheck(!link->inuse, wcomm, PETSC_ERR_ARG_WRONGSTATE, "Deregistering a window when communication is still in progress");
920:       PetscCheck(!link->epoch, wcomm, PETSC_ERR_ARG_WRONGSTATE, "Deregistering a window with an unconcluded epoch");
921: #if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)
922:       PetscCallMPI(MPI_Win_detach(link->win, link->addr));
923:       link->addr = NULL;
924: #endif
925:       PetscCall(PetscFree(link->dyn_target_addr));
926:       PetscCall(PetscFree(link->reqs));
927:       PetscCallMPI(MPI_Win_free(&link->win));
928:       *p = link->next;
929:       PetscCall(PetscFree(link));
930:       break;
931:     }
932:   }
933:   PetscFunctionReturn(PETSC_SUCCESS);
934: }

936: static PetscErrorCode PetscSFDestroy_Window(PetscSF sf)
937: {
938:   PetscFunctionBegin;
939:   PetscCall(PetscSFReset_Window(sf));
940:   PetscCall(PetscFree(sf->data));
941:   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetSyncType_C", NULL));
942:   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetSyncType_C", NULL));
943:   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetFlavorType_C", NULL));
944:   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetFlavorType_C", NULL));
945:   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetInfo_C", NULL));
946:   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetInfo_C", NULL));
947:   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFRegisterPersistent_C", NULL));
948:   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFDeregisterPersistent_C", NULL));
949:   PetscFunctionReturn(PETSC_SUCCESS);
950: }

952: static PetscErrorCode PetscSFView_Window(PetscSF sf, PetscViewer viewer)
953: {
954:   PetscSF_Window   *w = (PetscSF_Window *)sf->data;
955:   PetscBool         iascii;
956:   PetscViewerFormat format;

958:   PetscFunctionBegin;
959:   PetscCall(PetscViewerGetFormat(viewer, &format));
960:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
961:   if (iascii) {
962:     PetscCall(PetscViewerASCIIPrintf(viewer, "  current flavor=%s synchronization=%s MultiSF sort=%s\n", PetscSFWindowFlavorTypes[w->flavor], PetscSFWindowSyncTypes[w->sync], sf->rankorder ? "rank-order" : "unordered"));
963:     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
964:       if (w->info != MPI_INFO_NULL) {
965:         PetscMPIInt k, nkeys;
966:         char        key[MPI_MAX_INFO_KEY], value[MPI_MAX_INFO_VAL];

968:         PetscCallMPI(MPI_Info_get_nkeys(w->info, &nkeys));
969:         PetscCall(PetscViewerASCIIPrintf(viewer, "    current info with %d keys. Ordered key-value pairs follow:\n", nkeys));
970:         for (k = 0; k < nkeys; k++) {
971:           PetscMPIInt flag;

973:           PetscCallMPI(MPI_Info_get_nthkey(w->info, k, key));
974:           PetscCallMPI(MPI_Info_get(w->info, key, MPI_MAX_INFO_VAL, value, &flag));
975:           PetscCheck(flag, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing key %s", key);
976:           PetscCall(PetscViewerASCIIPrintf(viewer, "      %s = %s\n", key, value));
977:         }
978:       } else {
979:         PetscCall(PetscViewerASCIIPrintf(viewer, "    current info=MPI_INFO_NULL\n"));
980:       }
981:     }
982:   }
983:   PetscFunctionReturn(PETSC_SUCCESS);
984: }

986: static PetscErrorCode PetscSFDuplicate_Window(PetscSF sf, PetscSFDuplicateOption opt, PetscSF newsf)
987: {
988:   PetscSF_Window       *w = (PetscSF_Window *)sf->data;
989:   PetscSFWindowSyncType synctype;

991:   PetscFunctionBegin;
992:   synctype = w->sync;
993:   /* HACK: Must use FENCE or LOCK when called from PetscSFGetGroups() because ACTIVE here would cause recursion. */
994:   if (!sf->setupcalled) synctype = PETSCSF_WINDOW_SYNC_LOCK;
995:   PetscCall(PetscSFWindowSetSyncType(newsf, synctype));
996:   PetscCall(PetscSFWindowSetFlavorType(newsf, w->flavor));
997:   PetscCall(PetscSFWindowSetInfo(newsf, w->info));
998:   PetscFunctionReturn(PETSC_SUCCESS);
999: }

1001: static PetscErrorCode PetscSFBcastBegin_Window(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
1002: {
1003:   PetscSF_Window     *w = (PetscSF_Window *)sf->data;
1004:   PetscMPIInt         nranks;
1005:   const PetscMPIInt  *ranks;
1006:   const MPI_Aint     *target_disp;
1007:   const MPI_Datatype *mine, *remote;
1008:   MPI_Request        *reqs;
1009:   MPI_Win             win;

1011:   PetscFunctionBegin;
1012:   PetscCheck(op == MPI_REPLACE, PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "PetscSFBcastBegin_Window with op!=MPI_REPLACE has not been implemented");
1013:   PetscCall(PetscSFGetRootRanks(sf, &nranks, NULL, NULL, NULL, NULL));
1014:   PetscCall(PetscSFWindowGetDataTypes(sf, unit, &mine, &remote));
1015:   PetscCall(PetscSFGetWindow(sf, unit, (void *)rootdata, leafdata, w->sync, PETSC_TRUE, MPI_MODE_NOPUT | MPI_MODE_NOPRECEDE, MPI_MODE_NOPUT, 0, &target_disp, &reqs, &win));
1016:   ranks = w->wcommranks;
1017:   for (PetscMPIInt i = 0; i < nranks; i++) {
1018:     MPI_Aint tdp = target_disp ? target_disp[i] : 0;
1019:     if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {
1020:       PetscCallMPI(MPI_Win_lock(MPI_LOCK_SHARED, ranks[i], MPI_MODE_NOCHECK, win));
1021: #if defined(PETSC_HAVE_MPI_RGET)
1022:       PetscCallMPI(MPI_Rget(leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], win, &reqs[i]));
1023: #else
1024:       PetscCallMPI(MPI_Get(leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], win));
1025: #endif
1026:     } else {
1027:       CHKMEMQ;
1028:       PetscCallMPI(MPI_Get(leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], win));
1029:       CHKMEMQ;
1030:     }
1031:   }
1032:   PetscFunctionReturn(PETSC_SUCCESS);
1033: }

1035: static PetscErrorCode PetscSFBcastEnd_Window(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1036: {
1037:   PetscSF_Window *w = (PetscSF_Window *)sf->data;
1038:   MPI_Win         win;
1039:   MPI_Request    *reqs = NULL;

1041:   PetscFunctionBegin;
1042:   PetscCall(PetscSFFindWindow(sf, unit, rootdata, leafdata, &win, &reqs));
1043:   if (reqs) PetscCallMPI(MPI_Waitall(sf->nranks, reqs, MPI_STATUSES_IGNORE));
1044:   if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {
1045:     PetscMPIInt        nranks;
1046:     const PetscMPIInt *ranks;

1048:     PetscCall(PetscSFGetRootRanks(sf, &nranks, NULL, NULL, NULL, NULL));
1049:     ranks = w->wcommranks;
1050:     for (PetscMPIInt i = 0; i < nranks; i++) PetscCallMPI(MPI_Win_unlock(ranks[i], win));
1051:   }
1052:   PetscCall(PetscSFRestoreWindow(sf, unit, (void *)rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOSTORE | MPI_MODE_NOSUCCEED, PETSC_FALSE, &win));
1053:   PetscFunctionReturn(PETSC_SUCCESS);
1054: }

1056: static PetscErrorCode PetscSFReduceBegin_Window(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
1057: {
1058:   PetscSF_Window     *w = (PetscSF_Window *)sf->data;
1059:   PetscMPIInt         nranks;
1060:   const PetscMPIInt  *ranks;
1061:   const MPI_Aint     *target_disp;
1062:   const MPI_Datatype *mine, *remote;
1063:   MPI_Win             win;

1065:   PetscFunctionBegin;
1066:   PetscCall(PetscSFGetRootRanks(sf, &nranks, NULL, NULL, NULL, NULL));
1067:   PetscCall(PetscSFWindowGetDataTypes(sf, unit, &mine, &remote));
1068:   PetscCall(PetscSFWindowOpTranslate(&op));
1069:   PetscCall(PetscSFGetWindow(sf, unit, rootdata, (void *)leafdata, w->sync, PETSC_TRUE, MPI_MODE_NOPRECEDE, 0, 0, &target_disp, NULL, &win));
1070:   ranks = w->wcommranks;
1071:   for (PetscMPIInt i = 0; i < nranks; i++) {
1072:     MPI_Aint tdp = target_disp ? target_disp[i] : 0;

1074:     if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_lock(MPI_LOCK_SHARED, ranks[i], MPI_MODE_NOCHECK, win));
1075:     PetscCallMPI(MPI_Accumulate((void *)leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], op, win));
1076:     if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_unlock(ranks[i], win));
1077:   }
1078:   PetscFunctionReturn(PETSC_SUCCESS);
1079: }

1081: static PetscErrorCode PetscSFReduceEnd_Window(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1082: {
1083:   PetscSF_Window *w = (PetscSF_Window *)sf->data;
1084:   MPI_Win         win;
1085:   MPI_Request    *reqs = NULL;

1087:   PetscFunctionBegin;
1088:   PetscCall(PetscSFFindWindow(sf, unit, rootdata, leafdata, &win, &reqs));
1089:   if (reqs) PetscCallMPI(MPI_Waitall(sf->nranks, reqs, MPI_STATUSES_IGNORE));
1090:   PetscCall(PetscSFRestoreWindow(sf, unit, rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOSUCCEED, PETSC_TRUE, &win));
1091:   PetscFunctionReturn(PETSC_SUCCESS);
1092: }
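
/* A user-level sketch of the reduce pair above (buffers assumed allocated to match the SF graph):

     PetscSFReduceBegin(sf, MPIU_SCALAR, leafdata, rootdata, MPIU_SUM); // MPIU_SUM is translated to a built-in op where possible
     PetscSFReduceEnd(sf, MPIU_SCALAR, leafdata, rootdata, MPIU_SUM);
*/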

1094: static PetscErrorCode PetscSFFetchAndOpBegin_Window(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, void *leafupdate, MPI_Op op)
1095: {
1096:   PetscMPIInt         nranks;
1097:   const PetscMPIInt  *ranks;
1098:   const MPI_Datatype *mine, *remote;
1099:   const MPI_Aint     *target_disp;
1100:   MPI_Win             win;
1101:   PetscSF_Window     *w = (PetscSF_Window *)sf->data;
1102: #if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
1103:   PetscSFWindowFlavorType oldf;
1104: #endif

1106:   PetscFunctionBegin;
1107:   PetscCall(PetscSFGetRootRanks(sf, &nranks, NULL, NULL, NULL, NULL));
1108:   PetscCall(PetscSFWindowGetDataTypes(sf, unit, &mine, &remote));
1109:   PetscCall(PetscSFWindowOpTranslate(&op));
1110: #if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
1111:   /* FetchAndOp without MPI_Get_accumulate requires locking.
1112:      We create a new window every time so as not to interfere with a user-defined MPI_Info that may have set "no_locks" to "true" */
1113:   oldf      = w->flavor;
1114:   w->flavor = PETSCSF_WINDOW_FLAVOR_CREATE;
1115:   PetscCall(PetscSFGetWindow(sf, unit, rootdata, (void *)leafdata, PETSCSF_WINDOW_SYNC_LOCK, PETSC_FALSE, 0, 0, 0, &target_disp, NULL, &win));
1116: #else
1117:   PetscCall(PetscSFGetWindow(sf, unit, rootdata, (void *)leafdata, w->sync, PETSC_TRUE, MPI_MODE_NOPRECEDE, 0, 0, &target_disp, NULL, &win));
1118: #endif
1119:   ranks = w->wcommranks;
1120:   for (PetscMPIInt i = 0; i < nranks; i++) {
1121:     MPI_Aint tdp = target_disp ? target_disp[i] : 0;

1123: #if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
1124:     PetscCallMPI(MPI_Win_lock(MPI_LOCK_EXCLUSIVE, ranks[i], 0, win));
1125:     PetscCallMPI(MPI_Get(leafupdate, 1, mine[i], ranks[i], tdp, 1, remote[i], win));
1126:     PetscCallMPI(MPI_Accumulate((void *)leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], op, win));
1127:     PetscCallMPI(MPI_Win_unlock(ranks[i], win));
1128: #else
1129:     if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_lock(MPI_LOCK_SHARED, ranks[i], 0, win));
1130:     PetscCallMPI(MPI_Get_accumulate((void *)leafdata, 1, mine[i], leafupdate, 1, mine[i], ranks[i], tdp, 1, remote[i], op, win));
1131:     if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_unlock(ranks[i], win));
1132: #endif
1133:   }
1134: #if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
1135:   w->flavor = oldf;
1136: #endif
1137:   PetscFunctionReturn(PETSC_SUCCESS);
1138: }

1140: static PetscErrorCode PetscSFFetchAndOpEnd_Window(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1141: {
1142:   MPI_Win win;
1143: #if defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
1144:   PetscSF_Window *w = (PetscSF_Window *)sf->data;
1145: #endif
1146:   MPI_Request *reqs = NULL;

1148:   PetscFunctionBegin;
1149:   PetscCall(PetscSFFindWindow(sf, unit, rootdata, leafdata, &win, &reqs));
1150:   if (reqs) PetscCallMPI(MPI_Waitall(sf->nranks, reqs, MPI_STATUSES_IGNORE));
1151: #if defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
1152:   PetscCall(PetscSFRestoreWindow(sf, unit, rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOSUCCEED, PETSC_TRUE, &win));
1153: #else
1154:   PetscCall(PetscSFRestoreWindow(sf, unit, rootdata, PETSCSF_WINDOW_SYNC_LOCK, PETSC_FALSE, 0, PETSC_TRUE, &win));
1155: #endif
1156:   PetscFunctionReturn(PETSC_SUCCESS);
1157: }

1159: PETSC_INTERN PetscErrorCode PetscSFCreate_Window(PetscSF sf)
1160: {
1161:   PetscSF_Window *w = (PetscSF_Window *)sf->data;

1163:   PetscFunctionBegin;
1164:   sf->ops->SetUp           = PetscSFSetUp_Window;
1165:   sf->ops->SetFromOptions  = PetscSFSetFromOptions_Window;
1166:   sf->ops->Reset           = PetscSFReset_Window;
1167:   sf->ops->Destroy         = PetscSFDestroy_Window;
1168:   sf->ops->View            = PetscSFView_Window;
1169:   sf->ops->Duplicate       = PetscSFDuplicate_Window;
1170:   sf->ops->BcastBegin      = PetscSFBcastBegin_Window;
1171:   sf->ops->BcastEnd        = PetscSFBcastEnd_Window;
1172:   sf->ops->ReduceBegin     = PetscSFReduceBegin_Window;
1173:   sf->ops->ReduceEnd       = PetscSFReduceEnd_Window;
1174:   sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Window;
1175:   sf->ops->FetchAndOpEnd   = PetscSFFetchAndOpEnd_Window;

1177:   PetscCall(PetscNew(&w));
1178:   sf->data       = (void *)w;
1179:   w->sync        = PETSCSF_WINDOW_SYNC_FENCE;
1180:   w->flavor      = PETSCSF_WINDOW_FLAVOR_CREATE;
1181:   w->info        = MPI_INFO_NULL;
1182:   w->window_comm = MPI_COMM_NULL;

1184:   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetSyncType_C", PetscSFWindowSetSyncType_Window));
1185:   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetSyncType_C", PetscSFWindowGetSyncType_Window));
1186:   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetFlavorType_C", PetscSFWindowSetFlavorType_Window));
1187:   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetFlavorType_C", PetscSFWindowGetFlavorType_Window));
1188:   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetInfo_C", PetscSFWindowSetInfo_Window));
1189:   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetInfo_C", PetscSFWindowGetInfo_Window));
1190:   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFRegisterPersistent_C", PetscSFRegisterPersistent_Window));
1191:   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFDeregisterPersistent_C", PetscSFDeregisterPersistent_Window));

1193: #if defined(PETSC_HAVE_OPENMPI)
1194:   #if PETSC_PKG_OPENMPI_VERSION_LE(1, 6, 0)
1195:   {
1196:     PetscBool ackbug = PETSC_FALSE;
1197:     PetscCall(PetscOptionsGetBool(NULL, NULL, "-acknowledge_ompi_onesided_bug", &ackbug, NULL));
1198:     if (ackbug) {
1199:       PetscCall(PetscInfo(sf, "Acknowledged Open MPI bug, proceeding anyway. Expect memory corruption.\n"));
1200:     } else SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_LIB, "Open MPI is known to be buggy (https://svn.open-mpi.org/trac/ompi/ticket/1905 and 2656), use -acknowledge_ompi_onesided_bug to proceed");
1201:   }
1202:   #endif
1203: #endif
1204:   PetscFunctionReturn(PETSC_SUCCESS);
1205: }