Actual source code: sfwindow.c
#include <petsc/private/sfimpl.h>

typedef struct _n_PetscSFDataLink *PetscSFDataLink;
typedef struct _n_PetscSFWinLink  *PetscSFWinLink;

typedef struct {
  PetscSFWindowSyncType   sync;   /* FENCE, LOCK, or ACTIVE synchronization */
  PetscSFDataLink         link;   /* List of MPI data types, lazily constructed for each data type */
  PetscSFWinLink          wins;   /* List of active windows */
  PetscSFWindowFlavorType flavor; /* Current PETSCSF_WINDOW_FLAVOR_ */
  PetscSF                 dynsf;
  MPI_Info                info;
} PetscSF_Window;

struct _n_PetscSFDataLink {
  MPI_Datatype    unit;
  MPI_Datatype   *mine;
  MPI_Datatype   *remote;
  PetscSFDataLink next;
};

struct _n_PetscSFWinLink {
  PetscBool               inuse;
  size_t                  bytes;
  void                   *addr;
  void                   *paddr;
  MPI_Win                 win;
  MPI_Request            *reqs;
  PetscSFWindowFlavorType flavor;
  MPI_Aint               *dyn_target_addr;
  PetscBool               epoch;
  PetscSFWinLink          next;
};

const char *const PetscSFWindowSyncTypes[]   = {"FENCE", "LOCK", "ACTIVE", "PetscSFWindowSyncType", "PETSCSF_WINDOW_SYNC_", NULL};
const char *const PetscSFWindowFlavorTypes[] = {"CREATE", "DYNAMIC", "ALLOCATE", "SHARED", "PetscSFWindowFlavorType", "PETSCSF_WINDOW_FLAVOR_", NULL};

/* Built-in MPI_Ops act elementwise inside MPI_Accumulate, but cannot be used with composite types inside collectives (MPI_Allreduce) */
static PetscErrorCode PetscSFWindowOpTranslate(MPI_Op *op)
{
  if (*op == MPIU_SUM) *op = MPI_SUM;
  else if (*op == MPIU_MAX) *op = MPI_MAX;
  else if (*op == MPIU_MIN) *op = MPI_MIN;
  return 0;
}
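
/* For illustration, a hedged sketch of the intended call pattern (the variable names below are
   hypothetical, not from this file): callers translate the op before handing it to MPI RMA,
   since MPI_Accumulate only accepts predefined reduction operations, while the MPIU_ ops may be
   user-defined for some scalar types.

     MPI_Op op = MPIU_SUM;                                      // may be a user-defined op
     PetscSFWindowOpTranslate(&op);                             // map to predefined MPI_SUM when possible
     MPI_Accumulate(buf, 1, type, rank, 0, 1, rtype, op, win);  // valid: op is now predefined
*/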

/*@C
  PetscSFWindowGetDataTypes - gets composite local and remote data types for each rank

  Not Collective

  Input Parameters:
+ sf   - star forest of type `PETSCSFWINDOW`
- unit - data type for each node

  Output Parameters:
+ localtypes  - types describing part of local leaf buffer referencing each remote rank
- remotetypes - types describing part of remote root buffer referenced for each remote rank

  Level: developer

.seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetGraph()`, `PetscSFView()`
@*/
static PetscErrorCode PetscSFWindowGetDataTypes(PetscSF sf, MPI_Datatype unit, const MPI_Datatype **localtypes, const MPI_Datatype **remotetypes)
{
  PetscSF_Window    *w = (PetscSF_Window *)sf->data;
  PetscSFDataLink    link;
  PetscInt           i, nranks;
  const PetscInt    *roffset, *rmine, *rremote;
  const PetscMPIInt *ranks;

  /* Look for types in cache */
  for (link = w->link; link; link = link->next) {
    PetscBool match;
    MPIPetsc_Type_compare(unit, link->unit, &match);
    if (match) {
      *localtypes  = link->mine;
      *remotetypes = link->remote;
      return 0;
    }
  }

  /* Create new composite types for each send rank */
  PetscSFGetRootRanks(sf, &nranks, &ranks, &roffset, &rmine, &rremote);
  PetscNew(&link);
  MPI_Type_dup(unit, &link->unit);
  PetscMalloc2(nranks, &link->mine, nranks, &link->remote);
  for (i = 0; i < nranks; i++) {
    PetscInt     rcount = roffset[i + 1] - roffset[i];
    PetscMPIInt *rmine, *rremote;
#if !defined(PETSC_USE_64BIT_INDICES)
    rmine   = sf->rmine + sf->roffset[i];
    rremote = sf->rremote + sf->roffset[i];
#else
    PetscInt j;
    PetscMalloc2(rcount, &rmine, rcount, &rremote);
    for (j = 0; j < rcount; j++) {
      PetscMPIIntCast(sf->rmine[sf->roffset[i] + j], rmine + j);
      PetscMPIIntCast(sf->rremote[sf->roffset[i] + j], rremote + j);
    }
#endif
    MPI_Type_create_indexed_block(rcount, 1, rmine, link->unit, &link->mine[i]);
    MPI_Type_create_indexed_block(rcount, 1, rremote, link->unit, &link->remote[i]);
#if defined(PETSC_USE_64BIT_INDICES)
    PetscFree2(rmine, rremote);
#endif
    MPI_Type_commit(&link->mine[i]);
    MPI_Type_commit(&link->remote[i]);
  }
  link->next = w->link;
  w->link    = link;

  *localtypes  = link->mine;
  *remotetypes = link->remote;
  return 0;
}
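
/* A concrete (hypothetical) example of what the cached types encode: if leaves 2 and 5 of this
   process reference root entries 0 and 3 on rank r, then for unit = MPI_DOUBLE the loop above
   effectively builds

     MPI_Type_create_indexed_block(2, 1, (int[]){2, 5}, MPI_DOUBLE, &mine[r]);   // leaf-side layout
     MPI_Type_create_indexed_block(2, 1, (int[]){0, 3}, MPI_DOUBLE, &remote[r]); // root-side layout

   so a single MPI_Get(leafdata, 1, mine[r], r, 0, 1, remote[r], win) moves both entries at once. */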

/*@C
  PetscSFWindowSetFlavorType - Set flavor type for `MPI_Win` creation

  Logically Collective

  Input Parameters:
+ sf     - star forest for communication of type `PETSCSFWINDOW`
- flavor - flavor type

  Options Database Key:
. -sf_window_flavor <flavor> - sets the flavor type CREATE, DYNAMIC, ALLOCATE, or SHARED (see `PetscSFWindowFlavorType`)

  Level: advanced

  Notes:
  Window reuse follows these rules:
.vb
  PETSCSF_WINDOW_FLAVOR_CREATE: creates a new window every time, uses MPI_Win_create

  PETSCSF_WINDOW_FLAVOR_DYNAMIC: uses MPI_Win_create_dynamic/MPI_Win_attach and tries to reuse windows by comparing the root array. Intended to be used on repeated applications of the same SF, e.g.
    for i=1 to K
      PetscSFOperationBegin(rootdata1,leafdata_whatever);
      PetscSFOperationEnd(rootdata1,leafdata_whatever);
      ...
      PetscSFOperationBegin(rootdataN,leafdata_whatever);
      PetscSFOperationEnd(rootdataN,leafdata_whatever);
    endfor
  The following pattern will instead raise an error
    PetscSFOperationBegin(rootdata1,leafdata_whatever);
    PetscSFOperationEnd(rootdata1,leafdata_whatever);
    PetscSFOperationBegin(rank ? rootdata1 : rootdata2,leafdata_whatever);
    PetscSFOperationEnd(rank ? rootdata1 : rootdata2,leafdata_whatever);

  PETSCSF_WINDOW_FLAVOR_ALLOCATE: uses MPI_Win_allocate, reuses any pre-existing window that fits the data and is not in use

  PETSCSF_WINDOW_FLAVOR_SHARED: uses MPI_Win_allocate_shared, with the same reuse policy as PETSCSF_WINDOW_FLAVOR_ALLOCATE
.ve
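
  Example Usage:
  A minimal sketch (hedged; it assumes `sf` was created and given a graph elsewhere, and that each
  iteration reuses the same root array, as the DYNAMIC rules above require):
.vb
  PetscSFSetType(sf, PETSCSFWINDOW);
  PetscSFWindowSetFlavorType(sf, PETSCSF_WINDOW_FLAVOR_DYNAMIC);
  for (i = 0; i < K; i++) {
    PetscSFBcastBegin(sf, MPI_DOUBLE, rootdata, leafdata, MPI_REPLACE);
    PetscSFBcastEnd(sf, MPI_DOUBLE, rootdata, leafdata, MPI_REPLACE);
  }
.ve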

.seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowGetFlavorType()`
@*/
PetscErrorCode PetscSFWindowSetFlavorType(PetscSF sf, PetscSFWindowFlavorType flavor)
{
  PetscTryMethod(sf, "PetscSFWindowSetFlavorType_C", (PetscSF, PetscSFWindowFlavorType), (sf, flavor));
  return 0;
}

static PetscErrorCode PetscSFWindowSetFlavorType_Window(PetscSF sf, PetscSFWindowFlavorType flavor)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  w->flavor = flavor;
  return 0;
}

/*@C
  PetscSFWindowGetFlavorType - Get `PETSCSFWINDOW` flavor type for `PetscSF` communication

  Logically Collective

  Input Parameter:
. sf - star forest for communication of type `PETSCSFWINDOW`

  Output Parameter:
. flavor - flavor type

  Level: advanced

.seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowSetFlavorType()`
@*/
PetscErrorCode PetscSFWindowGetFlavorType(PetscSF sf, PetscSFWindowFlavorType *flavor)
{
  PetscUseMethod(sf, "PetscSFWindowGetFlavorType_C", (PetscSF, PetscSFWindowFlavorType *), (sf, flavor));
  return 0;
}

static PetscErrorCode PetscSFWindowGetFlavorType_Window(PetscSF sf, PetscSFWindowFlavorType *flavor)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  *flavor = w->flavor;
  return 0;
}

/*@C
  PetscSFWindowSetSyncType - Set synchronization type for `PetscSF` communication of type `PETSCSFWINDOW`

  Logically Collective

  Input Parameters:
+ sf   - star forest for communication
- sync - synchronization type

  Options Database Key:
. -sf_window_sync <sync> - sets the synchronization type FENCE, LOCK, or ACTIVE (see `PetscSFWindowSyncType`)

  Level: advanced

.seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowGetSyncType()`, `PetscSFWindowSyncType`
@*/
PetscErrorCode PetscSFWindowSetSyncType(PetscSF sf, PetscSFWindowSyncType sync)
{
  PetscTryMethod(sf, "PetscSFWindowSetSyncType_C", (PetscSF, PetscSFWindowSyncType), (sf, sync));
  return 0;
}
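
/* A hedged usage sketch (assumes an existing window-type SF named `sf`): lock-based
   synchronization avoids the collective fence/post-start epochs, which can help when only a few
   process pairs actually communicate.

     PetscSFWindowSetSyncType(sf, PETSCSF_WINDOW_SYNC_LOCK);

   The same choice can be made at runtime with -sf_window_sync lock. */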

static PetscErrorCode PetscSFWindowSetSyncType_Window(PetscSF sf, PetscSFWindowSyncType sync)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  w->sync = sync;
  return 0;
}

/*@C
  PetscSFWindowGetSyncType - Get synchronization type for `PetscSF` communication of type `PETSCSFWINDOW`

  Logically Collective

  Input Parameter:
. sf - star forest for communication

  Output Parameter:
. sync - synchronization type

  Level: advanced

.seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowSetSyncType()`, `PetscSFWindowSyncType`
@*/
PetscErrorCode PetscSFWindowGetSyncType(PetscSF sf, PetscSFWindowSyncType *sync)
{
  PetscUseMethod(sf, "PetscSFWindowGetSyncType_C", (PetscSF, PetscSFWindowSyncType *), (sf, sync));
  return 0;
}

static PetscErrorCode PetscSFWindowGetSyncType_Window(PetscSF sf, PetscSFWindowSyncType *sync)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  *sync = w->sync;
  return 0;
}

/*@C
  PetscSFWindowSetInfo - Set the `MPI_Info` handle that will be used for subsequent window allocations

  Logically Collective

  Input Parameters:
+ sf   - star forest for communication
- info - `MPI_Info` handle

  Level: advanced

  Note:
  The info handle is duplicated with a call to `MPI_Info_dup()` unless info = `MPI_INFO_NULL`.

.seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowGetInfo()`
@*/
PetscErrorCode PetscSFWindowSetInfo(PetscSF sf, MPI_Info info)
{
  PetscTryMethod(sf, "PetscSFWindowSetInfo_C", (PetscSF, MPI_Info), (sf, info));
  return 0;
}
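
/* A hedged sketch of passing window hints (assumes an existing window-type SF `sf`; "no_locks" is
   a standard MPI window info key, and whether it helps depends on the MPI implementation):

     MPI_Info info;
     MPI_Info_create(&info);
     MPI_Info_set(info, "no_locks", "true"); // promise that passive-target locks will not be used
     PetscSFWindowSetInfo(sf, info);
     MPI_Info_free(&info);                   // safe: PetscSFWindowSetInfo() duplicated the handle
*/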

static PetscErrorCode PetscSFWindowSetInfo_Window(PetscSF sf, MPI_Info info)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  if (w->info != MPI_INFO_NULL) MPI_Info_free(&w->info);
  if (info != MPI_INFO_NULL) MPI_Info_dup(info, &w->info);
  return 0;
}

/*@C
  PetscSFWindowGetInfo - Get the `MPI_Info` handle used for window allocations

  Logically Collective

  Input Parameter:
. sf - star forest for communication

  Output Parameter:
. info - `MPI_Info` handle

  Level: advanced

  Note:
  If `PetscSFWindowSetInfo()` has not been called, this returns `MPI_INFO_NULL`

.seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowSetInfo()`
@*/
PetscErrorCode PetscSFWindowGetInfo(PetscSF sf, MPI_Info *info)
{
  PetscUseMethod(sf, "PetscSFWindowGetInfo_C", (PetscSF, MPI_Info *), (sf, info));
  return 0;
}

static PetscErrorCode PetscSFWindowGetInfo_Window(PetscSF sf, MPI_Info *info)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  *info = w->info;
  return 0;
}

/*
  PetscSFGetWindow - Get a window for use with a given data type

  Collective

  Input Parameters:
+ sf          - star forest
. unit        - data type
. array       - array to be sent
. sync        - type of synchronization `PetscSFWindowSyncType`
. epoch       - `PETSC_TRUE` to acquire the window and start an epoch, `PETSC_FALSE` to just acquire the window
. fenceassert - assert parameter for call to `MPI_Win_fence()`, if sync == `PETSCSF_WINDOW_SYNC_FENCE`
. postassert  - assert parameter for call to `MPI_Win_post()`, if sync == `PETSCSF_WINDOW_SYNC_ACTIVE`
- startassert - assert parameter for call to `MPI_Win_start()`, if sync == `PETSCSF_WINDOW_SYNC_ACTIVE`

  Output Parameters:
+ target_disp - target_disp argument for RMA calls (significant for `PETSCSF_WINDOW_FLAVOR_DYNAMIC` only)
. reqs        - array of requests (significant for sync == `PETSCSF_WINDOW_SYNC_LOCK` only)
- win         - window

  Level: developer

.seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFGetRootRanks()`, `PetscSFWindowGetDataTypes()`
*/
static PetscErrorCode PetscSFGetWindow(PetscSF sf, MPI_Datatype unit, void *array, PetscSFWindowSyncType sync, PetscBool epoch, PetscMPIInt fenceassert, PetscMPIInt postassert, PetscMPIInt startassert, const MPI_Aint **target_disp, MPI_Request **reqs, MPI_Win *win)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  MPI_Aint        lb, lb_true, bytes, bytes_true;
  PetscSFWinLink  link;
#if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)
  MPI_Aint winaddr;
  PetscInt nranks;
#endif
  PetscBool reuse = PETSC_FALSE, update = PETSC_FALSE;
  PetscBool dummy[2];
  MPI_Aint  wsize;

  MPI_Type_get_extent(unit, &lb, &bytes);
  MPI_Type_get_true_extent(unit, &lb_true, &bytes_true);

  if (w->flavor != PETSCSF_WINDOW_FLAVOR_CREATE) reuse = PETSC_TRUE;
  for (link = w->wins; reuse && link; link = link->next) {
    PetscBool winok = PETSC_FALSE;
    if (w->flavor != link->flavor) continue;
    switch (w->flavor) {
    case PETSCSF_WINDOW_FLAVOR_DYNAMIC: /* check available matching array, error if in use (we additionally check that the matching condition is the same across processes) */
      if (array == link->addr) {
        if (PetscDefined(USE_DEBUG)) {
          dummy[0] = PETSC_TRUE;
          dummy[1] = PETSC_TRUE;
          MPI_Allreduce(MPI_IN_PLACE, dummy, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)sf));
          MPI_Allreduce(MPI_IN_PLACE, dummy + 1, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)sf));
          if (dummy[0] != dummy[1]) SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "PETSCSF_WINDOW_FLAVOR_DYNAMIC requires root pointers to be consistently used across the comm. Use PETSCSF_WINDOW_FLAVOR_CREATE or PETSCSF_WINDOW_FLAVOR_ALLOCATE");
        }
        winok       = PETSC_TRUE;
        link->paddr = array;
      } else if (PetscDefined(USE_DEBUG)) {
        dummy[0] = PETSC_FALSE;
        dummy[1] = PETSC_FALSE;
        MPI_Allreduce(MPI_IN_PLACE, dummy, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)sf));
        MPI_Allreduce(MPI_IN_PLACE, dummy + 1, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)sf));
        if (dummy[0] != dummy[1]) SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "PETSCSF_WINDOW_FLAVOR_DYNAMIC requires root pointers to be consistently used across the comm. Use PETSCSF_WINDOW_FLAVOR_CREATE or PETSCSF_WINDOW_FLAVOR_ALLOCATE");
      }
      break;
    case PETSCSF_WINDOW_FLAVOR_ALLOCATE: /* check available by matching size, allocate if in use */
    case PETSCSF_WINDOW_FLAVOR_SHARED:
      if (!link->inuse && bytes == (MPI_Aint)link->bytes) {
        update      = PETSC_TRUE;
        link->paddr = array;
        winok       = PETSC_TRUE;
      }
      break;
    default:
      SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "No support for flavor %s", PetscSFWindowFlavorTypes[w->flavor]);
    }
    if (winok) {
      *win = link->win;
      PetscInfo(sf, "Reusing window %" PETSC_MPI_WIN_FMT " of flavor %d for comm %" PETSC_MPI_COMM_FMT "\n", link->win, link->flavor, PetscObjectComm((PetscObject)sf));
      goto found;
    }
  }

  wsize = (MPI_Aint)bytes * sf->nroots;
  PetscNew(&link);
  link->bytes           = bytes;
  link->next            = w->wins;
  link->flavor          = w->flavor;
  link->dyn_target_addr = NULL;
  link->reqs            = NULL;
  w->wins               = link;
  if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
    PetscInt i;

    PetscMalloc1(sf->nranks, &link->reqs);
    for (i = 0; i < sf->nranks; i++) link->reqs[i] = MPI_REQUEST_NULL;
  }
  switch (w->flavor) {
  case PETSCSF_WINDOW_FLAVOR_CREATE:
    MPI_Win_create(array, wsize, (PetscMPIInt)bytes, w->info, PetscObjectComm((PetscObject)sf), &link->win);
    link->addr  = array;
    link->paddr = array;
    break;
#if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)
  case PETSCSF_WINDOW_FLAVOR_DYNAMIC:
    MPI_Win_create_dynamic(w->info, PetscObjectComm((PetscObject)sf), &link->win);
#if defined(PETSC_HAVE_OMPI_MAJOR_VERSION) /* some OpenMPI versions do not support MPI_Win_attach(win,NULL,0); */
    MPI_Win_attach(link->win, wsize ? array : (void *)dummy, wsize);
#else
    MPI_Win_attach(link->win, array, wsize);
#endif
    link->addr  = array;
    link->paddr = array;
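
    /* Descriptive note (added): with MPI_Win_create_dynamic() the attached memory has a different
       base address on every process, so RMA target displacements cannot be computed locally. The
       bcast over w->dynsf below ships each root rank's MPI_Get_address() result to the processes
       that reference it; those addresses are later used as target_disp in MPI_Get/MPI_Accumulate. */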
    PetscSFSetUp(w->dynsf);
    PetscSFGetRootRanks(w->dynsf, &nranks, NULL, NULL, NULL, NULL);
    PetscMalloc1(nranks, &link->dyn_target_addr);
    MPI_Get_address(array, &winaddr);
    PetscSFBcastBegin(w->dynsf, MPI_AINT, &winaddr, link->dyn_target_addr, MPI_REPLACE);
    PetscSFBcastEnd(w->dynsf, MPI_AINT, &winaddr, link->dyn_target_addr, MPI_REPLACE);
    break;
  case PETSCSF_WINDOW_FLAVOR_ALLOCATE:
    MPI_Win_allocate(wsize, (PetscMPIInt)bytes, w->info, PetscObjectComm((PetscObject)sf), &link->addr, &link->win);
    update      = PETSC_TRUE;
    link->paddr = array;
    break;
#endif
#if defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
  case PETSCSF_WINDOW_FLAVOR_SHARED:
    MPI_Win_allocate_shared(wsize, (PetscMPIInt)bytes, w->info, PetscObjectComm((PetscObject)sf), &link->addr, &link->win);
    update      = PETSC_TRUE;
    link->paddr = array;
    break;
#endif
  default:
    SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "No support for flavor %s", PetscSFWindowFlavorTypes[w->flavor]);
  }
  PetscInfo(sf, "New window %" PETSC_MPI_WIN_FMT " of flavor %d for comm %" PETSC_MPI_COMM_FMT "\n", link->win, link->flavor, PetscObjectComm((PetscObject)sf));
  *win = link->win;

found:

  if (target_disp) *target_disp = link->dyn_target_addr;
  if (reqs) *reqs = link->reqs;
  if (update) { /* locks are needed for the "separate" memory model only; the fence guarantees memory synchronization */
    PetscMPIInt rank;

    MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank);
    if (sync == PETSCSF_WINDOW_SYNC_LOCK) MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, MPI_MODE_NOCHECK, *win);
    PetscMemcpy(link->addr, array, sf->nroots * bytes);
    if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
      MPI_Win_unlock(rank, *win);
      MPI_Win_fence(0, *win);
    }
  }
  link->inuse = PETSC_TRUE;
  link->epoch = epoch;
  if (epoch) {
    switch (sync) {
    case PETSCSF_WINDOW_SYNC_FENCE:
      MPI_Win_fence(fenceassert, *win);
      break;
    case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */
      break;
    case PETSCSF_WINDOW_SYNC_ACTIVE: {
      MPI_Group   ingroup, outgroup;
      PetscMPIInt isize, osize;

      /* OpenMPI 4.0.2 with btl=vader does not like calling
         - MPI_Win_complete when ogroup is empty
         - MPI_Win_wait when igroup is empty
         So, we do not even issue the corresponding start and post calls
         The MPI standard (Sec. 11.5.2 of MPI 3.1) only requires that
         start(outgroup) has a matching post(ingroup)
         and this is guaranteed by PetscSF
      */
      PetscSFGetGroups(sf, &ingroup, &outgroup);
      MPI_Group_size(ingroup, &isize);
      MPI_Group_size(outgroup, &osize);
      if (isize) MPI_Win_post(ingroup, postassert, *win);
      if (osize) MPI_Win_start(outgroup, startassert, *win);
    } break;
    default:
      SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_PLIB, "Unknown synchronization type");
    }
  }
  return 0;
}

/*
  PetscSFFindWindow - Finds a window that is already in use

  Not Collective

  Input Parameters:
+ sf    - star forest
. unit  - data type
- array - array with which the window is associated

  Output Parameters:
+ win  - window
- reqs - outstanding requests associated with the window

  Level: developer

.seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
*/
static PetscErrorCode PetscSFFindWindow(PetscSF sf, MPI_Datatype unit, const void *array, MPI_Win *win, MPI_Request **reqs)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  PetscSFWinLink  link;

  *win = MPI_WIN_NULL;
  for (link = w->wins; link; link = link->next) {
    if (array == link->paddr) {
      PetscInfo(sf, "Window %" PETSC_MPI_WIN_FMT " of flavor %d for comm %" PETSC_MPI_COMM_FMT "\n", link->win, link->flavor, PetscObjectComm((PetscObject)sf));
      *win  = link->win;
      *reqs = link->reqs;
      return 0;
    }
  }
  SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Requested window not in use");
}

/*
  PetscSFRestoreWindow - Restores a window obtained with `PetscSFGetWindow()`

  Collective

  Input Parameters:
+ sf          - star forest
. unit        - data type
. array       - array associated with window
. sync        - type of synchronization `PetscSFWindowSyncType`
. epoch       - close an epoch, must match argument to `PetscSFGetWindow()`
. fenceassert - assert parameter for call to `MPI_Win_fence()`, if sync == `PETSCSF_WINDOW_SYNC_FENCE`
. update      - if we have to update the local window array
- win         - window

  Level: developer

.seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFFindWindow()`
*/
static PetscErrorCode PetscSFRestoreWindow(PetscSF sf, MPI_Datatype unit, void *array, PetscSFWindowSyncType sync, PetscBool epoch, PetscMPIInt fenceassert, PetscBool update, MPI_Win *win)
{
  PetscSF_Window         *w = (PetscSF_Window *)sf->data;
  PetscSFWinLink         *p, link;
  PetscBool               reuse = PETSC_FALSE;
  PetscSFWindowFlavorType flavor;
  void                   *laddr;
  size_t                  bytes;

  for (p = &w->wins; *p; p = &(*p)->next) {
    link = *p;
    if (*win == link->win) {
      if (epoch != link->epoch) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Restoring window without ending epoch");
      laddr  = link->addr;
      flavor = link->flavor;
      bytes  = link->bytes;
      if (flavor != PETSCSF_WINDOW_FLAVOR_CREATE) reuse = PETSC_TRUE;
      else {
        *p     = link->next;
        update = PETSC_FALSE;
      } /* remove from list */
      goto found;
    }
  }
  SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Requested window not in use");

found:
  PetscInfo(sf, "Window %" PETSC_MPI_WIN_FMT " of flavor %d for comm %" PETSC_MPI_COMM_FMT "\n", link->win, link->flavor, PetscObjectComm((PetscObject)sf));
  if (epoch) {
    switch (sync) {
    case PETSCSF_WINDOW_SYNC_FENCE:
      MPI_Win_fence(fenceassert, *win);
      break;
    case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */
      break;
    case PETSCSF_WINDOW_SYNC_ACTIVE: {
      MPI_Group   ingroup, outgroup;
      PetscMPIInt isize, osize;

      /* OpenMPI 4.0.2 with btl=vader does not like calling
         - MPI_Win_complete when ogroup is empty
         - MPI_Win_wait when igroup is empty
         The MPI standard (Sec. 11.5.2 of MPI 3.1) only requires that
         - each process that issues a call to MPI_Win_start issues a call to MPI_Win_complete
         - each process that issues a call to MPI_Win_post issues a call to MPI_Win_wait
      */
      PetscSFGetGroups(sf, &ingroup, &outgroup);
      MPI_Group_size(ingroup, &isize);
      MPI_Group_size(outgroup, &osize);
      if (osize) MPI_Win_complete(*win);
      if (isize) MPI_Win_wait(*win);
    } break;
    default:
      SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_PLIB, "Unknown synchronization type");
    }
  }
  if (update) {
    if (sync == PETSCSF_WINDOW_SYNC_LOCK) MPI_Win_fence(MPI_MODE_NOPUT | MPI_MODE_NOSUCCEED, *win);
    PetscMemcpy(array, laddr, sf->nroots * bytes);
  }
  link->epoch = PETSC_FALSE;
  link->inuse = PETSC_FALSE;
  link->paddr = NULL;
  if (!reuse) {
    PetscFree(link->dyn_target_addr);
    PetscFree(link->reqs);
    MPI_Win_free(&link->win);
    PetscFree(link);
    *win = MPI_WIN_NULL;
  }
  return 0;
}

static PetscErrorCode PetscSFSetUp_Window(PetscSF sf)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  MPI_Group       ingroup, outgroup;

  PetscSFSetUpRanks(sf, MPI_GROUP_EMPTY);
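  /* Descriptive note (added): w->dynsf is a one-root-per-process helper SF used only by the
     DYNAMIC flavor. Its graph connects this process (as a leaf) to root 0 on each of its root
     ranks, so a bcast over it delivers every root rank's dynamically attached base address (see
     PetscSFGetWindow()). The CREATE flavor is forced on it to avoid recursing into DYNAMIC. */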
  if (!w->dynsf) {
    PetscInt     i;
    PetscSFNode *remotes;

    PetscMalloc1(sf->nranks, &remotes);
    for (i = 0; i < sf->nranks; i++) {
      remotes[i].rank  = sf->ranks[i];
      remotes[i].index = 0;
    }
    PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &w->dynsf);
    PetscSFWindowSetFlavorType(w->dynsf, PETSCSF_WINDOW_FLAVOR_CREATE); /* break recursion */
    PetscSFSetGraph(w->dynsf, 1, sf->nranks, NULL, PETSC_OWN_POINTER, remotes, PETSC_OWN_POINTER);
  }
  switch (w->sync) {
  case PETSCSF_WINDOW_SYNC_ACTIVE:
    PetscSFGetGroups(sf, &ingroup, &outgroup);
  default:
    break;
  }
  return 0;
}

static PetscErrorCode PetscSFSetFromOptions_Window(PetscSF sf, PetscOptionItems *PetscOptionsObject)
{
  PetscSF_Window         *w      = (PetscSF_Window *)sf->data;
  PetscSFWindowFlavorType flavor = w->flavor;

  PetscOptionsHeadBegin(PetscOptionsObject, "PetscSF Window options");
  PetscOptionsEnum("-sf_window_sync", "synchronization type to use for PetscSF Window communication", "PetscSFWindowSetSyncType", PetscSFWindowSyncTypes, (PetscEnum)w->sync, (PetscEnum *)&w->sync, NULL);
  PetscOptionsEnum("-sf_window_flavor", "flavor to use for PetscSF Window creation", "PetscSFWindowSetFlavorType", PetscSFWindowFlavorTypes, (PetscEnum)flavor, (PetscEnum *)&flavor, NULL);
  PetscSFWindowSetFlavorType(sf, flavor);
  PetscOptionsHeadEnd();
  return 0;
}

static PetscErrorCode PetscSFReset_Window(PetscSF sf)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  PetscSFDataLink link, next;
  PetscSFWinLink  wlink, wnext;
  PetscInt        i;

  for (link = w->link; link; link = next) {
    next = link->next;
    MPI_Type_free(&link->unit);
    for (i = 0; i < sf->nranks; i++) {
      MPI_Type_free(&link->mine[i]);
      MPI_Type_free(&link->remote[i]);
    }
    PetscFree2(link->mine, link->remote);
    PetscFree(link);
  }
  w->link = NULL;
  for (wlink = w->wins; wlink; wlink = wnext) {
    wnext = wlink->next;

    PetscFree(wlink->dyn_target_addr);
    PetscFree(wlink->reqs);
    MPI_Win_free(&wlink->win);
    PetscFree(wlink);
  }
  w->wins = NULL;
  PetscSFDestroy(&w->dynsf);
  if (w->info != MPI_INFO_NULL) MPI_Info_free(&w->info);
  return 0;
}

static PetscErrorCode PetscSFDestroy_Window(PetscSF sf)
{
  PetscSFReset_Window(sf);
  PetscFree(sf->data);
  PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetSyncType_C", NULL);
  PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetSyncType_C", NULL);
  PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetFlavorType_C", NULL);
  PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetFlavorType_C", NULL);
  PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetInfo_C", NULL);
  PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetInfo_C", NULL);
  return 0;
}

static PetscErrorCode PetscSFView_Window(PetscSF sf, PetscViewer viewer)
{
  PetscSF_Window   *w = (PetscSF_Window *)sf->data;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscViewerGetFormat(viewer, &format);
  PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii);
  if (iascii) {
    PetscViewerASCIIPrintf(viewer, "  current flavor=%s synchronization=%s MultiSF sort=%s\n", PetscSFWindowFlavorTypes[w->flavor], PetscSFWindowSyncTypes[w->sync], sf->rankorder ? "rank-order" : "unordered");
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      if (w->info != MPI_INFO_NULL) {
        PetscMPIInt k, nkeys;
        char        key[MPI_MAX_INFO_KEY], value[MPI_MAX_INFO_VAL];

        MPI_Info_get_nkeys(w->info, &nkeys);
        PetscViewerASCIIPrintf(viewer, "  current info with %d keys. Ordered key-value pairs follow:\n", nkeys);
        for (k = 0; k < nkeys; k++) {
          PetscMPIInt flag;

          MPI_Info_get_nthkey(w->info, k, key);
          MPI_Info_get(w->info, key, MPI_MAX_INFO_VAL, value, &flag);
          PetscViewerASCIIPrintf(viewer, "  %s = %s\n", key, value);
        }
      } else {
        PetscViewerASCIIPrintf(viewer, "  current info=MPI_INFO_NULL\n");
      }
    }
  }
  return 0;
}

static PetscErrorCode PetscSFDuplicate_Window(PetscSF sf, PetscSFDuplicateOption opt, PetscSF newsf)
{
  PetscSF_Window       *w = (PetscSF_Window *)sf->data;
  PetscSFWindowSyncType synctype;

  synctype = w->sync;
  /* HACK: Must use FENCE or LOCK when called from PetscSFGetGroups() because ACTIVE here would cause recursion. */
  if (!sf->setupcalled) synctype = PETSCSF_WINDOW_SYNC_LOCK;
  PetscSFWindowSetSyncType(newsf, synctype);
  PetscSFWindowSetFlavorType(newsf, w->flavor);
  PetscSFWindowSetInfo(newsf, w->info);
  return 0;
}

static PetscErrorCode PetscSFBcastBegin_Window(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
{
  PetscSF_Window     *w = (PetscSF_Window *)sf->data;
  PetscInt            i, nranks;
  const PetscMPIInt  *ranks;
  const MPI_Aint     *target_disp;
  const MPI_Datatype *mine, *remote;
  MPI_Request        *reqs;
  MPI_Win             win;

  PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL);
  PetscSFWindowGetDataTypes(sf, unit, &mine, &remote);
  PetscSFGetWindow(sf, unit, (void *)rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOPUT | MPI_MODE_NOPRECEDE, MPI_MODE_NOPUT, 0, &target_disp, &reqs, &win);
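  /* Descriptive note (added): one MPI_Get per root rank pulls all referenced root entries into the
     leaf buffer in a single call, using the cached composite datatypes (mine[i] scatters into
     leafdata, remote[i] gathers from the window). With LOCK synchronization each get is wrapped in
     its own shared lock, and the request-based MPI_Rget is preferred when available so completion
     can be awaited in PetscSFBcastEnd_Window(). */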
  for (i = 0; i < nranks; i++) {
    MPI_Aint tdp = target_disp ? target_disp[i] : 0;

    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {
      MPI_Win_lock(MPI_LOCK_SHARED, ranks[i], MPI_MODE_NOCHECK, win);
#if defined(PETSC_HAVE_MPI_RGET)
      MPI_Rget(leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], win, &reqs[i]);
#else
      MPI_Get(leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], win);
#endif
    } else {
      MPI_Get(leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], win);
    }
  }
  return 0;
}

PetscErrorCode PetscSFBcastEnd_Window(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  MPI_Win         win;
  MPI_Request    *reqs = NULL;

  PetscSFFindWindow(sf, unit, rootdata, &win, &reqs);
  if (reqs) MPI_Waitall(sf->nranks, reqs, MPI_STATUSES_IGNORE);
  if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {
    PetscInt           i, nranks;
    const PetscMPIInt *ranks;

    PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL);
    for (i = 0; i < nranks; i++) MPI_Win_unlock(ranks[i], win);
  }
  PetscSFRestoreWindow(sf, unit, (void *)rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOSTORE | MPI_MODE_NOSUCCEED, PETSC_FALSE, &win);
  return 0;
}
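
/* A hedged end-to-end usage sketch (not part of this file; the graph below is made up):
   broadcast one root value held on rank 0 to a leaf on every rank through this implementation.

     PetscSF     sf;
     PetscSFNode remote = {0, 0};  // every leaf references root 0 on rank 0
     PetscMPIInt rank;
     double      rootdata[1] = {0}, leafdata[1] = {0};

     MPI_Comm_rank(PETSC_COMM_WORLD, &rank);
     if (!rank) rootdata[0] = 42.0;
     PetscSFCreate(PETSC_COMM_WORLD, &sf);
     PetscSFSetType(sf, PETSCSFWINDOW);
     PetscSFSetGraph(sf, rank ? 0 : 1, 1, NULL, PETSC_COPY_VALUES, &remote, PETSC_COPY_VALUES);
     PetscSFBcastBegin(sf, MPI_DOUBLE, rootdata, leafdata, MPI_REPLACE);
     PetscSFBcastEnd(sf, MPI_DOUBLE, rootdata, leafdata, MPI_REPLACE); // leafdata[0] == 42.0 everywhere
     PetscSFDestroy(&sf);
*/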

PetscErrorCode PetscSFReduceBegin_Window(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
{
  PetscSF_Window     *w = (PetscSF_Window *)sf->data;
  PetscInt            i, nranks;
  const PetscMPIInt  *ranks;
  const MPI_Aint     *target_disp;
  const MPI_Datatype *mine, *remote;
  MPI_Win             win;

  PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL);
  PetscSFWindowGetDataTypes(sf, unit, &mine, &remote);
  PetscSFWindowOpTranslate(&op);
  PetscSFGetWindow(sf, unit, rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOPRECEDE, 0, 0, &target_disp, NULL, &win);
  for (i = 0; i < nranks; i++) {
    MPI_Aint tdp = target_disp ? target_disp[i] : 0;

    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) MPI_Win_lock(MPI_LOCK_SHARED, ranks[i], MPI_MODE_NOCHECK, win);
    MPI_Accumulate((void *)leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], op, win);
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) MPI_Win_unlock(ranks[i], win);
  }
  return 0;
}

static PetscErrorCode PetscSFReduceEnd_Window(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  MPI_Win         win;
  MPI_Request    *reqs = NULL;

  PetscSFFindWindow(sf, unit, rootdata, &win, &reqs);
  if (reqs) MPI_Waitall(sf->nranks, reqs, MPI_STATUSES_IGNORE);
  PetscSFRestoreWindow(sf, unit, rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOSUCCEED, PETSC_TRUE, &win);
  return 0;
}

static PetscErrorCode PetscSFFetchAndOpBegin_Window(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, void *leafupdate, MPI_Op op)
{
  PetscInt            i, nranks;
  const PetscMPIInt  *ranks;
  const MPI_Datatype *mine, *remote;
  const MPI_Aint     *target_disp;
  MPI_Win             win;
  PetscSF_Window     *w = (PetscSF_Window *)sf->data;
#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  PetscSFWindowFlavorType oldf;
#endif

  PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL);
  PetscSFWindowGetDataTypes(sf, unit, &mine, &remote);
  PetscSFWindowOpTranslate(&op);
#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  /* FetchAndOp without MPI_Get_accumulate requires locking.
     We create a new window every time so as not to interfere with a user-provided MPI_Info that may have set "no_locks"="true" */
  oldf      = w->flavor;
  w->flavor = PETSCSF_WINDOW_FLAVOR_CREATE;
  PetscSFGetWindow(sf, unit, rootdata, PETSCSF_WINDOW_SYNC_LOCK, PETSC_FALSE, 0, 0, 0, &target_disp, NULL, &win);
#else
  PetscSFGetWindow(sf, unit, rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOPRECEDE, 0, 0, &target_disp, NULL, &win);
#endif
  for (i = 0; i < nranks; i++) {
    MPI_Aint tdp = target_disp ? target_disp[i] : 0;

#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, ranks[i], 0, win);
    MPI_Get(leafupdate, 1, mine[i], ranks[i], tdp, 1, remote[i], win);
    MPI_Accumulate((void *)leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], op, win);
    MPI_Win_unlock(ranks[i], win);
#else
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) MPI_Win_lock(MPI_LOCK_SHARED, ranks[i], 0, win);
    MPI_Get_accumulate((void *)leafdata, 1, mine[i], leafupdate, 1, mine[i], ranks[i], tdp, 1, remote[i], op, win);
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) MPI_Win_unlock(ranks[i], win);
#endif
  }
#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  w->flavor = oldf;
#endif
  return 0;
}

static PetscErrorCode PetscSFFetchAndOpEnd_Window(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
{
  MPI_Win win;
#if defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
#endif
  MPI_Request *reqs = NULL;

  PetscSFFindWindow(sf, unit, rootdata, &win, &reqs);
  if (reqs) MPI_Waitall(sf->nranks, reqs, MPI_STATUSES_IGNORE);
#if defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  PetscSFRestoreWindow(sf, unit, rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOSUCCEED, PETSC_TRUE, &win);
#else
  PetscSFRestoreWindow(sf, unit, rootdata, PETSCSF_WINDOW_SYNC_LOCK, PETSC_FALSE, 0, PETSC_TRUE, &win);
#endif
  return 0;
}
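
/* A hedged sketch of selecting this implementation from the command line (the options shown are
   the ones registered in PetscSFSetFromOptions_Window() above; availability of the dynamic flavor
   depends on the MPI library):

     ./app -sf_type window -sf_window_sync active -sf_window_flavor dynamic
*/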

PETSC_INTERN PetscErrorCode PetscSFCreate_Window(PetscSF sf)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  sf->ops->SetUp           = PetscSFSetUp_Window;
  sf->ops->SetFromOptions  = PetscSFSetFromOptions_Window;
  sf->ops->Reset           = PetscSFReset_Window;
  sf->ops->Destroy         = PetscSFDestroy_Window;
  sf->ops->View            = PetscSFView_Window;
  sf->ops->Duplicate       = PetscSFDuplicate_Window;
  sf->ops->BcastBegin      = PetscSFBcastBegin_Window;
  sf->ops->BcastEnd        = PetscSFBcastEnd_Window;
  sf->ops->ReduceBegin     = PetscSFReduceBegin_Window;
  sf->ops->ReduceEnd       = PetscSFReduceEnd_Window;
  sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Window;
  sf->ops->FetchAndOpEnd   = PetscSFFetchAndOpEnd_Window;

  PetscNew(&w);
  sf->data  = (void *)w;
  w->sync   = PETSCSF_WINDOW_SYNC_FENCE;
  w->flavor = PETSCSF_WINDOW_FLAVOR_CREATE;
  w->info   = MPI_INFO_NULL;

  PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetSyncType_C", PetscSFWindowSetSyncType_Window);
  PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetSyncType_C", PetscSFWindowGetSyncType_Window);
  PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetFlavorType_C", PetscSFWindowSetFlavorType_Window);
  PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetFlavorType_C", PetscSFWindowGetFlavorType_Window);
  PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetInfo_C", PetscSFWindowSetInfo_Window);
  PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetInfo_C", PetscSFWindowGetInfo_Window);

#if defined(OMPI_MAJOR_VERSION) && (OMPI_MAJOR_VERSION < 1 || (OMPI_MAJOR_VERSION == 1 && OMPI_MINOR_VERSION <= 6))
  {
    PetscBool ackbug = PETSC_FALSE;
    PetscOptionsGetBool(NULL, NULL, "-acknowledge_ompi_onesided_bug", &ackbug, NULL);
    if (ackbug) {
      PetscInfo(sf, "Acknowledged Open MPI bug, proceeding anyway. Expect memory corruption.\n");
    } else SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_LIB, "Open MPI is known to be buggy (https://svn.open-mpi.org/trac/ompi/ticket/1905 and 2656), use -acknowledge_ompi_onesided_bug to proceed");
  }
#endif
  return 0;
}