Actual source code: sfwindow.c
1: #include <petsc/private/sfimpl.h>
3: typedef struct _n_PetscSFDataLink *PetscSFDataLink;
4: typedef struct _n_PetscSFWinLink *PetscSFWinLink;
6: typedef struct {
7: PetscSFWindowSyncType sync; /* FENCE, LOCK, or ACTIVE synchronization */
8: PetscSFDataLink link; /* List of MPI data types, lazily constructed for each data type */
9: PetscSFWinLink wins; /* List of active windows */
10: PetscSFWindowFlavorType flavor; /* Current PETSCSF_WINDOW_FLAVOR_ */
11: PetscSF dynsf;
12: MPI_Info info;
13: MPI_Comm window_comm;
14: PetscBool is_empty;
15: PetscMPIInt *wcommranks;
16: } PetscSF_Window;
18: struct _n_PetscSFDataLink {
19: MPI_Datatype unit;
20: MPI_Datatype *mine;
21: MPI_Datatype *remote;
22: PetscSFDataLink next;
23: };
25: struct _n_PetscSFWinLink {
26: PetscBool inuse;
27: MPI_Aint bytes;
28: void *addr;
29: void *rootdata;
30: void *leafdata;
31: MPI_Win win;
32: MPI_Request *reqs;
33: PetscSFWindowFlavorType flavor;
34: MPI_Aint *dyn_target_addr;
35: PetscBool epoch;
36: PetscBool persistent;
37: PetscSFWinLink next;
38: };
40: const char *const PetscSFWindowSyncTypes[] = {"FENCE", "LOCK", "ACTIVE", "PetscSFWindowSyncType", "PETSCSF_WINDOW_SYNC_", NULL};
41: const char *const PetscSFWindowFlavorTypes[] = {"CREATE", "DYNAMIC", "ALLOCATE", "SHARED", "PetscSFWindowFlavorType", "PETSCSF_WINDOW_FLAVOR_", NULL};
43: /* Built-in MPI_Ops act elementwise inside MPI_Accumulate, but cannot be used with composite types inside collectives (MPI_Allreduce) */
44: static PetscErrorCode PetscSFWindowOpTranslate(MPI_Op *op)
45: {
46: PetscFunctionBegin;
47: if (*op == MPIU_SUM) *op = MPI_SUM;
48: else if (*op == MPIU_MAX) *op = MPI_MAX;
49: else if (*op == MPIU_MIN) *op = MPI_MIN;
50: PetscFunctionReturn(PETSC_SUCCESS);
51: }
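/* Illustrative note (not part of the build): a PETSc-level reduction such as

     PetscSFReduceBegin(sf, MPIU_SCALAR, leafdata, rootdata, MPIU_SUM);

   arrives here with op == MPIU_SUM.  The translation above lets the RMA path issue

     MPI_Accumulate(leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], MPI_SUM, win);

   (mine, remote, ranks, and tdp are the names used later in PetscSFReduceBegin_Window()),
   because MPI_Accumulate() accepts only predefined reduction operations, which act
   elementwise on the constituent built-in types as noted in the comment above. */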
53: /*
54: PetscSFWindowGetDataTypes - gets composite local and remote data types for each rank
56: Not Collective
58: Input Parameters:
59: + sf - star forest of type `PETSCSFWINDOW`
60: - unit - data type for each node
62: Output Parameters:
63: + localtypes - types describing part of local leaf buffer referencing each remote rank
64: - remotetypes - types describing part of remote root buffer referenced for each remote rank
66: Level: developer
68: .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetGraph()`, `PetscSFView()`
 69: */
70: static PetscErrorCode PetscSFWindowGetDataTypes(PetscSF sf, MPI_Datatype unit, const MPI_Datatype **localtypes, const MPI_Datatype **remotetypes)
71: {
72: PetscSF_Window *w = (PetscSF_Window *)sf->data;
73: PetscSFDataLink link;
74: PetscMPIInt nranks;
75: const PetscInt *roffset;
77: PetscFunctionBegin;
78: /* Look for types in cache */
79: for (link = w->link; link; link = link->next) {
80: PetscBool match;
82: PetscCall(MPIPetsc_Type_compare(unit, link->unit, &match));
83: if (match) {
84: *localtypes = link->mine;
85: *remotetypes = link->remote;
86: PetscFunctionReturn(PETSC_SUCCESS);
87: }
88: }
90: /* Create new composite types for each send rank */
91: PetscCall(PetscSFGetRootRanks(sf, &nranks, NULL, &roffset, NULL, NULL));
92: PetscCall(PetscNew(&link));
93: PetscCallMPI(MPI_Type_dup(unit, &link->unit));
94: PetscCall(PetscMalloc2(nranks, &link->mine, nranks, &link->remote));
95: for (PetscMPIInt i = 0; i < nranks; i++) {
96: PetscMPIInt rcount;
97: PetscMPIInt *rmine, *rremote;
99: PetscCall(PetscMPIIntCast(roffset[i + 1] - roffset[i], &rcount));
100: #if !defined(PETSC_USE_64BIT_INDICES)
101: rmine = sf->rmine + sf->roffset[i];
102: rremote = sf->rremote + sf->roffset[i];
103: #else
104: PetscCall(PetscMalloc2(rcount, &rmine, rcount, &rremote));
105: for (PetscInt j = 0; j < rcount; j++) {
106: PetscCall(PetscMPIIntCast(sf->rmine[sf->roffset[i] + j], &rmine[j]));
107: PetscCall(PetscMPIIntCast(sf->rremote[sf->roffset[i] + j], &rremote[j]));
108: }
109: #endif
111: PetscCallMPI(MPI_Type_create_indexed_block(rcount, 1, rmine, link->unit, &link->mine[i]));
112: PetscCallMPI(MPI_Type_create_indexed_block(rcount, 1, rremote, link->unit, &link->remote[i]));
113: #if defined(PETSC_USE_64BIT_INDICES)
114: PetscCall(PetscFree2(rmine, rremote));
115: #endif
116: PetscCallMPI(MPI_Type_commit(&link->mine[i]));
117: PetscCallMPI(MPI_Type_commit(&link->remote[i]));
118: }
119: link->next = w->link;
120: w->link = link;
122: *localtypes = link->mine;
123: *remotetypes = link->remote;
124: PetscFunctionReturn(PETSC_SUCCESS);
125: }
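/* Worked example (illustrative values): suppose this process has three leaves connected
   to remote rank ranks[i], stored at local leaf indices {2,5,7} and referring to root
   indices {0,1,4} on that rank.  The loop above then builds

     MPI_Type_create_indexed_block(3, 1, (int[]){2,5,7}, unit, &link->mine[i]);   // local leaf layout
     MPI_Type_create_indexed_block(3, 1, (int[]){0,1,4}, unit, &link->remote[i]); // remote root layout

   so a single MPI_Get(leafdata, 1, mine[i], ranks[i], disp, 1, remote[i], win) moves all
   three entries at once.  The committed types are cached in w->link and reused for later
   calls with the same unit. */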
127: /*@
128: PetscSFWindowSetFlavorType - Set flavor type for `MPI_Win` creation
130: Logically Collective
132: Input Parameters:
133: + sf - star forest for communication of type `PETSCSFWINDOW`
134: - flavor - flavor type
136: Options Database Key:
137: . -sf_window_flavor <flavor> - sets the flavor type CREATE, DYNAMIC, ALLOCATE or SHARED (see `PetscSFWindowFlavorType`)
139: Level: advanced
141: Notes:
 142:   Window reuse follows these rules\:
143: .vb
144: PETSCSF_WINDOW_FLAVOR_CREATE: creates a new window every time, uses MPI_Win_create
146: PETSCSF_WINDOW_FLAVOR_DYNAMIC: uses MPI_Win_create_dynamic/MPI_Win_attach and tries to reuse windows by comparing the root array. Intended to be used on repeated applications of the same SF, e.g.
147: PetscSFRegisterPersistent(sf,rootdata1,leafdata);
148: for i=1 to K
149: PetscSFOperationBegin(sf,rootdata1,leafdata);
150: PetscSFOperationEnd(sf,rootdata1,leafdata);
151: ...
152: PetscSFOperationBegin(sf,rootdata1,leafdata);
153: PetscSFOperationEnd(sf,rootdata1,leafdata);
154: endfor
155: PetscSFDeregisterPersistent(sf,rootdata1,leafdata);
157: The following pattern will instead raise an error
158: PetscSFOperationBegin(sf,rootdata1,leafdata);
159: PetscSFOperationEnd(sf,rootdata1,leafdata);
160: PetscSFOperationBegin(sf,rank ? rootdata1 : rootdata2,leafdata);
161: PetscSFOperationEnd(sf,rank ? rootdata1 : rootdata2,leafdata);
 163:      PETSCSF_WINDOW_FLAVOR_ALLOCATE: uses MPI_Win_allocate, reuses any pre-existing window that fits the data and is not in use
 165:      PETSCSF_WINDOW_FLAVOR_SHARED: uses MPI_Win_allocate_shared, reuse policy as for PETSCSF_WINDOW_FLAVOR_ALLOCATE
166: .ve
168: .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowGetFlavorType()`
169: @*/
170: PetscErrorCode PetscSFWindowSetFlavorType(PetscSF sf, PetscSFWindowFlavorType flavor)
171: {
172: PetscFunctionBegin;
175: PetscTryMethod(sf, "PetscSFWindowSetFlavorType_C", (PetscSF, PetscSFWindowFlavorType), (sf, flavor));
176: PetscFunctionReturn(PETSC_SUCCESS);
177: }
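/* Usage sketch (illustrative): the flavor is normally chosen before PetscSFSetUp(),
   either programmatically,

     PetscCall(PetscSFWindowSetFlavorType(sf, PETSCSF_WINDOW_FLAVOR_DYNAMIC));

   or from the options database with -sf_window_flavor dynamic followed by
   PetscSFSetFromOptions(sf). */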
179: static PetscErrorCode PetscSFWindowSetFlavorType_Window(PetscSF sf, PetscSFWindowFlavorType flavor)
180: {
181: PetscSF_Window *w = (PetscSF_Window *)sf->data;
183: PetscFunctionBegin;
184: w->flavor = flavor;
185: PetscFunctionReturn(PETSC_SUCCESS);
186: }
188: /*@
189: PetscSFWindowGetFlavorType - Get `PETSCSFWINDOW` flavor type for `PetscSF` communication
191: Logically Collective
193: Input Parameter:
194: . sf - star forest for communication of type `PETSCSFWINDOW`
196: Output Parameter:
197: . flavor - flavor type
199: Level: advanced
201: .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowSetFlavorType()`
202: @*/
203: PetscErrorCode PetscSFWindowGetFlavorType(PetscSF sf, PetscSFWindowFlavorType *flavor)
204: {
205: PetscFunctionBegin;
207: PetscAssertPointer(flavor, 2);
208: PetscUseMethod(sf, "PetscSFWindowGetFlavorType_C", (PetscSF, PetscSFWindowFlavorType *), (sf, flavor));
209: PetscFunctionReturn(PETSC_SUCCESS);
210: }
212: static PetscErrorCode PetscSFWindowGetFlavorType_Window(PetscSF sf, PetscSFWindowFlavorType *flavor)
213: {
214: PetscSF_Window *w = (PetscSF_Window *)sf->data;
216: PetscFunctionBegin;
217: *flavor = w->flavor;
218: PetscFunctionReturn(PETSC_SUCCESS);
219: }
221: /*@
222: PetscSFWindowSetSyncType - Set synchronization type for `PetscSF` communication of type `PETSCSFWINDOW`
224: Logically Collective
226: Input Parameters:
227: + sf - star forest for communication
228: - sync - synchronization type
230: Options Database Key:
231: . -sf_window_sync <sync> - sets the synchronization type FENCE, LOCK, or ACTIVE (see `PetscSFWindowSyncType`)
233: Level: advanced
235: .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowGetSyncType()`, `PetscSFWindowSyncType`
236: @*/
237: PetscErrorCode PetscSFWindowSetSyncType(PetscSF sf, PetscSFWindowSyncType sync)
238: {
239: PetscFunctionBegin;
242: PetscTryMethod(sf, "PetscSFWindowSetSyncType_C", (PetscSF, PetscSFWindowSyncType), (sf, sync));
243: PetscFunctionReturn(PETSC_SUCCESS);
244: }
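/* Usage sketch (illustrative):

     PetscCall(PetscSFWindowSetSyncType(sf, PETSCSF_WINDOW_SYNC_ACTIVE));

   or, equivalently, -sf_window_sync active together with PetscSFSetFromOptions(sf)
   (see PetscSFSetFromOptions_Window() below). */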
246: static PetscErrorCode PetscSFWindowSetSyncType_Window(PetscSF sf, PetscSFWindowSyncType sync)
247: {
248: PetscSF_Window *w = (PetscSF_Window *)sf->data;
250: PetscFunctionBegin;
251: w->sync = sync;
252: PetscFunctionReturn(PETSC_SUCCESS);
253: }
255: /*@
256: PetscSFWindowGetSyncType - Get synchronization type for `PetscSF` communication of type `PETSCSFWINDOW`
258: Logically Collective
260: Input Parameter:
261: . sf - star forest for communication
263: Output Parameter:
264: . sync - synchronization type
266: Level: advanced
268: .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowSetSyncType()`, `PetscSFWindowSyncType`
269: @*/
270: PetscErrorCode PetscSFWindowGetSyncType(PetscSF sf, PetscSFWindowSyncType *sync)
271: {
272: PetscFunctionBegin;
274: PetscAssertPointer(sync, 2);
275: PetscUseMethod(sf, "PetscSFWindowGetSyncType_C", (PetscSF, PetscSFWindowSyncType *), (sf, sync));
276: PetscFunctionReturn(PETSC_SUCCESS);
277: }
279: static PetscErrorCode PetscSFWindowGetSyncType_Window(PetscSF sf, PetscSFWindowSyncType *sync)
280: {
281: PetscSF_Window *w = (PetscSF_Window *)sf->data;
283: PetscFunctionBegin;
284: *sync = w->sync;
285: PetscFunctionReturn(PETSC_SUCCESS);
286: }
288: /*@C
289: PetscSFWindowSetInfo - Set the `MPI_Info` handle that will be used for subsequent windows allocation
291: Logically Collective
293: Input Parameters:
294: + sf - star forest for communication
295: - info - `MPI_Info` handle
297: Level: advanced
299: Note:
300: The info handle is duplicated with a call to `MPI_Info_dup()` unless info = `MPI_INFO_NULL`.
302: .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowGetInfo()`
303: @*/
304: PetscErrorCode PetscSFWindowSetInfo(PetscSF sf, MPI_Info info)
305: {
306: PetscFunctionBegin;
308: PetscTryMethod(sf, "PetscSFWindowSetInfo_C", (PetscSF, MPI_Info), (sf, info));
309: PetscFunctionReturn(PETSC_SUCCESS);
310: }
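/* Usage sketch (illustrative): forward MPI window hints, e.g. the standard "no_locks" key,

     MPI_Info info;
     PetscCallMPI(MPI_Info_create(&info));
     PetscCallMPI(MPI_Info_set(info, "no_locks", "true"));
     PetscCall(PetscSFWindowSetInfo(sf, info));
     PetscCallMPI(MPI_Info_free(&info)); // safe: the SF keeps its own MPI_Info_dup() copy

   The hints are applied to every window this SF creates afterwards. */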
312: static PetscErrorCode PetscSFWindowSetInfo_Window(PetscSF sf, MPI_Info info)
313: {
314: PetscSF_Window *w = (PetscSF_Window *)sf->data;
316: PetscFunctionBegin;
317: if (w->info != MPI_INFO_NULL) PetscCallMPI(MPI_Info_free(&w->info));
318: if (info != MPI_INFO_NULL) PetscCallMPI(MPI_Info_dup(info, &w->info));
319: PetscFunctionReturn(PETSC_SUCCESS);
320: }
322: /*@C
323: PetscSFWindowGetInfo - Get the `MPI_Info` handle used for windows allocation
325: Logically Collective
327: Input Parameter:
328: . sf - star forest for communication
330: Output Parameter:
331: . info - `MPI_Info` handle
333: Level: advanced
335: Note:
 336:   If `PetscSFWindowSetInfo()` has not been called, this returns `MPI_INFO_NULL`
338: .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowSetInfo()`
339: @*/
340: PetscErrorCode PetscSFWindowGetInfo(PetscSF sf, MPI_Info *info)
341: {
342: PetscFunctionBegin;
344: PetscAssertPointer(info, 2);
345: PetscUseMethod(sf, "PetscSFWindowGetInfo_C", (PetscSF, MPI_Info *), (sf, info));
346: PetscFunctionReturn(PETSC_SUCCESS);
347: }
349: static PetscErrorCode PetscSFWindowGetInfo_Window(PetscSF sf, MPI_Info *info)
350: {
351: PetscSF_Window *w = (PetscSF_Window *)sf->data;
353: PetscFunctionBegin;
354: *info = w->info;
355: PetscFunctionReturn(PETSC_SUCCESS);
356: }
358: static PetscErrorCode PetscSFWindowCreateDynamicSF(PetscSF sf, PetscSF *dynsf)
359: {
360: PetscSFNode *remotes;
362: PetscFunctionBegin;
363: PetscCall(PetscMalloc1(sf->nranks, &remotes));
364: for (PetscInt i = 0; i < sf->nranks; i++) {
365: remotes[i].rank = sf->ranks[i];
366: remotes[i].index = 0;
367: }
368: PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, dynsf));
369: PetscCall(PetscSFSetType(*dynsf, PETSCSFBASIC)); /* break recursion */
370: PetscCall(PetscSFSetGraph(*dynsf, 1, sf->nranks, NULL, PETSC_OWN_POINTER, remotes, PETSC_OWN_POINTER));
371: PetscFunctionReturn(PETSC_SUCCESS);
372: }
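/* Shape of the helper SF built above (illustrative): every process exposes exactly one
   root (nroots = 1) and owns one leaf per connected root rank, each pointing at index 0
   on that rank.  Broadcasting a single value over it therefore delivers, to each process,
   the value published by every root rank it communicates with, e.g.

     MPI_Aint  mine;    // this process's value (its single root)
     MPI_Aint *theirs;  // one entry per connected root rank (its leaves)
     PetscCall(PetscSFBcastBegin(dynsf, MPI_AINT, &mine, theirs, MPI_REPLACE));
     PetscCall(PetscSFBcastEnd(dynsf, MPI_AINT, &mine, theirs, MPI_REPLACE));

   which is how PetscSFWindowAttach() exchanges the dynamically attached base addresses. */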
374: static PetscErrorCode PetscSFWindowAttach(PetscSF sf, PetscSFWinLink link, void *rootdata, size_t wsize)
375: {
376: PetscFunctionBegin;
377: #if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)
378: {
379: PetscSF_Window *w = (PetscSF_Window *)sf->data;
380: MPI_Comm wcomm;
381: MPI_Aint winaddr;
382: void *addr = rootdata;
383: PetscMPIInt nranks;
384: // some Open MPI versions do not support MPI_Win_attach(win,NULL,0);
385: wcomm = w->window_comm;
386: if (addr != NULL) PetscCallMPI(MPI_Win_attach(link->win, addr, wsize));
387: link->addr = addr;
388: PetscCheck(w->dynsf, wcomm, PETSC_ERR_ORDER, "Must call PetscSFSetUp()");
389: PetscCall(PetscSFGetRootRanks(w->dynsf, &nranks, NULL, NULL, NULL, NULL));
390: PetscCallMPI(MPI_Get_address(addr, &winaddr));
391: if (!link->dyn_target_addr) PetscCall(PetscMalloc1(nranks, &link->dyn_target_addr));
392: PetscCall(PetscSFBcastBegin(w->dynsf, MPI_AINT, &winaddr, link->dyn_target_addr, MPI_REPLACE));
393: PetscCall(PetscSFBcastEnd(w->dynsf, MPI_AINT, &winaddr, link->dyn_target_addr, MPI_REPLACE));
394: }
395: #else
396: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "dynamic windows not supported");
397: #endif
398: PetscFunctionReturn(PETSC_SUCCESS);
399: }
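/* Resulting mapping (illustrative): after the exchange above,

     link->dyn_target_addr[i]  ==  address of rootdata (from MPI_Get_address) on root rank ranks[i]

   For the DYNAMIC flavor this absolute address is what the Bcast/Reduce/FetchAndOp code
   passes as the target displacement, e.g.

     MPI_Get(leafdata, 1, mine[i], ranks[i], link->dyn_target_addr[i], 1, remote[i], win);

   whereas for the other flavors the displacement into the window is simply 0. */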
401: /*
402: PetscSFGetWindow - Get a window for use with a given data type
404: Collective
406: Input Parameters:
407: + sf - star forest
408: . unit - data type
409: . rootdata - array to be sent
410: . leafdata - only used to help uniquely identify windows
411: . sync - type of synchronization `PetscSFWindowSyncType`
412: . epoch - `PETSC_TRUE` to acquire the window and start an epoch, `PETSC_FALSE` to just acquire the window
413: . fenceassert - assert parameter for call to `MPI_Win_fence()`, if sync == `PETSCSF_WINDOW_SYNC_FENCE`
414: . postassert - assert parameter for call to `MPI_Win_post()`, if sync == `PETSCSF_WINDOW_SYNC_ACTIVE`
415: - startassert - assert parameter for call to `MPI_Win_start()`, if sync == `PETSCSF_WINDOW_SYNC_ACTIVE`
417: Output Parameters:
 418: + target_disp - target_disp argument for RMA calls (significant for `PETSCSF_WINDOW_FLAVOR_DYNAMIC` only)
 419: . reqs - array of requests (significant for sync == `PETSCSF_WINDOW_SYNC_LOCK` only)
420: - win - window
422: Level: developer
424: .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFGetRootRanks()`, `PetscSFWindowGetDataTypes()`
425: */
427: static PetscErrorCode PetscSFGetWindow(PetscSF sf, MPI_Datatype unit, void *rootdata, void *leafdata, PetscSFWindowSyncType sync, PetscBool epoch, PetscMPIInt fenceassert, PetscMPIInt postassert, PetscMPIInt startassert, const MPI_Aint **target_disp, MPI_Request **reqs, MPI_Win *win)
428: {
429: PetscSF_Window *w = (PetscSF_Window *)sf->data;
430: MPI_Aint bytes;
431: PetscSFWinLink link;
432: PetscBool reuse = PETSC_FALSE, update = PETSC_FALSE;
433: MPI_Aint wsize;
434: MPI_Comm wcomm;
435: PetscBool is_empty;
437: PetscFunctionBegin;
438: PetscCall(PetscSFGetDatatypeSize_Internal(PetscObjectComm((PetscObject)sf), unit, &bytes));
439: wsize = (MPI_Aint)(bytes * sf->nroots);
440: wcomm = w->window_comm;
441: is_empty = w->is_empty;
442: if (is_empty) {
443: if (target_disp) *target_disp = NULL;
444: if (reqs) *reqs = NULL;
445: *win = MPI_WIN_NULL;
446: PetscFunctionReturn(PETSC_SUCCESS);
447: }
448: if (w->flavor != PETSCSF_WINDOW_FLAVOR_CREATE) reuse = PETSC_TRUE;
449: if (PetscDefined(HAVE_MPI_FEATURE_DYNAMIC_WINDOW) && w->flavor == PETSCSF_WINDOW_FLAVOR_DYNAMIC) {
450: // first search for a persistent window
451: for (link = w->wins; reuse && link; link = link->next) {
452: PetscBool match;
454: if (!link->persistent) continue;
455: match = (link->flavor == w->flavor && link->rootdata == rootdata && link->leafdata == leafdata) ? PETSC_TRUE : PETSC_FALSE;
456: if (PetscDefined(USE_DEBUG)) {
457: PetscInt matches[2];
458: PetscInt all_matches[2];
460: matches[0] = match ? 1 : 0;
461: matches[1] = match ? -1 : 0;
462: PetscCallMPI(MPIU_Allreduce(matches, all_matches, 2, MPIU_INT, MPI_MAX, wcomm));
463: all_matches[1] = -all_matches[1];
464: PetscCheck(all_matches[0] == all_matches[1], wcomm, PETSC_ERR_ARG_INCOMP,
465: "Inconsistent use across MPI processes of persistent leaf and root data registered with PetscSFRegisterPersistent().\n"
466: "Either the persistent data was changed on a subset of processes (which is not allowed),\n"
467: "or persistent data was not deregistered with PetscSFDeregisterPersistent() before being deallocated");
468: }
469: if (match) {
470: PetscCheck(!link->inuse, wcomm, PETSC_ERR_ARG_WRONGSTATE, "Communication already in progress on persistent root and leaf data");
471: PetscCheck(!epoch || !link->epoch, wcomm, PETSC_ERR_ARG_WRONGSTATE, "Communication epoch already open for window");
472: PetscCheck(bytes == link->bytes, wcomm, PETSC_ERR_ARG_WRONGSTATE, "Wrong data type for persistent root and leaf data");
473: *win = link->win;
474: goto found;
475: }
476: }
477: }
478: for (link = w->wins; reuse && link; link = link->next) {
479: if (w->flavor != link->flavor) continue;
480: /* an existing window can be used (1) if it is not in use, (2) if we are
481: not asking to start an epoch or it does not have an already started
482: epoch, and (3) if it is the right size */
483: if (!link->inuse && (!epoch || !link->epoch) && bytes == (MPI_Aint)link->bytes) {
484: if (w->flavor == PETSCSF_WINDOW_FLAVOR_DYNAMIC) {
485: PetscCall(PetscSFWindowAttach(sf, link, rootdata, wsize));
486: } else {
487: update = PETSC_TRUE;
488: }
489: link->rootdata = rootdata;
490: link->leafdata = leafdata;
491: PetscCall(PetscInfo(sf, "Reusing window %" PETSC_INTPTR_T_FMT " of flavor %d for comm %" PETSC_INTPTR_T_FMT "\n", (PETSC_INTPTR_T)link->win, link->flavor, (PETSC_INTPTR_T)wcomm));
492: *win = link->win;
493: goto found;
494: }
495: }
497: PetscCall(PetscNew(&link));
498: link->bytes = bytes;
499: link->next = w->wins;
500: link->flavor = w->flavor;
501: link->dyn_target_addr = NULL;
502: link->reqs = NULL;
503: w->wins = link;
504: link->rootdata = rootdata;
505: link->leafdata = leafdata;
506: if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
507: PetscCall(PetscMalloc1(sf->nranks, &link->reqs));
508: for (PetscMPIInt i = 0; i < sf->nranks; i++) link->reqs[i] = MPI_REQUEST_NULL;
509: }
510: switch (w->flavor) {
511: case PETSCSF_WINDOW_FLAVOR_CREATE:
512: PetscCallMPI(MPI_Win_create(rootdata, wsize, (PetscMPIInt)bytes, w->info, wcomm, &link->win));
513: link->addr = rootdata;
514: break;
515: #if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)
516: case PETSCSF_WINDOW_FLAVOR_DYNAMIC:
517: PetscCallMPI(MPI_Win_create_dynamic(w->info, wcomm, &link->win));
518: PetscCall(PetscSFWindowAttach(sf, link, rootdata, wsize));
519: break;
520: case PETSCSF_WINDOW_FLAVOR_ALLOCATE:
521: PetscCallMPI(MPI_Win_allocate(wsize, (PetscMPIInt)bytes, w->info, wcomm, &link->addr, &link->win));
522: update = PETSC_TRUE;
523: break;
524: #endif
525: #if defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
526: case PETSCSF_WINDOW_FLAVOR_SHARED:
527: PetscCallMPI(MPI_Win_allocate_shared(wsize, (PetscMPIInt)bytes, w->info, wcomm, &link->addr, &link->win));
528: update = PETSC_TRUE;
529: break;
530: #endif
531: default:
532: SETERRQ(wcomm, PETSC_ERR_SUP, "No support for flavor %s", PetscSFWindowFlavorTypes[w->flavor]);
533: }
534: PetscCall(PetscInfo(sf, "New window %" PETSC_INTPTR_T_FMT " of flavor %d for comm %" PETSC_INTPTR_T_FMT "\n", (PETSC_INTPTR_T)link->win, link->flavor, (PETSC_INTPTR_T)wcomm));
535: *win = link->win;
537: found:
539: if (target_disp) *target_disp = link->dyn_target_addr;
540: if (reqs) *reqs = link->reqs;
541: if (update) { /* locks are needed for the "separate" memory model only, the fence guarantees memory-synchronization */
542: PetscMPIInt rank;
544: PetscCallMPI(MPI_Comm_rank(wcomm, &rank));
545: if (sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, MPI_MODE_NOCHECK, *win));
546: PetscCall(PetscMemcpy(link->addr, rootdata, sf->nroots * bytes));
547: if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
548: PetscCallMPI(MPI_Win_unlock(rank, *win));
549: PetscCallMPI(MPI_Win_fence(0, *win));
550: }
551: }
552: link->inuse = PETSC_TRUE;
553: link->epoch = epoch;
554: if (epoch) {
555: switch (sync) {
556: case PETSCSF_WINDOW_SYNC_FENCE:
557: PetscCallMPI(MPI_Win_fence(fenceassert, *win));
558: break;
559: case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */
560: break;
561: case PETSCSF_WINDOW_SYNC_ACTIVE: {
562: MPI_Group ingroup, outgroup;
563: PetscMPIInt isize, osize;
565: /* Open MPI 4.0.2 with btl=vader does not like calling
566: - MPI_Win_complete when ogroup is empty
567: - MPI_Win_wait when igroup is empty
568: So, we do not even issue the corresponding start and post calls
569: The MPI standard (Sec. 11.5.2 of MPI 3.1) only requires that
570: start(outgroup) has a matching post(ingroup)
571: and this is guaranteed by PetscSF
572: */
573: PetscCall(PetscSFGetGroups(sf, &ingroup, &outgroup));
574: PetscCallMPI(MPI_Group_size(ingroup, &isize));
575: PetscCallMPI(MPI_Group_size(outgroup, &osize));
576: if (isize) PetscCallMPI(MPI_Win_post(ingroup, postassert, *win));
577: if (osize) PetscCallMPI(MPI_Win_start(outgroup, startassert, *win));
578: } break;
579: default:
580: SETERRQ(wcomm, PETSC_ERR_PLIB, "Unknown synchronization type");
581: }
582: }
583: PetscFunctionReturn(PETSC_SUCCESS);
584: }
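/* Epoch pairing sketch (illustrative): the epoch opened here is closed by the matching
   PetscSFRestoreWindow() call, roughly

     FENCE : MPI_Win_fence(assert, win)    ... RMA ...  MPI_Win_fence(assert, win)
     ACTIVE: MPI_Win_post + MPI_Win_start  ... RMA ...  MPI_Win_complete + MPI_Win_wait
     LOCK  : handled by the callers (e.g. PetscSFBcastBegin_Window()) with per-target
             MPI_Win_lock/MPI_Win_unlock around each RMA call, so no window-wide epoch
             is opened here. */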
586: /*
587: PetscSFFindWindow - Finds a window that is already in use
589: Not Collective
591: Input Parameters:
592: + sf - star forest
593: . unit - data type
594: . rootdata - array with which the window is associated
595: - leafdata - only used to help uniquely identify windows
597: Output Parameters:
598: + win - window
 599: - reqs - outstanding requests associated with the window
601: Level: developer
603: .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
604: */
605: static PetscErrorCode PetscSFFindWindow(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata, MPI_Win *win, MPI_Request **reqs)
606: {
607: PetscSF_Window *w = (PetscSF_Window *)sf->data;
608: PetscSFWinLink link;
609: PetscBool is_empty;
610: MPI_Aint bytes;
612: PetscFunctionBegin;
613: PetscCall(PetscSFGetDatatypeSize_Internal(PetscObjectComm((PetscObject)sf), unit, &bytes));
614: *win = MPI_WIN_NULL;
615: is_empty = w->is_empty;
616: if (is_empty) {
617: *reqs = NULL;
618: *win = MPI_WIN_NULL;
619: PetscFunctionReturn(PETSC_SUCCESS);
620: }
621: for (link = w->wins; link; link = link->next) {
622: if (rootdata == link->rootdata && leafdata == link->leafdata && bytes == link->bytes) {
623: PetscCall(PetscInfo(sf, "Window %" PETSC_INTPTR_T_FMT " of flavor %d for comm %" PETSC_INTPTR_T_FMT "\n", (PETSC_INTPTR_T)link->win, link->flavor, (PETSC_INTPTR_T)w->window_comm));
624: *win = link->win;
625: *reqs = link->reqs;
626: PetscFunctionReturn(PETSC_SUCCESS);
627: }
628: }
629: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Requested window not in use");
630: }
632: /*
633: PetscSFRestoreWindow - Restores a window obtained with `PetscSFGetWindow()`
635: Collective
637: Input Parameters:
638: + sf - star forest
639: . unit - data type
640: . array - array associated with window
641: . sync - type of synchronization `PetscSFWindowSyncType`
 642: . epoch - close an epoch, must match argument to `PetscSFGetWindow()`
 . fenceassert - assert parameter for call to `MPI_Win_fence()`, if sync == `PETSCSF_WINDOW_SYNC_FENCE`
 643: . update - if we have to update the local window array
644: - win - window
646: Level: developer
648: .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFFindWindow()`
649: */
650: static PetscErrorCode PetscSFRestoreWindow(PetscSF sf, MPI_Datatype unit, void *array, PetscSFWindowSyncType sync, PetscBool epoch, PetscMPIInt fenceassert, PetscBool update, MPI_Win *win)
651: {
652: PetscSF_Window *w = (PetscSF_Window *)sf->data;
653: PetscSFWinLink *p, link;
654: PetscBool reuse = PETSC_FALSE;
655: PetscSFWindowFlavorType flavor;
656: void *laddr;
657: MPI_Aint bytes;
658: MPI_Comm wcomm;
660: PetscFunctionBegin;
661: if (*win == MPI_WIN_NULL) PetscFunctionReturn(PETSC_SUCCESS);
662: wcomm = w->window_comm;
663: for (p = &w->wins; *p; p = &(*p)->next) {
664: link = *p;
665: if (*win == link->win) {
666: PetscCheck(array == link->rootdata, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Matched window, but not array");
667: if (epoch != link->epoch) {
668: PetscCheck(!epoch, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "No epoch to end");
669: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Restoring window without ending epoch");
670: }
671: laddr = link->addr;
672: flavor = link->flavor;
673: bytes = link->bytes;
674: if (flavor != PETSCSF_WINDOW_FLAVOR_CREATE) reuse = PETSC_TRUE;
675: else {
676: *p = link->next;
677: update = PETSC_FALSE;
678: } /* remove from list */
679: goto found;
680: }
681: }
682: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Requested window not in use");
684: found:
685: PetscCall(PetscInfo(sf, "Window %" PETSC_INTPTR_T_FMT " of flavor %d for comm %" PETSC_INTPTR_T_FMT "\n", (PETSC_INTPTR_T)link->win, link->flavor, (PETSC_INTPTR_T)wcomm));
686: if (epoch) {
687: switch (sync) {
688: case PETSCSF_WINDOW_SYNC_FENCE:
689: PetscCallMPI(MPI_Win_fence(fenceassert, *win));
690: break;
691: case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */
692: break;
693: case PETSCSF_WINDOW_SYNC_ACTIVE: {
694: MPI_Group ingroup, outgroup;
695: PetscMPIInt isize, osize;
 697:       /* Open MPI 4.0.2 with btl=vader does not like calling
698: - MPI_Win_complete when ogroup is empty
699: - MPI_Win_wait when igroup is empty
700: The MPI standard (Sec. 11.5.2 of MPI 3.1) only requires that
 701:          - each process that issues a call to MPI_Win_start issues a call to MPI_Win_complete
 702:          - each process that issues a call to MPI_Win_post issues a call to MPI_Win_wait
703: */
704: PetscCall(PetscSFGetGroups(sf, &ingroup, &outgroup));
705: PetscCallMPI(MPI_Group_size(ingroup, &isize));
706: PetscCallMPI(MPI_Group_size(outgroup, &osize));
707: if (osize) PetscCallMPI(MPI_Win_complete(*win));
708: if (isize) PetscCallMPI(MPI_Win_wait(*win));
709: } break;
710: default:
711: SETERRQ(wcomm, PETSC_ERR_PLIB, "Unknown synchronization type");
712: }
713: }
714: #if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)
715: if (link->flavor == PETSCSF_WINDOW_FLAVOR_DYNAMIC && !link->persistent) {
716: if (link->addr != NULL) PetscCallMPI(MPI_Win_detach(link->win, link->addr));
717: link->addr = NULL;
718: }
719: #endif
720: if (update) {
721: if (sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_fence(MPI_MODE_NOPUT | MPI_MODE_NOSUCCEED, *win));
722: PetscCall(PetscMemcpy(array, laddr, sf->nroots * bytes));
723: }
724: link->epoch = PETSC_FALSE;
725: link->inuse = PETSC_FALSE;
726: if (!link->persistent) {
727: link->rootdata = NULL;
728: link->leafdata = NULL;
729: }
730: if (!reuse) {
731: PetscCall(PetscFree(link->dyn_target_addr));
732: PetscCall(PetscFree(link->reqs));
733: PetscCallMPI(MPI_Win_free(&link->win));
734: PetscCall(PetscFree(link));
735: *win = MPI_WIN_NULL;
736: }
737: PetscFunctionReturn(PETSC_SUCCESS);
738: }
740: static PetscErrorCode PetscSFSetUp_Window(PetscSF sf)
741: {
742: PetscSF_Window *w = (PetscSF_Window *)sf->data;
743: MPI_Group ingroup, outgroup;
744: MPI_Comm comm;
746: PetscFunctionBegin;
747: PetscCall(PetscSFSetUpRanks(sf, MPI_GROUP_EMPTY));
748: PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
749: if (w->window_comm == MPI_COMM_NULL) {
750: PetscInt nroots, nleaves, nranks;
751: PetscBool has_empty;
752: PetscMPIInt wcommrank;
753: PetscSF dynsf_full = NULL;
755: if (w->flavor == PETSCSF_WINDOW_FLAVOR_DYNAMIC) PetscCall(PetscSFWindowCreateDynamicSF(sf, &dynsf_full));
757: PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, NULL, NULL));
758: has_empty = (nroots == 0 && nleaves == 0) ? PETSC_TRUE : PETSC_FALSE;
759: nranks = sf->nranks;
760: PetscCall(PetscMalloc1(nranks, &w->wcommranks));
761: w->is_empty = has_empty;
762: PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE, &has_empty, 1, MPIU_BOOL, MPI_LOR, comm));
763: if (has_empty) {
764: PetscMPIInt rank;
765: MPI_Comm raw_comm;
766: PetscSFNode *remotes;
768: PetscCallMPI(MPI_Comm_rank(comm, &rank));
769: PetscCallMPI(MPI_Comm_split(comm, w->is_empty ? 1 : 0, rank, &raw_comm));
770: PetscCall(PetscCommDuplicate(raw_comm, &w->window_comm, NULL));
771: PetscCallMPI(MPI_Comm_free(&raw_comm));
773: PetscCallMPI(MPI_Comm_rank(w->window_comm, &wcommrank));
774: if (!dynsf_full) PetscCall(PetscSFWindowCreateDynamicSF(sf, &dynsf_full));
775: PetscCall(PetscSFBcastBegin(dynsf_full, MPI_INT, &wcommrank, w->wcommranks, MPI_REPLACE));
776: PetscCall(PetscSFBcastEnd(dynsf_full, MPI_INT, &wcommrank, w->wcommranks, MPI_REPLACE));
778: if (w->flavor == PETSCSF_WINDOW_FLAVOR_DYNAMIC) {
779: PetscCall(PetscSFCreate(w->window_comm, &w->dynsf));
780: PetscCall(PetscSFSetType(w->dynsf, PETSCSFBASIC)); /* break recursion */
781: PetscCall(PetscMalloc1(sf->nranks, &remotes));
782: for (PetscInt i = 0; i < sf->nranks; i++) {
783: remotes[i].rank = w->wcommranks[i];
784: remotes[i].index = 0;
785: }
786: PetscCall(PetscSFSetGraph(w->dynsf, 1, sf->nranks, NULL, PETSC_OWN_POINTER, remotes, PETSC_OWN_POINTER));
787: }
788: } else {
789: PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)sf), &w->window_comm, NULL));
790: PetscCall(PetscArraycpy(w->wcommranks, sf->ranks, nranks));
791: PetscCall(PetscObjectReference((PetscObject)dynsf_full));
792: w->dynsf = dynsf_full;
793: }
794: if (w->dynsf) PetscCall(PetscSFSetUp(w->dynsf));
795: PetscCall(PetscSFDestroy(&dynsf_full));
796: }
797: switch (w->sync) {
798: case PETSCSF_WINDOW_SYNC_ACTIVE:
799: PetscCall(PetscSFGetGroups(sf, &ingroup, &outgroup));
800: default:
801: break;
802: }
803: PetscFunctionReturn(PETSC_SUCCESS);
804: }
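/* Worked example of the comm splitting above (illustrative): on 4 processes where rank 2
   has neither roots nor leaves, the MPI_Comm_split() gives

     window_comm = {0, 1, 3}   (color 0; rank 2 lands in a separate comm it never uses)

   so world rank 3 becomes rank 2 of window_comm.  w->wcommranks[] records, for each
   connected root rank listed in sf->ranks[], its rank inside window_comm; these are the
   ranks later passed to MPI_Get()/MPI_Accumulate(), and window creation is collective
   only over the processes that actually hold data. */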
806: static PetscErrorCode PetscSFSetFromOptions_Window(PetscSF sf, PetscOptionItems *PetscOptionsObject)
807: {
808: PetscSF_Window *w = (PetscSF_Window *)sf->data;
809: PetscSFWindowFlavorType flavor = w->flavor;
811: PetscFunctionBegin;
812: PetscOptionsHeadBegin(PetscOptionsObject, "PetscSF Window options");
813: PetscCall(PetscOptionsEnum("-sf_window_sync", "synchronization type to use for PetscSF Window communication", "PetscSFWindowSetSyncType", PetscSFWindowSyncTypes, (PetscEnum)w->sync, (PetscEnum *)&w->sync, NULL));
814: PetscCall(PetscOptionsEnum("-sf_window_flavor", "flavor to use for PetscSF Window creation", "PetscSFWindowSetFlavorType", PetscSFWindowFlavorTypes, (PetscEnum)flavor, (PetscEnum *)&flavor, NULL));
815: PetscCall(PetscSFWindowSetFlavorType(sf, flavor));
816: PetscOptionsHeadEnd();
817: PetscFunctionReturn(PETSC_SUCCESS);
818: }
820: static PetscErrorCode PetscSFReset_Window(PetscSF sf)
821: {
822: PetscSF_Window *w = (PetscSF_Window *)sf->data;
823: PetscSFDataLink link, next;
824: PetscSFWinLink wlink, wnext;
825: PetscInt i;
826: MPI_Comm wcomm;
827: PetscBool is_empty;
829: PetscFunctionBegin;
830: for (link = w->link; link; link = next) {
831: next = link->next;
832: PetscCallMPI(MPI_Type_free(&link->unit));
833: for (i = 0; i < sf->nranks; i++) {
834: PetscCallMPI(MPI_Type_free(&link->mine[i]));
835: PetscCallMPI(MPI_Type_free(&link->remote[i]));
836: }
837: PetscCall(PetscFree2(link->mine, link->remote));
838: PetscCall(PetscFree(link));
839: }
840: w->link = NULL;
841: wcomm = w->window_comm;
842: is_empty = w->is_empty;
843: if (!is_empty) {
844: for (wlink = w->wins; wlink; wlink = wnext) {
845: wnext = wlink->next;
846: PetscCheck(!wlink->inuse, wcomm, PETSC_ERR_ARG_WRONGSTATE, "Window still in use with address %p", (void *)wlink->addr);
847: PetscCall(PetscFree(wlink->dyn_target_addr));
848: PetscCall(PetscFree(wlink->reqs));
849: PetscCallMPI(MPI_Win_free(&wlink->win));
850: PetscCall(PetscFree(wlink));
851: }
852: }
853: w->wins = NULL;
854: PetscCall(PetscSFDestroy(&w->dynsf));
855: if (w->info != MPI_INFO_NULL) PetscCallMPI(MPI_Info_free(&w->info));
856: PetscCall(PetscCommDestroy(&w->window_comm));
857: PetscCall(PetscFree(w->wcommranks));
858: PetscFunctionReturn(PETSC_SUCCESS);
859: }
861: static PetscErrorCode PetscSFRegisterPersistent_Window(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata)
862: {
863: PetscSF_Window *w = (PetscSF_Window *)sf->data;
864: MPI_Aint bytes, wsize;
865: PetscBool is_empty;
866: PetscSFWinLink link;
868: PetscFunctionBegin;
869: PetscCall(PetscSFSetUp(sf));
870: if (w->flavor != PETSCSF_WINDOW_FLAVOR_DYNAMIC) PetscFunctionReturn(PETSC_SUCCESS);
871: PetscCall(PetscSFGetDatatypeSize_Internal(PetscObjectComm((PetscObject)sf), unit, &bytes));
872: wsize = (MPI_Aint)(bytes * sf->nroots);
873: is_empty = w->is_empty;
874: if (is_empty) PetscFunctionReturn(PETSC_SUCCESS);
875: PetscCall(PetscNew(&link));
876: link->flavor = w->flavor;
877: link->next = w->wins;
878: #if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)
879: {
880: MPI_Comm wcomm = w->window_comm;
881: PetscCallMPI(MPI_Win_create_dynamic(w->info, wcomm, &link->win));
882: }
883: #endif
884: PetscCall(PetscSFWindowAttach(sf, link, (void *)rootdata, wsize));
885: link->rootdata = (void *)rootdata;
886: link->leafdata = (void *)leafdata;
887: link->bytes = bytes;
888: link->epoch = PETSC_FALSE;
889: link->inuse = PETSC_FALSE;
890: link->persistent = PETSC_TRUE;
891: w->wins = link;
892: if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {
893: PetscInt i;
895: PetscCall(PetscMalloc1(sf->nranks, &link->reqs));
896: for (i = 0; i < sf->nranks; i++) link->reqs[i] = MPI_REQUEST_NULL;
897: }
898: PetscFunctionReturn(PETSC_SUCCESS);
899: }
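/* Usage sketch (illustrative; assumes the public PetscSFRegisterPersistent() and
   PetscSFDeregisterPersistent() entry points that dispatch to the callbacks in this file
   take the same (sf, unit, rootdata, leafdata) arguments):

     PetscCall(PetscSFWindowSetFlavorType(sf, PETSCSF_WINDOW_FLAVOR_DYNAMIC));
     PetscCall(PetscSFRegisterPersistent(sf, MPIU_SCALAR, rootdata, leafdata));
     for (PetscInt it = 0; it < nits; it++) { // nits is a placeholder iteration count
       PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
       PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
     }
     PetscCall(PetscSFDeregisterPersistent(sf, MPIU_SCALAR, rootdata, leafdata));

   Registration attaches the root array to a dynamic window once, so the repeated
   operations reuse that window instead of attaching and detaching every time. */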
901: static PetscErrorCode PetscSFDeregisterPersistent_Window(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata)
902: {
903: PetscSF_Window *w = (PetscSF_Window *)sf->data;
904: MPI_Aint bytes;
905: MPI_Comm wcomm;
906: PetscBool is_empty;
907: PetscSFWinLink *p;
909: PetscFunctionBegin;
910: PetscCall(PetscSFSetUp(sf));
911: if (w->flavor != PETSCSF_WINDOW_FLAVOR_DYNAMIC) PetscFunctionReturn(PETSC_SUCCESS);
912: PetscCall(PetscSFGetDatatypeSize_Internal(PetscObjectComm((PetscObject)sf), unit, &bytes));
913: wcomm = w->window_comm;
914: is_empty = w->is_empty;
915: if (is_empty) PetscFunctionReturn(PETSC_SUCCESS);
916: for (p = &w->wins; *p; p = &(*p)->next) {
917: PetscSFWinLink link = *p;
918: if (link->flavor == w->flavor && link->persistent && link->rootdata == rootdata && link->leafdata == leafdata && link->bytes == bytes) {
919: PetscCheck(!link->inuse, wcomm, PETSC_ERR_ARG_WRONGSTATE, "Deregistering a window when communication is still in progress");
920: PetscCheck(!link->epoch, wcomm, PETSC_ERR_ARG_WRONGSTATE, "Deregistering a window with an unconcluded epoch");
921: #if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)
922: PetscCallMPI(MPI_Win_detach(link->win, link->addr));
923: link->addr = NULL;
924: #endif
925: PetscCall(PetscFree(link->dyn_target_addr));
926: PetscCall(PetscFree(link->reqs));
927: PetscCallMPI(MPI_Win_free(&link->win));
928: *p = link->next;
929: PetscCall(PetscFree(link));
930: break;
931: }
932: }
933: PetscFunctionReturn(PETSC_SUCCESS);
934: }
936: static PetscErrorCode PetscSFDestroy_Window(PetscSF sf)
937: {
938: PetscFunctionBegin;
939: PetscCall(PetscSFReset_Window(sf));
940: PetscCall(PetscFree(sf->data));
941: PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetSyncType_C", NULL));
942: PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetSyncType_C", NULL));
943: PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetFlavorType_C", NULL));
944: PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetFlavorType_C", NULL));
945: PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetInfo_C", NULL));
946: PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetInfo_C", NULL));
947: PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFRegisterPersistent_C", NULL));
948: PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFDeregisterPersistent_C", NULL));
949: PetscFunctionReturn(PETSC_SUCCESS);
950: }
952: static PetscErrorCode PetscSFView_Window(PetscSF sf, PetscViewer viewer)
953: {
954: PetscSF_Window *w = (PetscSF_Window *)sf->data;
955: PetscBool iascii;
956: PetscViewerFormat format;
958: PetscFunctionBegin;
959: PetscCall(PetscViewerGetFormat(viewer, &format));
960: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
961: if (iascii) {
962: PetscCall(PetscViewerASCIIPrintf(viewer, " current flavor=%s synchronization=%s MultiSF sort=%s\n", PetscSFWindowFlavorTypes[w->flavor], PetscSFWindowSyncTypes[w->sync], sf->rankorder ? "rank-order" : "unordered"));
963: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
964: if (w->info != MPI_INFO_NULL) {
965: PetscMPIInt k, nkeys;
966: char key[MPI_MAX_INFO_KEY], value[MPI_MAX_INFO_VAL];
968: PetscCallMPI(MPI_Info_get_nkeys(w->info, &nkeys));
969: PetscCall(PetscViewerASCIIPrintf(viewer, " current info with %d keys. Ordered key-value pairs follow:\n", nkeys));
970: for (k = 0; k < nkeys; k++) {
971: PetscMPIInt flag;
973: PetscCallMPI(MPI_Info_get_nthkey(w->info, k, key));
974: PetscCallMPI(MPI_Info_get(w->info, key, MPI_MAX_INFO_VAL, value, &flag));
975: PetscCheck(flag, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing key %s", key);
976: PetscCall(PetscViewerASCIIPrintf(viewer, " %s = %s\n", key, value));
977: }
978: } else {
979: PetscCall(PetscViewerASCIIPrintf(viewer, " current info=MPI_INFO_NULL\n"));
980: }
981: }
982: }
983: PetscFunctionReturn(PETSC_SUCCESS);
984: }
986: static PetscErrorCode PetscSFDuplicate_Window(PetscSF sf, PetscSFDuplicateOption opt, PetscSF newsf)
987: {
988: PetscSF_Window *w = (PetscSF_Window *)sf->data;
989: PetscSFWindowSyncType synctype;
991: PetscFunctionBegin;
992: synctype = w->sync;
993: /* HACK: Must use FENCE or LOCK when called from PetscSFGetGroups() because ACTIVE here would cause recursion. */
994: if (!sf->setupcalled) synctype = PETSCSF_WINDOW_SYNC_LOCK;
995: PetscCall(PetscSFWindowSetSyncType(newsf, synctype));
996: PetscCall(PetscSFWindowSetFlavorType(newsf, w->flavor));
997: PetscCall(PetscSFWindowSetInfo(newsf, w->info));
998: PetscFunctionReturn(PETSC_SUCCESS);
999: }
1001: static PetscErrorCode PetscSFBcastBegin_Window(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
1002: {
1003: PetscSF_Window *w = (PetscSF_Window *)sf->data;
1004: PetscMPIInt nranks;
1005: const PetscMPIInt *ranks;
1006: const MPI_Aint *target_disp;
1007: const MPI_Datatype *mine, *remote;
1008: MPI_Request *reqs;
1009: MPI_Win win;
1011: PetscFunctionBegin;
1012: PetscCheck(op == MPI_REPLACE, PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "PetscSFBcastBegin_Window with op!=MPI_REPLACE has not been implemented");
1013: PetscCall(PetscSFGetRootRanks(sf, &nranks, NULL, NULL, NULL, NULL));
1014: PetscCall(PetscSFWindowGetDataTypes(sf, unit, &mine, &remote));
1015: PetscCall(PetscSFGetWindow(sf, unit, (void *)rootdata, leafdata, w->sync, PETSC_TRUE, MPI_MODE_NOPUT | MPI_MODE_NOPRECEDE, MPI_MODE_NOPUT, 0, &target_disp, &reqs, &win));
1016: ranks = w->wcommranks;
1017: for (PetscMPIInt i = 0; i < nranks; i++) {
1018: MPI_Aint tdp = target_disp ? target_disp[i] : 0;
1019: if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {
1020: PetscCallMPI(MPI_Win_lock(MPI_LOCK_SHARED, ranks[i], MPI_MODE_NOCHECK, win));
1021: #if defined(PETSC_HAVE_MPI_RGET)
1022: PetscCallMPI(MPI_Rget(leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], win, &reqs[i]));
1023: #else
1024: PetscCallMPI(MPI_Get(leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], win));
1025: #endif
1026: } else {
1027: CHKMEMQ;
1028: PetscCallMPI(MPI_Get(leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], win));
1029: CHKMEMQ;
1030: }
1031: }
1032: PetscFunctionReturn(PETSC_SUCCESS);
1033: }
1035: static PetscErrorCode PetscSFBcastEnd_Window(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1036: {
1037: PetscSF_Window *w = (PetscSF_Window *)sf->data;
1038: MPI_Win win;
1039: MPI_Request *reqs = NULL;
1041: PetscFunctionBegin;
1042: PetscCall(PetscSFFindWindow(sf, unit, rootdata, leafdata, &win, &reqs));
1043: if (reqs) PetscCallMPI(MPI_Waitall(sf->nranks, reqs, MPI_STATUSES_IGNORE));
1044: if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {
1045: PetscMPIInt nranks;
1046: const PetscMPIInt *ranks;
1048: PetscCall(PetscSFGetRootRanks(sf, &nranks, NULL, NULL, NULL, NULL));
1049: ranks = w->wcommranks;
1050: for (PetscMPIInt i = 0; i < nranks; i++) PetscCallMPI(MPI_Win_unlock(ranks[i], win));
1051: }
1052: PetscCall(PetscSFRestoreWindow(sf, unit, (void *)rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOSTORE | MPI_MODE_NOSUCCEED, PETSC_FALSE, &win));
1053: PetscFunctionReturn(PETSC_SUCCESS);
1054: }
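/* End-to-end usage sketch (illustrative; comm, nroots, nleaves, iremote, rootdata and
   leafdata are placeholders set up elsewhere):

     PetscSF      sf;
     PetscSFNode *iremote;             // nleaves entries: (rank, index) of each leaf's root
     PetscScalar *rootdata, *leafdata;

     PetscCall(PetscSFCreate(comm, &sf));
     PetscCall(PetscSFSetType(sf, PETSCSFWINDOW));
     PetscCall(PetscSFSetGraph(sf, nroots, nleaves, NULL, PETSC_COPY_VALUES, iremote, PETSC_COPY_VALUES));
     PetscCall(PetscSFSetFromOptions(sf));   // honors -sf_window_sync and -sf_window_flavor
     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
     PetscCall(PetscSFDestroy(&sf));

   The Begin call maps onto PetscSFBcastBegin_Window(), which opens an access epoch on the
   root window and issues one MPI_Get()/MPI_Rget() per connected root rank; the End call
   waits on any outstanding requests and closes the epoch. */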
1056: static PetscErrorCode PetscSFReduceBegin_Window(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
1057: {
1058: PetscSF_Window *w = (PetscSF_Window *)sf->data;
1059: PetscMPIInt nranks;
1060: const PetscMPIInt *ranks;
1061: const MPI_Aint *target_disp;
1062: const MPI_Datatype *mine, *remote;
1063: MPI_Win win;
1065: PetscFunctionBegin;
1066: PetscCall(PetscSFGetRootRanks(sf, &nranks, NULL, NULL, NULL, NULL));
1067: PetscCall(PetscSFWindowGetDataTypes(sf, unit, &mine, &remote));
1068: PetscCall(PetscSFWindowOpTranslate(&op));
1069: PetscCall(PetscSFGetWindow(sf, unit, rootdata, (void *)leafdata, w->sync, PETSC_TRUE, MPI_MODE_NOPRECEDE, 0, 0, &target_disp, NULL, &win));
1070: ranks = w->wcommranks;
1071: for (PetscMPIInt i = 0; i < nranks; i++) {
1072: MPI_Aint tdp = target_disp ? target_disp[i] : 0;
1074: if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_lock(MPI_LOCK_SHARED, ranks[i], MPI_MODE_NOCHECK, win));
1075: PetscCallMPI(MPI_Accumulate((void *)leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], op, win));
1076: if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_unlock(ranks[i], win));
1077: }
1078: PetscFunctionReturn(PETSC_SUCCESS);
1079: }
1081: static PetscErrorCode PetscSFReduceEnd_Window(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1082: {
1083: PetscSF_Window *w = (PetscSF_Window *)sf->data;
1084: MPI_Win win;
1085: MPI_Request *reqs = NULL;
1087: PetscFunctionBegin;
1088: PetscCall(PetscSFFindWindow(sf, unit, rootdata, leafdata, &win, &reqs));
1089: if (reqs) PetscCallMPI(MPI_Waitall(sf->nranks, reqs, MPI_STATUSES_IGNORE));
1090: PetscCall(PetscSFRestoreWindow(sf, unit, rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOSUCCEED, PETSC_TRUE, &win));
1091: PetscFunctionReturn(PETSC_SUCCESS);
1092: }
1094: static PetscErrorCode PetscSFFetchAndOpBegin_Window(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, void *leafupdate, MPI_Op op)
1095: {
1096: PetscMPIInt nranks;
1097: const PetscMPIInt *ranks;
1098: const MPI_Datatype *mine, *remote;
1099: const MPI_Aint *target_disp;
1100: MPI_Win win;
1101: PetscSF_Window *w = (PetscSF_Window *)sf->data;
1102: #if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
1103: PetscSFWindowFlavorType oldf;
1104: #endif
1106: PetscFunctionBegin;
1107: PetscCall(PetscSFGetRootRanks(sf, &nranks, NULL, NULL, NULL, NULL));
1108: PetscCall(PetscSFWindowGetDataTypes(sf, unit, &mine, &remote));
1109: PetscCall(PetscSFWindowOpTranslate(&op));
1110: #if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
 1111:   /* FetchAndOp without MPI_Get_accumulate requires locking.
 1112:      We create a new window every time so as not to interfere with a user-defined MPI_Info that may have set "no_locks" to "true" */
1113: oldf = w->flavor;
1114: w->flavor = PETSCSF_WINDOW_FLAVOR_CREATE;
1115: PetscCall(PetscSFGetWindow(sf, unit, rootdata, (void *)leafdata, PETSCSF_WINDOW_SYNC_LOCK, PETSC_FALSE, 0, 0, 0, &target_disp, NULL, &win));
1116: #else
1117: PetscCall(PetscSFGetWindow(sf, unit, rootdata, (void *)leafdata, w->sync, PETSC_TRUE, MPI_MODE_NOPRECEDE, 0, 0, &target_disp, NULL, &win));
1118: #endif
1119: ranks = w->wcommranks;
1120: for (PetscMPIInt i = 0; i < nranks; i++) {
1121: MPI_Aint tdp = target_disp ? target_disp[i] : 0;
1123: #if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
1124: PetscCallMPI(MPI_Win_lock(MPI_LOCK_EXCLUSIVE, ranks[i], 0, win));
1125: PetscCallMPI(MPI_Get(leafupdate, 1, mine[i], ranks[i], tdp, 1, remote[i], win));
1126: PetscCallMPI(MPI_Accumulate((void *)leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], op, win));
1127: PetscCallMPI(MPI_Win_unlock(ranks[i], win));
1128: #else
1129: if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_lock(MPI_LOCK_SHARED, ranks[i], 0, win));
1130: PetscCallMPI(MPI_Get_accumulate((void *)leafdata, 1, mine[i], leafupdate, 1, mine[i], ranks[i], tdp, 1, remote[i], op, win));
1131: if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_unlock(ranks[i], win));
1132: #endif
1133: }
1134: #if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
1135: w->flavor = oldf;
1136: #endif
1137: PetscFunctionReturn(PETSC_SUCCESS);
1138: }
1140: static PetscErrorCode PetscSFFetchAndOpEnd_Window(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1141: {
1142: MPI_Win win;
1143: #if defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
1144: PetscSF_Window *w = (PetscSF_Window *)sf->data;
1145: #endif
1146: MPI_Request *reqs = NULL;
1148: PetscFunctionBegin;
1149: PetscCall(PetscSFFindWindow(sf, unit, rootdata, leafdata, &win, &reqs));
1150: if (reqs) PetscCallMPI(MPI_Waitall(sf->nranks, reqs, MPI_STATUSES_IGNORE));
1151: #if defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
1152: PetscCall(PetscSFRestoreWindow(sf, unit, rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOSUCCEED, PETSC_TRUE, &win));
1153: #else
1154: PetscCall(PetscSFRestoreWindow(sf, unit, rootdata, PETSCSF_WINDOW_SYNC_LOCK, PETSC_FALSE, 0, PETSC_TRUE, &win));
1155: #endif
1156: PetscFunctionReturn(PETSC_SUCCESS);
1157: }
1159: PETSC_INTERN PetscErrorCode PetscSFCreate_Window(PetscSF sf)
1160: {
1161: PetscSF_Window *w = (PetscSF_Window *)sf->data;
1163: PetscFunctionBegin;
1164: sf->ops->SetUp = PetscSFSetUp_Window;
1165: sf->ops->SetFromOptions = PetscSFSetFromOptions_Window;
1166: sf->ops->Reset = PetscSFReset_Window;
1167: sf->ops->Destroy = PetscSFDestroy_Window;
1168: sf->ops->View = PetscSFView_Window;
1169: sf->ops->Duplicate = PetscSFDuplicate_Window;
1170: sf->ops->BcastBegin = PetscSFBcastBegin_Window;
1171: sf->ops->BcastEnd = PetscSFBcastEnd_Window;
1172: sf->ops->ReduceBegin = PetscSFReduceBegin_Window;
1173: sf->ops->ReduceEnd = PetscSFReduceEnd_Window;
1174: sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Window;
1175: sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Window;
1177: PetscCall(PetscNew(&w));
1178: sf->data = (void *)w;
1179: w->sync = PETSCSF_WINDOW_SYNC_FENCE;
1180: w->flavor = PETSCSF_WINDOW_FLAVOR_CREATE;
1181: w->info = MPI_INFO_NULL;
1182: w->window_comm = MPI_COMM_NULL;
1184: PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetSyncType_C", PetscSFWindowSetSyncType_Window));
1185: PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetSyncType_C", PetscSFWindowGetSyncType_Window));
1186: PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetFlavorType_C", PetscSFWindowSetFlavorType_Window));
1187: PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetFlavorType_C", PetscSFWindowGetFlavorType_Window));
1188: PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetInfo_C", PetscSFWindowSetInfo_Window));
1189: PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetInfo_C", PetscSFWindowGetInfo_Window));
1190: PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFRegisterPersistent_C", PetscSFRegisterPersistent_Window));
1191: PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFDeregisterPersistent_C", PetscSFDeregisterPersistent_Window));
1193: #if defined(PETSC_HAVE_OPENMPI)
1194: #if PETSC_PKG_OPENMPI_VERSION_LE(1, 6, 0)
1195: {
1196: PetscBool ackbug = PETSC_FALSE;
1197: PetscCall(PetscOptionsGetBool(NULL, NULL, "-acknowledge_ompi_onesided_bug", &ackbug, NULL));
1198: if (ackbug) {
1199: PetscCall(PetscInfo(sf, "Acknowledged Open MPI bug, proceeding anyway. Expect memory corruption.\n"));
1200: } else SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_LIB, "Open MPI is known to be buggy (https://svn.open-mpi.org/trac/ompi/ticket/1905 and 2656), use -acknowledge_ompi_onesided_bug to proceed");
1201: }
1202: #endif
1203: #endif
1204: PetscFunctionReturn(PETSC_SUCCESS);
1205: }