Actual source code: ex1.c

  1: static const char help[] = "Test star forest communication (PetscSF)\n\n";

  3: /*
  4:     Description: A star is a simple tree with one root and zero or more leaves.
  5:     A star forest is a union of disjoint stars.
  6:     Many common communication patterns can be expressed as updates of rootdata using leafdata and vice-versa.
  7:     This example creates a star forest, communicates values using the graph (see options for types of communication), views the graph, then destroys it.
  8: */
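
/*
  A rough sketch of the split-phase pattern exercised below (the runnable calls are in main();
  MPIU_INT, MPI_REPLACE, and MPI_SUM are simply representative of the types and ops used in this file):

     PetscSFBcastBegin(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE);   -- root -> leaf copy
     PetscSFBcastEnd(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE);
     PetscSFReduceBegin(sf, MPIU_INT, leafdata, rootdata, MPI_SUM);       -- leaf -> root reduction
     PetscSFReduceEnd(sf, MPIU_INT, leafdata, rootdata, MPI_SUM);

  Each root is an entry of the owning rank's root array; each leaf is a local entry referencing one
  (rank, root index) pair, so a single Begin/End pair moves data along every edge of the forest.
*/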

 10: /*
 11:   Include petscsf.h so we can use PetscSF objects. Note that this automatically
 12:   includes petscsys.h.
 13: */
 14: #include <petscsf.h>
 15: #include <petscviewer.h>

 17: /* like PetscSFView() but with an alternative array of local indices */
 18: static PetscErrorCode PetscSFViewCustomLocals_Private(PetscSF sf, const PetscInt locals[], PetscViewer viewer)
 19: {
 20:   const PetscSFNode *iremote;
 21:   PetscInt           i, nroots, nleaves;
 22:   PetscMPIInt        rank, nranks;

 24:   PetscFunctionBeginUser;
 25:   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
 26:   PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, NULL, &iremote));
 27:   PetscCall(PetscSFGetRootRanks(sf, &nranks, NULL, NULL, NULL, NULL));
 28:   PetscCall(PetscViewerASCIIPushTab(viewer));
 29:   PetscCall(PetscViewerASCIIPushSynchronized(viewer));
 30:   PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%d\n", rank, nroots, nleaves, nranks));
 31:   for (i = 0; i < nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%d,%" PetscInt_FMT ")\n", rank, locals[i], (PetscMPIInt)iremote[i].rank, iremote[i].index));
 32:   PetscCall(PetscViewerFlush(viewer));
 33:   PetscCall(PetscViewerASCIIPopTab(viewer));
 34:   PetscCall(PetscViewerASCIIPopSynchronized(viewer));
 35:   PetscFunctionReturn(PETSC_SUCCESS);
 36: }

 38: int main(int argc, char **argv)
 39: {
 40:   PetscInt     i, bs = 2, nroots, nrootsalloc, nleaves, nleavesalloc, *mine, stride;
 41:   PetscSFNode *remote;
 42:   PetscMPIInt  rank, size;
 43:   PetscSF      sf, vsf;
 44:   PetscBool    test_all, test_bcast, test_bcastop, test_reduce, test_degree, test_fetchandop, test_gather, test_scatter, test_embed, test_invert, test_sf_distribute, test_char, test_vector = PETSC_FALSE;
 45:   MPI_Op       mop = MPI_OP_NULL; /* initialize to prevent compiler warnings with cxx_quad build */
 46:   char         opstring[256];
 47:   PetscBool    strflg;

 49:   PetscFunctionBeginUser;
 50:   PetscCall(PetscInitialize(&argc, &argv, NULL, help));
 51:   PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
 52:   PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));

 54:   PetscOptionsBegin(PETSC_COMM_WORLD, "", "PetscSF Test Options", "none");
 55:   test_all = PETSC_FALSE;
 56:   PetscCall(PetscOptionsBool("-test_all", "Test all SF communications", "", test_all, &test_all, NULL));
 57:   test_bcast = test_all;
 58:   PetscCall(PetscOptionsBool("-test_bcast", "Test broadcast", "", test_bcast, &test_bcast, NULL));
 59:   test_bcastop = test_all;
 60:   PetscCall(PetscOptionsBool("-test_bcastop", "Test broadcast and reduce", "", test_bcastop, &test_bcastop, NULL));
 61:   test_reduce = test_all;
 62:   PetscCall(PetscOptionsBool("-test_reduce", "Test reduction", "", test_reduce, &test_reduce, NULL));
 63:   test_char = test_all;
 64:   PetscCall(PetscOptionsBool("-test_char", "Test signed char, unsigned char, and char", "", test_char, &test_char, NULL));
 65:   mop = MPI_SUM;
 66:   PetscCall(PetscStrncpy(opstring, "sum", sizeof(opstring)));
 67:   PetscCall(PetscOptionsString("-test_op", "Designate which MPI_Op to use", "", opstring, opstring, sizeof(opstring), NULL));
 68:   PetscCall(PetscStrcmp("sum", opstring, &strflg));
 69:   if (strflg) mop = MPIU_SUM;
 70:   PetscCall(PetscStrcmp("prod", opstring, &strflg));
 71:   if (strflg) mop = MPI_PROD;
 72:   PetscCall(PetscStrcmp("max", opstring, &strflg));
 73:   if (strflg) mop = MPI_MAX;
 74:   PetscCall(PetscStrcmp("min", opstring, &strflg));
 75:   if (strflg) mop = MPI_MIN;
 76:   PetscCall(PetscStrcmp("land", opstring, &strflg));
 77:   if (strflg) mop = MPI_LAND;
 78:   PetscCall(PetscStrcmp("band", opstring, &strflg));
 79:   if (strflg) mop = MPI_BAND;
 80:   PetscCall(PetscStrcmp("lor", opstring, &strflg));
 81:   if (strflg) mop = MPI_LOR;
 82:   PetscCall(PetscStrcmp("bor", opstring, &strflg));
 83:   if (strflg) mop = MPI_BOR;
 84:   PetscCall(PetscStrcmp("lxor", opstring, &strflg));
 85:   if (strflg) mop = MPI_LXOR;
 86:   PetscCall(PetscStrcmp("bxor", opstring, &strflg));
 87:   if (strflg) mop = MPI_BXOR;
 88:   test_degree = test_all;
 89:   PetscCall(PetscOptionsBool("-test_degree", "Test computation of vertex degree", "", test_degree, &test_degree, NULL));
 90:   test_fetchandop = test_all;
 91:   PetscCall(PetscOptionsBool("-test_fetchandop", "Test atomic Fetch-And-Op", "", test_fetchandop, &test_fetchandop, NULL));
 92:   test_gather = test_all;
 93:   PetscCall(PetscOptionsBool("-test_gather", "Test point gather", "", test_gather, &test_gather, NULL));
 94:   test_scatter = test_all;
 95:   PetscCall(PetscOptionsBool("-test_scatter", "Test point scatter", "", test_scatter, &test_scatter, NULL));
 96:   test_embed = test_all;
 97:   PetscCall(PetscOptionsBool("-test_embed", "Test point embed", "", test_embed, &test_embed, NULL));
 98:   test_invert = test_all;
 99:   PetscCall(PetscOptionsBool("-test_invert", "Test point invert", "", test_invert, &test_invert, NULL));
100:   stride = 1;
101:   PetscCall(PetscOptionsInt("-stride", "Stride for leaf and root data", "", stride, &stride, NULL));
102:   test_sf_distribute = PETSC_FALSE;
103:   PetscCall(PetscOptionsBool("-test_sf_distribute", "Create an SF that 'distributes' to each process, like an alltoall", "", test_sf_distribute, &test_sf_distribute, NULL));
105:   PetscCall(PetscOptionsInt("-bs", "Block size for vectorial SF", "", bs, &bs, NULL));
106:   PetscCall(PetscOptionsBool("-test_vector", "Run tests using the vectorial SF", "", test_vector, &test_vector, NULL));
107:   PetscOptionsEnd();

109:   if (test_sf_distribute) {
110:     nroots       = size;
111:     nrootsalloc  = size;
112:     nleaves      = size;
113:     nleavesalloc = size;
114:     mine         = NULL;
115:     PetscCall(PetscMalloc1(nleaves, &remote));
116:     for (PetscMPIInt ii = 0; ii < size; ii++) {
117:       remote[ii].rank  = ii;
118:       remote[ii].index = rank;
119:     }
120:   } else {
121:     nroots       = 2 + (PetscInt)(rank == 0);
122:     nrootsalloc  = nroots * stride;
123:     nleaves      = 2 + (PetscInt)(rank > 0);
124:     nleavesalloc = nleaves * stride;
125:     mine         = NULL;
126:     if (stride > 1) {
127:       PetscCall(PetscMalloc1(nleaves, &mine));
128:       for (PetscInt i = 0; i < nleaves; i++) mine[i] = stride * i;
129:     }
130:     PetscCall(PetscMalloc1(nleaves, &remote));
131:     /* Left periodic neighbor */
132:     remote[0].rank  = (rank + size - 1) % size;
133:     remote[0].index = 1 * stride;
134:     /* Right periodic neighbor */
135:     remote[1].rank  = (rank + 1) % size;
136:     remote[1].index = 0 * stride;
 137:     if (rank > 0) { /* Every rank other than 0 also references rank 0, root index 2 */
138:       remote[2].rank  = 0;
139:       remote[2].index = 2 * stride;
140:     }
141:   }
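
  /*
     For orientation (assuming 3 ranks and the default stride of 1), the graph built above is:
       rank 0: roots {0,1,2}, leaves 0 <- (2,1), 1 <- (1,0)
       rank 1: roots {0,1},   leaves 0 <- (0,1), 1 <- (2,0), 2 <- (0,2)
       rank 2: roots {0,1},   leaves 0 <- (1,1), 1 <- (0,0), 2 <- (0,2)
     i.e. each rank's first two leaves reference its periodic neighbors and, on ranks > 0, a third
     leaf references root 2 on rank 0. With -stride > 1 the same connectivity is kept, but roots and
     leaves are placed at strided offsets inside larger (nrootsalloc/nleavesalloc) buffers.
  */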

143:   /* Create a star forest for communication */
144:   PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
145:   PetscCall(PetscSFSetFromOptions(sf));
146:   PetscCall(PetscSFSetGraph(sf, nrootsalloc, nleaves, mine, PETSC_OWN_POINTER, remote, PETSC_OWN_POINTER));
147:   PetscCall(PetscSFSetUp(sf));

149:   /* We can also obtain a strided SF to communicate interleaved blocks of data */
150:   PetscCall(PetscSFCreateStridedSF(sf, bs, PETSC_DECIDE, PETSC_DECIDE, &vsf));
151:   if (test_vector) { /* perform all tests below using the vectorial SF */
152:     PetscSF t = sf;

154:     sf  = vsf;
155:     vsf = t;

157:     nroots *= bs;
158:     nleaves *= bs;
159:     nrootsalloc *= bs;
160:     nleavesalloc *= bs;
161:   }
162:   PetscCall(PetscSFDestroy(&vsf));
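
  /*
     When -test_vector is given, the handles are swapped above so that every test below runs on the
     strided (vector) SF, in which each root or leaf of the original graph stands for a block of bs
     interleaved entries; the data counts are therefore scaled by bs. Whichever handle is left in
     vsf (the original SF when -test_vector is set, otherwise the unused strided one) is destroyed
     here.
  */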

 164:   /* View the graph; mostly useful for debugging purposes. */
165:   PetscCall(PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD, PETSC_VIEWER_ASCII_INFO_DETAIL));
166:   PetscCall(PetscSFView(sf, PETSC_VIEWER_STDOUT_WORLD));
167:   PetscCall(PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD));

169:   if (test_bcast) { /* broadcast rootdata into leafdata */
170:     PetscInt *rootdata, *leafdata;
171:     /* Allocate space for send and receive buffers. This example communicates PetscInt, but other types, including
172:      * user-defined structures, could also be used. */
173:     PetscCall(PetscMalloc2(nrootsalloc, &rootdata, nleavesalloc, &leafdata));
174:     /* Set rootdata buffer to be broadcast */
175:     for (i = 0; i < nrootsalloc; i++) rootdata[i] = -1;
176:     for (i = 0; i < nroots; i++) rootdata[i * stride] = 100 * (rank + 1) + i;
 177:     /* Initialize the local buffer; these values are never used. */
178:     for (i = 0; i < nleavesalloc; i++) leafdata[i] = -1;
179:     /* Broadcast entries from rootdata to leafdata. Computation or other communication can be performed between the begin and end calls. */
 180:     // Test the persistent communication code paths by repeating the bcast several times
181:     PetscCall(PetscSFRegisterPersistent(sf, MPIU_INT, rootdata, leafdata));
182:     for (PetscInt i = 0; i < 10; i++) {
183:       PetscCall(PetscSFBcastBegin(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE));
184:       PetscCall(PetscSFBcastEnd(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE));
185:     }
186:     PetscCall(PetscSFDeregisterPersistent(sf, MPIU_INT, rootdata, leafdata));
187:     PetscCall(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD, "## Bcast Rootdata\n"));
188:     PetscCall(PetscIntView(nrootsalloc, rootdata, PETSC_VIEWER_STDOUT_WORLD));
189:     PetscCall(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD, "## Bcast Leafdata\n"));
190:     PetscCall(PetscIntView(nleavesalloc, leafdata, PETSC_VIEWER_STDOUT_WORLD));
191:     PetscCall(PetscFree2(rootdata, leafdata));
192:   }
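
  /*
     As a concrete check of the broadcast above (assuming 3 ranks and stride 1): rank 1's leaves
     reference (0,1), (2,0) and (0,2), so after the Bcast its leafdata should read {101, 300, 102},
     i.e. 100*(owner rank + 1) + root index for each referenced root. Entries not addressed by any
     leaf (present only when -stride > 1) keep their initial -1.
  */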

194:   if (test_bcast && test_char) { /* Bcast with char */
195:     PetscInt len;
196:     char     buf[256];
197:     char    *rootdata, *leafdata;
198:     PetscCall(PetscMalloc2(nrootsalloc, &rootdata, nleavesalloc, &leafdata));
199:     /* Set rootdata buffer to be broadcast */
200:     for (i = 0; i < nrootsalloc; i++) rootdata[i] = '*';
 201:     for (i = 0; i < nroots; i++) rootdata[i * stride] = (char)('A' + rank * 3 + i); /* ranks are small in these tests, so the result still fits in a char */
 202:     /* Initialize the local buffer; these values are never used. */
203:     for (i = 0; i < nleavesalloc; i++) leafdata[i] = '?';

205:     PetscCall(PetscSFBcastBegin(sf, MPI_CHAR, rootdata, leafdata, MPI_REPLACE));
206:     PetscCall(PetscSFBcastEnd(sf, MPI_CHAR, rootdata, leafdata, MPI_REPLACE));

208:     PetscCall(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD, "## Bcast Rootdata in type of char\n"));
209:     len = 0;
210:     PetscCall(PetscSNPrintf(buf, 256, "%4d:", rank));
211:     len += 5;
212:     for (i = 0; i < nrootsalloc; i++) {
213:       PetscCall(PetscSNPrintf(buf + len, 256 - len, "%5c", rootdata[i]));
214:       len += 5;
215:     }
216:     PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "%s\n", buf));
217:     PetscCall(PetscSynchronizedFlush(PETSC_COMM_WORLD, PETSC_STDOUT));

219:     PetscCall(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD, "## Bcast Leafdata in type of char\n"));
220:     len = 0;
221:     PetscCall(PetscSNPrintf(buf, 256, "%4d:", rank));
222:     len += 5;
223:     for (i = 0; i < nleavesalloc; i++) {
224:       PetscCall(PetscSNPrintf(buf + len, 256 - len, "%5c", leafdata[i]));
225:       len += 5;
226:     }
227:     PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "%s\n", buf));
228:     PetscCall(PetscSynchronizedFlush(PETSC_COMM_WORLD, PETSC_STDOUT));

230:     PetscCall(PetscFree2(rootdata, leafdata));
231:   }

233:   if (test_bcastop) { /* Reduce rootdata into leafdata */
234:     PetscInt *rootdata, *leafdata;
235:     /* Allocate space for send and receive buffers. This example communicates PetscInt, but other types, including
236:      * user-defined structures, could also be used. */
237:     PetscCall(PetscMalloc2(nrootsalloc, &rootdata, nleavesalloc, &leafdata));
238:     /* Set rootdata buffer to be broadcast */
239:     for (i = 0; i < nrootsalloc; i++) rootdata[i] = -1;
240:     for (i = 0; i < nroots; i++) rootdata[i * stride] = 100 * (rank + 1) + i;
241:     /* Set leaf values to reduce with */
242:     for (i = 0; i < nleavesalloc; i++) leafdata[i] = -10 * (rank + 1) - i;
243:     PetscCall(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD, "## Pre-BcastAndOp Leafdata\n"));
244:     PetscCall(PetscIntView(nleavesalloc, leafdata, PETSC_VIEWER_STDOUT_WORLD));
245:     /* Broadcast entries from rootdata to leafdata. Computation or other communication can be performed between the begin and end calls. */
246:     PetscCall(PetscSFBcastBegin(sf, MPIU_INT, rootdata, leafdata, mop));
247:     PetscCall(PetscSFBcastEnd(sf, MPIU_INT, rootdata, leafdata, mop));
248:     PetscCall(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD, "## BcastAndOp Rootdata\n"));
249:     PetscCall(PetscIntView(nrootsalloc, rootdata, PETSC_VIEWER_STDOUT_WORLD));
250:     PetscCall(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD, "## BcastAndOp Leafdata\n"));
251:     PetscCall(PetscIntView(nleavesalloc, leafdata, PETSC_VIEWER_STDOUT_WORLD));
252:     PetscCall(PetscFree2(rootdata, leafdata));
253:   }

255:   if (test_reduce) { /* Reduce leafdata into rootdata */
256:     PetscInt *rootdata, *leafdata;
257:     PetscCall(PetscMalloc2(nrootsalloc, &rootdata, nleavesalloc, &leafdata));
258:     /* Initialize rootdata buffer in which the result of the reduction will appear. */
259:     for (i = 0; i < nrootsalloc; i++) rootdata[i] = -1;
260:     for (i = 0; i < nroots; i++) rootdata[i * stride] = 100 * (rank + 1) + i;
261:     /* Set leaf values to reduce. */
262:     for (i = 0; i < nleavesalloc; i++) leafdata[i] = -1;
263:     for (i = 0; i < nleaves; i++) leafdata[i * stride] = 1000 * (rank + 1) + 10 * i;
264:     PetscCall(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD, "## Pre-Reduce Rootdata\n"));
265:     PetscCall(PetscIntView(nrootsalloc, rootdata, PETSC_VIEWER_STDOUT_WORLD));
266:     /* Perform reduction. Computation or other communication can be performed between the begin and end calls.
 267:      * This example sums the values, but other MPI_Ops can be used (e.g., MPI_MAX, MPI_PROD). */
268:     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, leafdata, rootdata, mop));
269:     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, leafdata, rootdata, mop));
270:     PetscCall(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD, "## Reduce Leafdata\n"));
271:     PetscCall(PetscIntView(nleavesalloc, leafdata, PETSC_VIEWER_STDOUT_WORLD));
272:     PetscCall(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD, "## Reduce Rootdata\n"));
273:     PetscCall(PetscIntView(nrootsalloc, rootdata, PETSC_VIEWER_STDOUT_WORLD));
274:     PetscCall(PetscFree2(rootdata, leafdata));
275:   }
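
  /*
     With the default -test_op sum, each root above ends up as its prior value plus the values of
     all leaves that reference it. In the default graph, root 0 receives one contribution (from the
     left neighbor's leaf 1), root 1 receives one (from the right neighbor's leaf 0), and root 2 on
     rank 0 receives one from every other rank. For example, with 3 ranks and stride 1, root 2 on
     rank 0 becomes 102 + 2020 + 3020 = 5142.
  */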

277:   if (test_reduce && test_char) { /* Reduce with signed char */
278:     PetscInt     len;
279:     char         buf[256];
280:     signed char *rootdata, *leafdata;
281:     PetscCall(PetscMalloc2(nrootsalloc, &rootdata, nleavesalloc, &leafdata));
282:     /* Initialize rootdata buffer in which the result of the reduction will appear. */
283:     for (i = 0; i < nrootsalloc; i++) rootdata[i] = -1;
284:     for (i = 0; i < nroots; i++) rootdata[i * stride] = (signed char)(10 * (rank + 1) + i);
285:     /* Set leaf values to reduce. */
286:     for (i = 0; i < nleavesalloc; i++) leafdata[i] = -1;
287:     for (i = 0; i < nleaves; i++) leafdata[i * stride] = (signed char)(50 * (rank + 1) + 10 * i);
288:     PetscCall(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD, "## Pre-Reduce Rootdata in type of signed char\n"));

290:     len = 0;
291:     PetscCall(PetscSNPrintf(buf, 256, "%4d:", rank));
292:     len += 5;
293:     for (i = 0; i < nrootsalloc; i++) {
294:       PetscCall(PetscSNPrintf(buf + len, 256 - len, "%5d", rootdata[i]));
295:       len += 5;
296:     }
297:     PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "%s\n", buf));
298:     PetscCall(PetscSynchronizedFlush(PETSC_COMM_WORLD, PETSC_STDOUT));

 300:     /* MPI_SIGNED_CHAR is used because the MPI standard does not define reductions on MPI_CHAR;
 301:        using MPI_CHAR here would trigger an error. With -test_op max one can see that the sign is honored by MPI_MAX.
 302:      */
303:     PetscCall(PetscSFReduceBegin(sf, MPI_SIGNED_CHAR, leafdata, rootdata, mop));
304:     PetscCall(PetscSFReduceEnd(sf, MPI_SIGNED_CHAR, leafdata, rootdata, mop));

306:     PetscCall(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD, "## Reduce Leafdata in type of signed char\n"));
307:     len = 0;
308:     PetscCall(PetscSNPrintf(buf, 256, "%4d:", rank));
309:     len += 5;
310:     for (i = 0; i < nleavesalloc; i++) {
311:       PetscCall(PetscSNPrintf(buf + len, 256 - len, "%5d", leafdata[i]));
312:       len += 5;
313:     }
314:     PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "%s\n", buf));
315:     PetscCall(PetscSynchronizedFlush(PETSC_COMM_WORLD, PETSC_STDOUT));

317:     PetscCall(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD, "## Reduce Rootdata in type of signed char\n"));
318:     len = 0;
319:     PetscCall(PetscSNPrintf(buf, 256, "%4d:", rank));
320:     len += 5;
321:     for (i = 0; i < nrootsalloc; i++) {
322:       PetscCall(PetscSNPrintf(buf + len, 256 - len, "%5d", rootdata[i]));
323:       len += 5;
324:     }
325:     PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "%s\n", buf));
326:     PetscCall(PetscSynchronizedFlush(PETSC_COMM_WORLD, PETSC_STDOUT));

328:     PetscCall(PetscFree2(rootdata, leafdata));
329:   }

331:   if (test_reduce && test_char) { /* Reduce with unsigned char */
332:     PetscInt       len;
333:     char           buf[256];
334:     unsigned char *rootdata, *leafdata;
335:     PetscCall(PetscMalloc2(nrootsalloc, &rootdata, nleavesalloc, &leafdata));
336:     /* Initialize rootdata buffer in which the result of the reduction will appear. */
337:     for (i = 0; i < nrootsalloc; i++) rootdata[i] = 0;
338:     for (i = 0; i < nroots; i++) rootdata[i * stride] = (unsigned char)(10 * (rank + 1) + i);
339:     /* Set leaf values to reduce. */
340:     for (i = 0; i < nleavesalloc; i++) leafdata[i] = 0;
341:     for (i = 0; i < nleaves; i++) leafdata[i * stride] = (unsigned char)(50 * (rank + 1) + 10 * i);
342:     PetscCall(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD, "## Pre-Reduce Rootdata in type of unsigned char\n"));

344:     len = 0;
345:     PetscCall(PetscSNPrintf(buf, 256, "%4d:", rank));
346:     len += 5;
347:     for (i = 0; i < nrootsalloc; i++) {
348:       PetscCall(PetscSNPrintf(buf + len, 256 - len, "%5u", rootdata[i]));
349:       len += 5;
350:     }
351:     PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "%s\n", buf));
352:     PetscCall(PetscSynchronizedFlush(PETSC_COMM_WORLD, PETSC_STDOUT));

 354:     /* As above, MPI_UNSIGNED_CHAR is used because the MPI standard does not define reductions on MPI_CHAR.
 355:        With -test_op max, the comparison is unsigned here, unlike the signed char case above.
 356:      */
357:     PetscCall(PetscSFReduceBegin(sf, MPI_UNSIGNED_CHAR, leafdata, rootdata, mop));
358:     PetscCall(PetscSFReduceEnd(sf, MPI_UNSIGNED_CHAR, leafdata, rootdata, mop));

360:     PetscCall(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD, "## Reduce Leafdata in type of unsigned char\n"));
361:     len = 0;
362:     PetscCall(PetscSNPrintf(buf, 256, "%4d:", rank));
363:     len += 5;
364:     for (i = 0; i < nleavesalloc; i++) {
365:       PetscCall(PetscSNPrintf(buf + len, 256 - len, "%5u", leafdata[i]));
366:       len += 5;
367:     }
368:     PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "%s\n", buf));
369:     PetscCall(PetscSynchronizedFlush(PETSC_COMM_WORLD, PETSC_STDOUT));

371:     PetscCall(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD, "## Reduce Rootdata in type of unsigned char\n"));
372:     len = 0;
373:     PetscCall(PetscSNPrintf(buf, 256, "%4d:", rank));
374:     len += 5;
375:     for (i = 0; i < nrootsalloc; i++) {
376:       PetscCall(PetscSNPrintf(buf + len, 256 - len, "%5u", rootdata[i]));
377:       len += 5;
378:     }
379:     PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "%s\n", buf));
380:     PetscCall(PetscSynchronizedFlush(PETSC_COMM_WORLD, PETSC_STDOUT));

382:     PetscCall(PetscFree2(rootdata, leafdata));
383:   }

385:   if (test_degree) {
386:     const PetscInt *degree;
387:     PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
388:     PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
389:     PetscCall(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD, "## Root degrees\n"));
390:     PetscCall(PetscIntView(nrootsalloc, degree, PETSC_VIEWER_STDOUT_WORLD));
391:   }
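
  /*
     degree[i] is the number of leaves, across all ranks, that reference root i. With the default
     graph, roots 0 and 1 have degree 1 on every rank, root 2 on rank 0 has degree size-1, and any
     padding slots introduced by -stride > 1 have degree 0. The gather and scatter tests below use
     these counts to size their "multi-root" buffers.
  */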

393:   if (test_fetchandop) {
 394:     /* The output cannot be compared against a reference file here because the order in which leaf updates reach the roots is not deterministic */
395:     PetscInt *leafdata, *leafupdate, *rootdata;
396:     PetscCall(PetscMalloc3(nleavesalloc, &leafdata, nleavesalloc, &leafupdate, nrootsalloc, &rootdata));
397:     for (i = 0; i < nleavesalloc; i++) leafdata[i] = -1;
398:     for (i = 0; i < nleaves; i++) leafdata[i * stride] = 1;
399:     for (i = 0; i < nrootsalloc; i++) rootdata[i] = -1;
400:     for (i = 0; i < nroots; i++) rootdata[i * stride] = 0;
401:     PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, rootdata, leafdata, leafupdate, mop));
402:     PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, rootdata, leafdata, leafupdate, mop));
403:     PetscCall(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD, "## Rootdata (sum of 1 from each leaf)\n"));
404:     PetscCall(PetscIntView(nrootsalloc, rootdata, PETSC_VIEWER_STDOUT_WORLD));
405:     PetscCall(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD, "## Leafupdate (value at roots prior to my atomic update)\n"));
406:     PetscCall(PetscIntView(nleavesalloc, leafupdate, PETSC_VIEWER_STDOUT_WORLD));
407:     PetscCall(PetscFree3(leafdata, leafupdate, rootdata));
408:   }
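
  /*
     Fetch-and-op applies mop from each leaf to its root atomically and returns, in leafupdate, the
     root value seen immediately before that leaf's own update. With the default sum and the data
     above (active roots start at 0, every leaf contributes 1), each active root ends up equal to
     its degree, while the leafupdate values for leaves sharing a root form a permutation of
     0..degree-1 that depends on arrival order, hence the non-deterministic output noted above.
  */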

410:   if (test_gather) {
411:     const PetscInt *degree;
412:     PetscInt        inedges, *indata, *outdata;
413:     PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
414:     PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
415:     for (i = 0, inedges = 0; i < nrootsalloc; i++) inedges += degree[i];
416:     PetscCall(PetscMalloc2(inedges, &indata, nleavesalloc, &outdata));
417:     for (i = 0; i < nleavesalloc; i++) outdata[i] = -1;
418:     for (i = 0; i < nleaves; i++) outdata[i * stride] = 1000 * (rank + 1) + i;
419:     PetscCall(PetscSFGatherBegin(sf, MPIU_INT, outdata, indata));
420:     PetscCall(PetscSFGatherEnd(sf, MPIU_INT, outdata, indata));
421:     PetscCall(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD, "## Gathered data at multi-roots from leaves\n"));
422:     PetscCall(PetscIntView(inedges, indata, PETSC_VIEWER_STDOUT_WORLD));
423:     PetscCall(PetscFree2(indata, outdata));
424:   }
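
  /*
     Gather is, roughly, the multi-root analogue of reduce: instead of being combined, the degree[i]
     leaf values destined for root i are stored side by side in indata, filling inedges (the sum of
     all degrees) entries in total. The scatter test below goes the other way, handing a distinct
     multi-root entry back to each leaf.
  */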

426:   if (test_scatter) {
427:     const PetscInt *degree;
428:     PetscInt        j, count, inedges, *indata, *outdata;
429:     PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
430:     PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
431:     for (i = 0, inedges = 0; i < nrootsalloc; i++) inedges += degree[i];
432:     PetscCall(PetscMalloc2(inedges, &indata, nleavesalloc, &outdata));
433:     for (i = 0; i < nleavesalloc; i++) outdata[i] = -1;
434:     for (i = 0, count = 0; i < nrootsalloc; i++) {
435:       for (j = 0; j < degree[i]; j++) indata[count++] = 1000 * (rank + 1) + 100 * i + j;
436:     }
437:     PetscCall(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD, "## Data at multi-roots, to scatter to leaves\n"));
438:     PetscCall(PetscIntView(inedges, indata, PETSC_VIEWER_STDOUT_WORLD));

440:     PetscCall(PetscSFScatterBegin(sf, MPIU_INT, indata, outdata));
441:     PetscCall(PetscSFScatterEnd(sf, MPIU_INT, indata, outdata));
442:     PetscCall(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD, "## Scattered data at leaves\n"));
443:     PetscCall(PetscIntView(nleavesalloc, outdata, PETSC_VIEWER_STDOUT_WORLD));
444:     PetscCall(PetscFree2(indata, outdata));
445:   }

447:   if (test_embed) {
448:     const PetscInt nroots = 1 + (PetscInt)(rank == 0);
449:     PetscInt       selected[2];
450:     PetscSF        esf;

452:     selected[0] = stride;
453:     selected[1] = 2 * stride;
454:     PetscCall(PetscSFCreateEmbeddedRootSF(sf, nroots, selected, &esf));
455:     PetscCall(PetscSFSetUp(esf));
456:     PetscCall(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD, "## Embedded PetscSF\n"));
457:     PetscCall(PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD, PETSC_VIEWER_ASCII_INFO_DETAIL));
458:     PetscCall(PetscSFView(esf, PETSC_VIEWER_STDOUT_WORLD));
459:     PetscCall(PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD));
460:     PetscCall(PetscSFDestroy(&esf));
461:   }

463:   if (test_invert) {
464:     const PetscInt *degree;
465:     PetscInt       *mRootsOrigNumbering;
466:     PetscInt        inedges;
467:     PetscSF         msf, imsf;

469:     PetscCall(PetscSFGetMultiSF(sf, &msf));
470:     PetscCall(PetscSFCreateInverseSF(msf, &imsf));
471:     PetscCall(PetscSFSetUp(msf));
472:     PetscCall(PetscSFSetUp(imsf));
473:     PetscCall(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD, "## Multi-SF\n"));
474:     PetscCall(PetscSFView(msf, PETSC_VIEWER_STDOUT_WORLD));
475:     PetscCall(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD, "## Multi-SF roots indices in original SF roots numbering\n"));
476:     PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
477:     PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
478:     PetscCall(PetscSFComputeMultiRootOriginalNumbering(sf, degree, &inedges, &mRootsOrigNumbering));
479:     PetscCall(PetscIntView(inedges, mRootsOrigNumbering, PETSC_VIEWER_STDOUT_WORLD));
480:     PetscCall(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD, "## Inverse of Multi-SF\n"));
481:     PetscCall(PetscSFView(imsf, PETSC_VIEWER_STDOUT_WORLD));
482:     PetscCall(PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD, "## Inverse of Multi-SF, original numbering\n"));
483:     PetscCall(PetscSFViewCustomLocals_Private(imsf, mRootsOrigNumbering, PETSC_VIEWER_STDOUT_WORLD));
484:     PetscCall(PetscSFDestroy(&imsf));
485:     PetscCall(PetscFree(mRootsOrigNumbering));
486:   }
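
  /*
     Roughly, the multi-SF splits each root into degree[i] separate roots, one per incoming leaf, so
     every leaf has a root of its own; PetscSFCreateInverseSF then swaps the roles of roots and
     leaves. PetscSFComputeMultiRootOriginalNumbering recovers, for each multi-root, the index of
     the original root it was split from, which is what the custom viewer above prints as the local
     indices.
  */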

488:   /* Clean storage for star forest. */
489:   PetscCall(PetscSFDestroy(&sf));
490:   PetscCall(PetscFinalize());
491:   return 0;
492: }
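
/*
  A typical interactive run (launcher and option choices are illustrative; see the TEST block below
  for the combinations exercised by the PETSc test harness):

     mpiexec -n 4 ./ex1 -test_all -sf_type basic
*/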

494: /*TEST

496:    test:
497:       nsize: 4
498:       filter: grep -v "type" | grep -v "sort"
499:       args: -test_bcast -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create dynamic allocate}}
500:       requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)

502:    test:
503:       suffix: 2
504:       nsize: 4
505:       filter: grep -v "type" | grep -v "sort"
506:       args: -test_reduce -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create dynamic allocate}}
507:       requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)

509:    test:
510:       suffix: 2_basic
511:       nsize: 4
512:       args: -test_reduce -sf_type basic

514:    test:
515:       suffix: 3
516:       nsize: 4
517:       filter: grep -v "type" | grep -v "sort"
518:       args: -test_degree -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create dynamic allocate}}
519:       requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)

521:    test:
522:       suffix: 3_basic
523:       nsize: 4
524:       args: -test_degree -sf_type basic

526:    test:
527:       suffix: 4
528:       nsize: 4
529:       filter: grep -v "type" | grep -v "sort"
530:       args: -test_gather -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create dynamic allocate}}
531:       requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)

533:    test:
534:       suffix: 4_basic
535:       nsize: 4
536:       args: -test_gather -sf_type basic

538:    test:
539:       suffix: 4_stride
540:       nsize: 4
541:       args: -test_gather -sf_type basic -stride 2

543:    test:
544:       suffix: 5
545:       nsize: 4
546:       filter: grep -v "type" | grep -v "sort"
547:       args: -test_scatter -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create dynamic allocate}}
548:       requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)

550:    test:
551:       suffix: 5_basic
552:       nsize: 4
553:       args: -test_scatter -sf_type basic

555:    test:
556:       suffix: 5_stride
557:       nsize: 4
558:       args: -test_scatter -sf_type basic -stride 2

560:    test:
561:       suffix: 6
562:       nsize: 4
563:       filter: grep -v "type" | grep -v "sort"
564:       # No -sf_window_flavor dynamic due to bug https://gitlab.com/petsc/petsc/issues/555
565:       args: -test_embed -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create allocate}}
566:       requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)

568:    test:
569:       suffix: 6_basic
570:       nsize: 4
571:       args: -test_embed -sf_type basic

573:    test:
574:       suffix: 7
575:       nsize: 4
576:       filter: grep -v "type" | grep -v "sort"
577:       args: -test_invert -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create dynamic allocate}}
578:       requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)

580:    test:
581:       suffix: 7_basic
582:       nsize: 4
583:       args: -test_invert -sf_type basic

585:    test:
586:       suffix: basic
587:       nsize: 4
588:       args: -test_bcast -sf_type basic
589:       output_file: output/ex1_1_basic.out

591:    test:
592:       suffix: bcastop_basic
593:       nsize: 4
594:       args: -test_bcastop -sf_type basic
595:       output_file: output/ex1_bcastop_basic.out

597:    test:
598:       suffix: 8
599:       nsize: 3
600:       filter: grep -v "type" | grep -v "sort"
601:       args: -test_bcast -test_sf_distribute -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create dynamic allocate}}
602:       requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)

604:    test:
605:       suffix: 8_basic
606:       nsize: 3
607:       args: -test_bcast -test_sf_distribute -sf_type basic

609:    test:
610:       suffix: 9_char
611:       nsize: 4
612:       args: -sf_type basic -test_bcast -test_reduce -test_op max -test_char

614:    # Here we do not test -sf_window_flavor dynamic since it is designed for repeated SFs with few different rootdata pointers
615:    test:
616:       suffix: 10
617:       filter: grep -v "type" | grep -v "sort"
618:       nsize: 4
619:       args: -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create allocate}} -test_all -test_bcastop 0 -test_fetchandop 0
620:       requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)

622:    # The nightly test suite with MPICH uses ch3:sock, which is broken when winsize == 0 in some of the processes
623:    test:
624:       suffix: 10_shared
625:       output_file: output/ex1_10.out
626:       filter: grep -v "type" | grep -v "sort"
627:       nsize: 4
628:       args: -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor shared -test_all -test_bcastop 0 -test_fetchandop 0
629:       requires: defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY) !defined(PETSC_HAVE_MPICH) defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)

631:    test:
632:       suffix: 10_basic
633:       nsize: 4
634:       args: -sf_type basic -test_all -test_bcastop 0 -test_fetchandop 0

636:    test:
637:       suffix: 10_basic_vector
638:       nsize: 4
639:       args: -sf_type basic -test_all -test_bcastop 0 -test_fetchandop 0 -test_vector

641: TEST*/