Actual source code: hem.c
1: #include <petsc/private/matimpl.h>
2: #include <../src/mat/impls/aij/seq/aij.h>
3: #include <../src/mat/impls/aij/mpi/mpiaij.h>
4: #include <petscdm.h>
6: /* linked list methods
7: *
8: * PetscCDCreate
9: */
10: PetscErrorCode PetscCDCreate(PetscInt a_size, PetscCoarsenData **a_out)
11: {
12: PetscCoarsenData *ail;
14: PetscFunctionBegin;
15: /* allocate the container; the node pool itself is allocated lazily */
16: PetscCall(PetscNew(&ail));
17: *a_out = ail;
18: ail->pool_list.next = NULL;
19: ail->pool_list.array = NULL;
20: ail->chk_sz = 0;
21: /* allocate array */
22: ail->size = a_size;
23: PetscCall(PetscCalloc1(a_size, &ail->array));
24: ail->extra_nodes = NULL;
25: ail->mat = NULL;
26: PetscFunctionReturn(PETSC_SUCCESS);
27: }
29: /* PetscCDDestroy
30: */
31: PetscErrorCode PetscCDDestroy(PetscCoarsenData *ail)
32: {
33: PetscCDArrNd *n = &ail->pool_list;
35: PetscFunctionBegin;
36: n = n->next;
37: while (n) {
38: PetscCDArrNd *lstn = n;
40: n = n->next;
41: PetscCall(PetscFree(lstn));
42: }
43: if (ail->pool_list.array) PetscCall(PetscFree(ail->pool_list.array));
44: PetscCall(PetscFree(ail->array));
45: if (ail->mat) PetscCall(MatDestroy(&ail->mat));
46: /* delete this (+agg+pool array) */
47: PetscCall(PetscFree(ail));
48: PetscFunctionReturn(PETSC_SUCCESS);
49: }
51: /* PetscCDSetChunkSize
52: */
53: PetscErrorCode PetscCDSetChunkSize(PetscCoarsenData *ail, PetscInt a_sz)
54: {
55: PetscFunctionBegin;
56: ail->chk_sz = a_sz;
57: PetscFunctionReturn(PETSC_SUCCESS);
58: }
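/* Usage sketch (illustrative only, not part of hem.c): typical lifecycle of a
   PetscCoarsenData container with one list per local vertex; 'nloc' is assumed
   from the caller. The chunk size only controls how the internal node pool
   grows and should be set before the first append.

     PetscCoarsenData *cd;

     PetscCall(PetscCDCreate(nloc, &cd));          // nloc empty lists
     PetscCall(PetscCDSetChunkSize(cd, nloc + 1)); // pool grows nloc+1 nodes at a time
     // ... append IDs and iterate (see the sketches further below) ...
     PetscCall(PetscCDDestroy(cd)); // frees the lists, the node pool, and any cached Mat
*/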
60: /* PetscCDGetNewNode
61: */
62: static PetscErrorCode PetscCDGetNewNode(PetscCoarsenData *ail, PetscCDIntNd **a_out, PetscInt a_id)
63: {
64: PetscFunctionBegin;
65: *a_out = NULL; /* squelch -Wmaybe-uninitialized */
66: if (ail->extra_nodes) {
67: PetscCDIntNd *node = ail->extra_nodes;
69: ail->extra_nodes = node->next;
70: node->gid = a_id;
71: node->next = NULL;
72: *a_out = node;
73: } else {
74: if (!ail->pool_list.array) {
75: if (!ail->chk_sz) ail->chk_sz = 10; /* use a chunk size of ail->size? */
76: PetscCall(PetscMalloc1(ail->chk_sz, &ail->pool_list.array));
77: ail->new_node = ail->pool_list.array;
78: ail->new_left = ail->chk_sz;
79: ail->new_node->next = NULL;
80: } else if (!ail->new_left) {
81: PetscCDArrNd *node;
83: PetscCall(PetscMalloc(ail->chk_sz * sizeof(PetscCDIntNd) + sizeof(PetscCDArrNd), &node));
84: node->array = (PetscCDIntNd *)(node + 1);
85: node->next = ail->pool_list.next;
86: ail->pool_list.next = node;
87: ail->new_left = ail->chk_sz;
88: ail->new_node = node->array;
89: }
90: ail->new_node->gid = a_id;
91: ail->new_node->next = NULL;
92: *a_out = ail->new_node++;
93: ail->new_left--;
94: }
95: PetscFunctionReturn(PETSC_SUCCESS);
96: }
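/* Worked example (illustrative): with chk_sz = 3, appending 7 IDs first mallocs
   pool_list.array (3 nodes), then two chained PetscCDArrNd blocks of 3 nodes each
   (header and nodes in a single allocation), i.e. 9 node slots for 7 live nodes.
   Nodes released by PetscCDRemoveAllAt() go onto 'extra_nodes' and are recycled
   here before any new pool memory is used. */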
98: /* PetscCDIntNdSetID
99: */
100: PetscErrorCode PetscCDIntNdSetID(PetscCDIntNd *a_this, PetscInt a_id)
101: {
102: PetscFunctionBegin;
103: a_this->gid = a_id;
104: PetscFunctionReturn(PETSC_SUCCESS);
105: }
107: /* PetscCDIntNdGetID
108: */
109: PetscErrorCode PetscCDIntNdGetID(const PetscCDIntNd *a_this, PetscInt *a_gid)
110: {
111: PetscFunctionBegin;
112: *a_gid = a_this->gid;
113: PetscFunctionReturn(PETSC_SUCCESS);
114: }
116: /* PetscCDGetHeadPos
117: */
118: PetscErrorCode PetscCDGetHeadPos(const PetscCoarsenData *ail, PetscInt a_idx, PetscCDIntNd **pos)
119: {
120: PetscFunctionBegin;
121: PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "a_idx >= ail->size: a_idx=%" PetscInt_FMT ".", a_idx);
122: *pos = ail->array[a_idx];
123: PetscFunctionReturn(PETSC_SUCCESS);
124: }
126: /* PetscCDGetNextPos
127: */
128: PetscErrorCode PetscCDGetNextPos(const PetscCoarsenData *ail, PetscInt l_idx, PetscCDIntNd **pos)
129: {
130: PetscFunctionBegin;
131: PetscCheck(*pos, PETSC_COMM_SELF, PETSC_ERR_PLIB, "NULL input position.");
132: *pos = (*pos)->next;
133: PetscFunctionReturn(PETSC_SUCCESS);
134: }
136: /* PetscCDAppendID
137: */
138: PetscErrorCode PetscCDAppendID(PetscCoarsenData *ail, PetscInt a_idx, PetscInt a_id)
139: {
140: PetscCDIntNd *n, *n2;
142: PetscFunctionBegin;
143: PetscCall(PetscCDGetNewNode(ail, &n, a_id));
144: PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx);
145: if (!(n2 = ail->array[a_idx])) ail->array[a_idx] = n;
146: else {
147: do {
148: if (!n2->next) {
149: n2->next = n;
150: PetscCheck(!n->next, PETSC_COMM_SELF, PETSC_ERR_PLIB, "n should not have a next");
151: break;
152: }
153: n2 = n2->next;
154: } while (n2);
155: PetscCheck(n2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "n2 should be non-null");
156: }
157: PetscFunctionReturn(PETSC_SUCCESS);
158: }
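/* Usage sketch (illustrative only, not part of hem.c): build list 0 and walk it
   with the head/next iterator; 'cd' is a PetscCoarsenData from PetscCDCreate().

     PetscCDIntNd *pos;
     PetscInt      gid, len;

     PetscCall(PetscCDAppendID(cd, 0, 17)); // list 0: 17
     PetscCall(PetscCDAppendID(cd, 0, 42)); // list 0: 17 42
     PetscCall(PetscCDGetHeadPos(cd, 0, &pos));
     while (pos) {
       PetscCall(PetscCDIntNdGetID(pos, &gid));   // 17, then 42
       PetscCall(PetscCDGetNextPos(cd, 0, &pos)); // NULL after the tail
     }
     PetscCall(PetscCDCountAt(cd, 0, &len)); // len == 2
*/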
160: /* PetscCDAppendNode
161: */
162: PetscErrorCode PetscCDAppendNode(PetscCoarsenData *ail, PetscInt a_idx, PetscCDIntNd *a_n)
163: {
164: PetscCDIntNd *n2;
166: PetscFunctionBegin;
167: PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx);
168: if (!(n2 = ail->array[a_idx])) ail->array[a_idx] = a_n;
169: else {
170: do {
171: if (!n2->next) {
172: n2->next = a_n;
173: a_n->next = NULL;
174: break;
175: }
176: n2 = n2->next;
177: } while (n2);
178: PetscCheck(n2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "n2 should be non-null");
179: }
180: PetscFunctionReturn(PETSC_SUCCESS);
181: }
183: /* PetscCDRemoveNextNode - remove the node after a_last (a_last->next); this exposes the singly linked list structure to the API (not used)
184: */
185: PetscErrorCode PetscCDRemoveNextNode(PetscCoarsenData *ail, PetscInt a_idx, PetscCDIntNd *a_last)
186: {
187: PetscCDIntNd *del;
189: PetscFunctionBegin;
190: PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx);
191: PetscCheck(a_last->next, PETSC_COMM_SELF, PETSC_ERR_PLIB, "a_last should have a next");
192: del = a_last->next;
193: a_last->next = del->next;
194: /* del->next = NULL; -- this is still used in an iterator so keep it intact -- need to fix this with a doubly linked list */
195: /* could reuse n2 but PetscCDAppendNode sometimes uses it */
196: PetscFunctionReturn(PETSC_SUCCESS);
197: }
199: /* PetscCDPrint
200: */
201: PetscErrorCode PetscCDPrint(const PetscCoarsenData *ail, PetscInt Istart, MPI_Comm comm)
202: {
203: PetscCDIntNd *n, *n2;
204: PetscInt ii;
206: PetscFunctionBegin;
207: for (ii = 0; ii < ail->size; ii++) {
208: n2 = n = ail->array[ii];
209: if (n) PetscCall(PetscSynchronizedPrintf(comm, "list %" PetscInt_FMT ":", ii + Istart));
210: while (n) {
211: PetscCall(PetscSynchronizedPrintf(comm, " %" PetscInt_FMT, n->gid));
212: n = n->next;
213: }
214: if (n2) PetscCall(PetscSynchronizedPrintf(comm, "\n"));
215: }
216: PetscCall(PetscSynchronizedFlush(comm, PETSC_STDOUT));
217: PetscFunctionReturn(PETSC_SUCCESS);
218: }
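/* Example output (illustrative): with Istart = 100, a local list at index 3
   holding the gids 103 and 250 prints as

     list 103: 103 250

   one line per nonempty list, synchronized across the communicator. */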
220: /* PetscCDMoveAppend - take the list at a_srcidx and append it to the list at a_destidx
221: */
222: PetscErrorCode PetscCDMoveAppend(PetscCoarsenData *ail, PetscInt a_destidx, PetscInt a_srcidx)
223: {
224: PetscCDIntNd *n;
226: PetscFunctionBegin;
227: PetscCheck(a_srcidx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_srcidx);
228: PetscCheck(a_destidx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_destidx);
229: PetscCheck(a_destidx != a_srcidx, PETSC_COMM_SELF, PETSC_ERR_PLIB, "a_destidx==a_srcidx %" PetscInt_FMT ".", a_destidx);
230: n = ail->array[a_destidx];
231: if (!n) ail->array[a_destidx] = ail->array[a_srcidx];
232: else {
233: do {
234: if (!n->next) {
235: n->next = ail->array[a_srcidx]; // append
236: break;
237: }
238: n = n->next;
239: } while (1);
240: }
241: ail->array[a_srcidx] = NULL; // empty
242: PetscFunctionReturn(PETSC_SUCCESS);
243: }
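/* Worked example (illustrative): if list 2 holds "5 9" and list 7 holds "11 30",
   then PetscCDMoveAppend(cd, 2, 7) leaves list 2 as "5 9 11 30" and list 7 empty.
   No nodes are copied or freed; the source chain is spliced onto the tail of the
   destination, costing only a walk over the destination list. */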
245: /* PetscCDRemoveAllAt - empty one list and move its nodes to the free-node cache
246: */
247: PetscErrorCode PetscCDRemoveAllAt(PetscCoarsenData *ail, PetscInt a_idx)
248: {
249: PetscCDIntNd *rem, *n1;
251: PetscFunctionBegin;
252: PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx);
253: rem = ail->array[a_idx];
254: ail->array[a_idx] = NULL;
255: if (!(n1 = ail->extra_nodes)) ail->extra_nodes = rem;
256: else {
257: while (n1->next) n1 = n1->next;
258: n1->next = rem;
259: }
260: PetscFunctionReturn(PETSC_SUCCESS);
261: }
263: /* PetscCDCountAt
264: */
265: PetscErrorCode PetscCDCountAt(const PetscCoarsenData *ail, PetscInt a_idx, PetscInt *a_sz)
266: {
267: PetscCDIntNd *n1;
268: PetscInt sz = 0;
270: PetscFunctionBegin;
271: PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx);
272: n1 = ail->array[a_idx];
273: while (n1) {
274: n1 = n1->next;
275: sz++;
276: }
277: *a_sz = sz;
278: PetscFunctionReturn(PETSC_SUCCESS);
279: }
281: /* PetscCDCount
282: */
283: PetscErrorCode PetscCDCount(const PetscCoarsenData *ail, PetscInt *a_sz)
284: {
285: PetscInt sz = 0;
287: PetscFunctionBegin;
288: for (PetscInt ii = 0; ii < ail->size; ii++) {
289: PetscCDIntNd *n1 = ail->array[ii];
291: while (n1) {
292: n1 = n1->next;
293: sz++;
294: }
295: }
296: *a_sz = sz;
297: PetscFunctionReturn(PETSC_SUCCESS);
298: }
300: /* PetscCDIsEmptyAt - Is the list empty? (not used)
301: */
302: PetscErrorCode PetscCDIsEmptyAt(const PetscCoarsenData *ail, PetscInt a_idx, PetscBool *a_e)
303: {
304: PetscFunctionBegin;
305: PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx);
306: *a_e = (PetscBool)(ail->array[a_idx] == NULL);
307: PetscFunctionReturn(PETSC_SUCCESS);
308: }
310: /* PetscCDGetNonemptyIS - used for C-F methods
311: */
312: PetscErrorCode PetscCDGetNonemptyIS(PetscCoarsenData *ail, IS *a_mis)
313: {
314: PetscCDIntNd *n;
315: PetscInt ii, kk;
316: PetscInt *permute;
318: PetscFunctionBegin;
319: for (ii = kk = 0; ii < ail->size; ii++) {
320: n = ail->array[ii];
321: if (n) kk++;
322: }
323: PetscCall(PetscMalloc1(kk, &permute));
324: for (ii = kk = 0; ii < ail->size; ii++) {
325: n = ail->array[ii];
326: if (n) permute[kk++] = ii;
327: }
328: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, kk, permute, PETSC_OWN_POINTER, a_mis));
329: PetscFunctionReturn(PETSC_SUCCESS);
330: }
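/* Usage sketch (illustrative only, not part of hem.c): collect the local indices
   that still own a nonempty list, e.g. as the C-point set of a C-F splitting;
   'cd' is a populated PetscCoarsenData.

     IS       cpts;
     PetscInt ncpts;

     PetscCall(PetscCDGetNonemptyIS(cd, &cpts));
     PetscCall(ISGetLocalSize(cpts, &ncpts)); // number of nonempty lists
     // ... use the index set ...
     PetscCall(ISDestroy(&cpts));
*/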
332: /* PetscCDGetMat
333: */
334: PetscErrorCode PetscCDGetMat(PetscCoarsenData *ail, Mat *a_mat)
335: {
336: PetscFunctionBegin;
337: *a_mat = ail->mat;
338: PetscFunctionReturn(PETSC_SUCCESS);
339: }
341: /* PetscCDSetMat
342: */
343: PetscErrorCode PetscCDSetMat(PetscCoarsenData *ail, Mat a_mat)
344: {
345: PetscFunctionBegin;
346: if (ail->mat) {
347: PetscCall(MatDestroy(&ail->mat)); //should not happen
348: }
349: ail->mat = a_mat;
350: PetscFunctionReturn(PETSC_SUCCESS);
351: }
353: /* PetscCDClearMat
354: */
355: PetscErrorCode PetscCDClearMat(PetscCoarsenData *ail)
356: {
357: PetscFunctionBegin;
358: ail->mat = NULL;
359: PetscFunctionReturn(PETSC_SUCCESS);
360: }
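/* Usage sketch (illustrative only, not part of hem.c): ownership of the cached
   auxiliary Mat. PetscCDSetMat() hands the reference to the container (it is
   destroyed in PetscCDDestroy()), PetscCDGetMat() only borrows it, and
   PetscCDClearMat() detaches it so the caller can keep it alive; 'aux_mat' is a
   hypothetical matrix.

     Mat borrowed;

     PetscCall(PetscCDSetMat(cd, aux_mat));   // container now owns aux_mat
     PetscCall(PetscCDGetMat(cd, &borrowed)); // borrow only, do not destroy
     PetscCall(PetscCDClearMat(cd));          // detach: the caller owns it again
     PetscCall(MatDestroy(&aux_mat));         // caller releases it explicitly
*/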
362: /* PetscCDGetASMBlocks - get IS of aggregates for ASM smoothers
363: */
364: PetscErrorCode PetscCDGetASMBlocks(const PetscCoarsenData *ail, const PetscInt a_bs, PetscInt *a_sz, IS **a_local_is)
365: {
366: PetscCDIntNd *n;
367: PetscInt lsz, ii, kk, *idxs, jj, gid;
368: IS *is_loc = NULL;
370: PetscFunctionBegin;
371: for (ii = kk = 0; ii < ail->size; ii++) {
372: if (ail->array[ii]) kk++;
373: }
374: *a_sz = kk;
375: PetscCall(PetscMalloc1(kk, &is_loc));
376: for (ii = kk = 0; ii < ail->size; ii++) {
377: for (lsz = 0, n = ail->array[ii]; n; lsz++, n = n->next) /* void */
378: ;
379: if (lsz) {
380: PetscCall(PetscMalloc1(a_bs * lsz, &idxs));
381: for (lsz = 0, n = ail->array[ii]; n; n = n->next) {
382: PetscCall(PetscCDIntNdGetID(n, &gid));
383: for (jj = 0; jj < a_bs; lsz++, jj++) idxs[lsz] = a_bs * gid + jj;
384: }
385: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, lsz, idxs, PETSC_OWN_POINTER, &is_loc[kk++]));
386: }
387: }
388: PetscCheck(*a_sz == kk, PETSC_COMM_SELF, PETSC_ERR_PLIB, "*a_sz %" PetscInt_FMT " != kk %" PetscInt_FMT, *a_sz, kk);
389: *a_local_is = is_loc; /* out */
390: PetscFunctionReturn(PETSC_SUCCESS);
391: }
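/* Usage sketch (illustrative only, not part of hem.c): turn the aggregates into
   per-block index sets for an ASM smoother; 'bs' is a block size assumed from the
   caller. Each IS lists the bs*gid+j dofs of one aggregate and would normally be
   handed to PCASM; here they are just created and released again.

     PetscInt nblk;
     IS      *blocks;

     PetscCall(PetscCDGetASMBlocks(cd, bs, &nblk, &blocks));
     for (PetscInt b = 0; b < nblk; b++) PetscCall(ISDestroy(&blocks[b]));
     PetscCall(PetscFree(blocks));
*/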
393: /* weighted edge for the sorted edge list */
394: typedef struct edge_tag {
395: PetscReal weight;
396: PetscInt lid0, gid1, ghost1_idx;
397: } Edge;
399: #define MY_MEPS (PETSC_MACHINE_EPSILON * 100)
400: static int gamg_hem_compare(const void *a, const void *b)
401: {
402: PetscReal va = ((Edge *)a)->weight, vb = ((Edge *)b)->weight;
403: return (va <= vb - MY_MEPS) ? 1 : (va > vb + MY_MEPS) ? -1 : 0; /* 0 for equal */
404: }
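/* Worked example (illustrative): qsort(Edges, nEdges, sizeof(Edge), gamg_hem_compare)
   orders edges by decreasing weight, e.g. weights {0.3, 0.9, 0.5} become
   {0.9, 0.5, 0.3}. Weights closer together than MY_MEPS compare as equal, so their
   relative order is left to qsort (which is not a stable sort). */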
406: /*
407: MatCoarsenApply_HEM_private - parallel heavy edge matching
409: Input Parameters:
410: . a_Gmat - global matrix of the graph
411: . n_iter - number of matching iterations
412: . threshold - threshold for filtering weak edges out of the graph
414: Output Parameter:
415: . a_locals_llist - array of lists of local nodes, each rooted at a local node
416: */
417: static PetscErrorCode MatCoarsenApply_HEM_private(Mat a_Gmat, const PetscInt n_iter, const PetscReal threshold, PetscCoarsenData **a_locals_llist)
418: {
419: #define REQ_BF_SIZE 100
420: PetscBool isMPI;
421: MPI_Comm comm;
422: PetscInt ix, *ii, *aj, Istart, bc_agg = -1, *rbuff = NULL, rbuff_sz = 0;
423: PetscMPIInt rank, size, comm_procs[REQ_BF_SIZE], ncomm_procs, *lid_max_pe;
424: const PetscInt nloc = a_Gmat->rmap->n, request_size = PetscCeilInt((int)sizeof(MPI_Request), (int)sizeof(PetscInt));
425: PetscInt *lid_cprowID;
426: PetscBool *lid_matched;
427: Mat_SeqAIJ *matA, *matB = NULL;
428: Mat_MPIAIJ *mpimat = NULL;
429: PetscScalar one = 1.;
430: PetscCoarsenData *agg_llists = NULL, *ghost_deleted_list = NULL, *bc_list = NULL;
431: Mat cMat, tMat, P;
432: MatScalar *ap;
433: IS info_is;
435: PetscFunctionBegin;
436: PetscCall(PetscObjectGetComm((PetscObject)a_Gmat, &comm));
437: PetscCallMPI(MPI_Comm_rank(comm, &rank));
438: PetscCallMPI(MPI_Comm_size(comm, &size));
439: PetscCall(MatGetOwnershipRange(a_Gmat, &Istart, NULL));
440: PetscCall(ISCreate(comm, &info_is));
441: PetscCall(PetscInfo(info_is, "Start %" PetscInt_FMT " iterations of HEM.\n", n_iter));
443: PetscCall(PetscMalloc3(nloc, &lid_matched, nloc, &lid_cprowID, nloc, &lid_max_pe));
444: PetscCall(PetscCDCreate(nloc, &agg_llists));
445: PetscCall(PetscCDSetChunkSize(agg_llists, nloc + 1));
446: *a_locals_llist = agg_llists;
447: /* add self to all lists */
448: for (PetscInt kk = 0; kk < nloc; kk++) PetscCall(PetscCDAppendID(agg_llists, kk, Istart + kk));
449: /* make a copy of the graph; this copy gets destroyed in the iterations */
450: PetscCall(MatDuplicate(a_Gmat, MAT_COPY_VALUES, &cMat));
451: PetscCall(MatConvert(cMat, MATAIJ, MAT_INPLACE_MATRIX, &cMat));
452: isMPI = (PetscBool)(size > 1);
453: if (isMPI) {
454: /* list of deleted ghosts, should compress this */
455: PetscCall(PetscCDCreate(size, &ghost_deleted_list));
456: PetscCall(PetscCDSetChunkSize(ghost_deleted_list, 100));
457: }
458: for (PetscInt iter = 0; iter < n_iter; iter++) {
459: const PetscScalar *lghost_max_ew, *lid_max_ew;
460: PetscBool *lghost_matched;
461: PetscMPIInt *lghost_pe, *lghost_max_pe;
462: Vec locMaxEdge, ghostMaxEdge, ghostMaxPE, locMaxPE;
463: PetscInt *lghost_gid, nEdges, nEdges0, num_ghosts = 0;
464: Edge *Edges;
465: const PetscInt n_sub_its = 1000; // in case of a bug, stop at some point
467: /* get submatrices of cMat */
468: for (PetscInt kk = 0; kk < nloc; kk++) lid_cprowID[kk] = -1;
469: if (isMPI) {
470: mpimat = (Mat_MPIAIJ *)cMat->data;
471: matA = (Mat_SeqAIJ *)mpimat->A->data;
472: matB = (Mat_SeqAIJ *)mpimat->B->data;
473: if (!matB->compressedrow.use) {
474: /* force construction of compressed row data structure since code below requires it */
475: PetscCall(MatCheckCompressedRow(mpimat->B, matB->nonzerorowcnt, &matB->compressedrow, matB->i, mpimat->B->rmap->n, -1.0));
476: }
477: /* set index into compressed row 'lid_cprowID' */
478: for (ix = 0; ix < matB->compressedrow.nrows; ix++) {
479: PetscInt *ridx = matB->compressedrow.rindex, lid = ridx[ix];
480: if (ridx[ix] >= 0) lid_cprowID[lid] = ix;
481: }
482: } else {
483: matA = (Mat_SeqAIJ *)cMat->data;
484: }
485: /* set matched flags: true for empty list */
486: for (PetscInt kk = 0; kk < nloc; kk++) {
487: PetscCall(PetscCDCountAt(agg_llists, kk, &ix));
488: if (ix > 0) lid_matched[kk] = PETSC_FALSE;
489: else lid_matched[kk] = PETSC_TRUE; // treat deleted gids as matched
490: }
491: /* max edge and pe vecs */
492: PetscCall(MatCreateVecs(cMat, &locMaxEdge, NULL));
493: PetscCall(MatCreateVecs(cMat, &locMaxPE, NULL));
494: /* get 'lghost_pe' & 'lghost_gid' & init. 'lghost_matched' using 'mpimat->lvec' */
495: if (isMPI) {
496: Vec vec;
497: PetscScalar vval;
498: const PetscScalar *buf;
500: PetscCall(MatCreateVecs(cMat, &vec, NULL));
501: PetscCall(VecGetLocalSize(mpimat->lvec, &num_ghosts));
502: /* lghost_matched */
503: for (PetscInt kk = 0, gid = Istart; kk < nloc; kk++, gid++) {
504: PetscScalar vval = lid_matched[kk] ? 1.0 : 0.0;
506: PetscCall(VecSetValues(vec, 1, &gid, &vval, INSERT_VALUES));
507: }
508: PetscCall(VecAssemblyBegin(vec));
509: PetscCall(VecAssemblyEnd(vec));
510: PetscCall(VecScatterBegin(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD));
511: PetscCall(VecScatterEnd(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD));
512: PetscCall(VecGetArrayRead(mpimat->lvec, &buf)); /* get matched flags in 'buf' */
513: PetscCall(PetscMalloc4(num_ghosts, &lghost_matched, num_ghosts, &lghost_pe, num_ghosts, &lghost_gid, num_ghosts, &lghost_max_pe));
515: for (PetscInt kk = 0; kk < num_ghosts; kk++) {
516: lghost_matched[kk] = (PetscBool)(PetscRealPart(buf[kk]) != 0); // the matched flag of the ghost
517: }
518: PetscCall(VecRestoreArrayRead(mpimat->lvec, &buf));
519: /* lghost_pe */
520: vval = (PetscScalar)rank;
521: for (PetscInt kk = 0, gid = Istart; kk < nloc; kk++, gid++) PetscCall(VecSetValues(vec, 1, &gid, &vval, INSERT_VALUES)); /* set with rank (indexed by GID) */
522: PetscCall(VecAssemblyBegin(vec));
523: PetscCall(VecAssemblyEnd(vec));
524: PetscCall(VecScatterBegin(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD));
525: PetscCall(VecScatterEnd(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD));
526: PetscCall(VecGetArrayRead(mpimat->lvec, &buf)); /* get proc ID in 'buf' */
527: for (PetscInt kk = 0; kk < num_ghosts; kk++) lghost_pe[kk] = (PetscMPIInt)PetscRealPart(buf[kk]); // the proc of the ghost for now
528: PetscCall(VecRestoreArrayRead(mpimat->lvec, &buf));
529: /* lghost_gid */
530: for (PetscInt kk = 0, gid = Istart; kk < nloc; kk++, gid++) {
531: vval = (PetscScalar)gid;
533: PetscCall(VecSetValues(vec, 1, &gid, &vval, INSERT_VALUES)); /* set with GID */
534: }
535: PetscCall(VecAssemblyBegin(vec));
536: PetscCall(VecAssemblyEnd(vec));
537: PetscCall(VecScatterBegin(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD));
538: PetscCall(VecScatterEnd(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD));
539: PetscCall(VecDestroy(&vec));
540: PetscCall(VecGetArrayRead(mpimat->lvec, &buf)); /* get the GIDs in 'buf' (stored into 'lghost_gid') */
541: for (PetscInt kk = 0; kk < num_ghosts; kk++) lghost_gid[kk] = (PetscInt)PetscRealPart(buf[kk]);
542: PetscCall(VecRestoreArrayRead(mpimat->lvec, &buf));
543: }
544: // get 'comm_procs' (could hoist)
545: for (PetscInt kk = 0; kk < REQ_BF_SIZE; kk++) comm_procs[kk] = -1;
546: for (ix = 0, ncomm_procs = 0; ix < num_ghosts; ix++) {
547: PetscMPIInt proc = lghost_pe[ix], idx = -1;
549: for (PetscMPIInt k = 0; k < ncomm_procs && idx == -1; k++)
550: if (comm_procs[k] == proc) idx = k;
551: if (idx == -1) comm_procs[ncomm_procs++] = proc;
552: PetscCheck(ncomm_procs != REQ_BF_SIZE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Receive request array too small: %d", ncomm_procs);
553: }
554: /* count edges, compute initial 'locMaxEdge', 'locMaxPE' */
555: nEdges0 = 0;
556: for (PetscInt kk = 0, gid = Istart; kk < nloc; kk++, gid++) {
557: PetscReal max_e = 0., tt;
558: PetscScalar vval;
559: PetscInt lid = kk, max_pe = rank, pe, n;
561: ii = matA->i;
562: n = ii[lid + 1] - ii[lid];
563: aj = PetscSafePointerPlusOffset(matA->j, ii[lid]);
564: ap = PetscSafePointerPlusOffset(matA->a, ii[lid]);
565: for (PetscInt jj = 0; jj < n; jj++) {
566: PetscInt lidj = aj[jj];
568: if ((tt = PetscRealPart(ap[jj])) > threshold && lidj != lid) {
569: if (tt > max_e) max_e = tt;
570: if (lidj > lid) nEdges0++;
571: }
572: }
573: if ((ix = lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
574: ii = matB->compressedrow.i;
575: n = ii[ix + 1] - ii[ix];
576: ap = matB->a + ii[ix];
577: aj = matB->j + ii[ix];
578: for (PetscInt jj = 0; jj < n; jj++) {
579: if ((tt = PetscRealPart(ap[jj])) > threshold) {
580: if (tt > max_e) max_e = tt;
581: nEdges0++;
582: if ((pe = lghost_pe[aj[jj]]) > max_pe) max_pe = pe;
583: }
584: }
585: }
586: vval = max_e;
587: PetscCall(VecSetValues(locMaxEdge, 1, &gid, &vval, INSERT_VALUES));
588: vval = (PetscScalar)max_pe;
589: PetscCall(VecSetValues(locMaxPE, 1, &gid, &vval, INSERT_VALUES));
590: if (iter == 0 && max_e <= MY_MEPS) { // add BCs to fake aggregate
591: lid_matched[lid] = PETSC_TRUE;
592: if (bc_agg == -1) {
593: bc_agg = lid;
594: PetscCall(PetscCDCreate(1, &bc_list));
595: }
596: PetscCall(PetscCDRemoveAllAt(agg_llists, lid));
597: PetscCall(PetscCDAppendID(bc_list, 0, Istart + lid));
598: }
599: }
600: PetscCall(VecAssemblyBegin(locMaxEdge));
601: PetscCall(VecAssemblyEnd(locMaxEdge));
602: PetscCall(VecAssemblyBegin(locMaxPE));
603: PetscCall(VecAssemblyEnd(locMaxPE));
604: /* make 'ghostMaxEdge' and 'lghost_max_pe' */
605: if (isMPI) {
606: const PetscScalar *buf;
608: PetscCall(VecDuplicate(mpimat->lvec, &ghostMaxEdge));
609: PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD));
610: PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD));
612: PetscCall(VecDuplicate(mpimat->lvec, &ghostMaxPE));
613: PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxPE, ghostMaxPE, INSERT_VALUES, SCATTER_FORWARD));
614: PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxPE, ghostMaxPE, INSERT_VALUES, SCATTER_FORWARD));
615: PetscCall(VecGetArrayRead(ghostMaxPE, &buf));
616: for (PetscInt kk = 0; kk < num_ghosts; kk++) lghost_max_pe[kk] = (PetscMPIInt)PetscRealPart(buf[kk]); // the MAX proc of the ghost now
617: PetscCall(VecRestoreArrayRead(ghostMaxPE, &buf));
618: }
619: { // make lid_max_pe
620: const PetscScalar *buf;
622: PetscCall(VecGetArrayRead(locMaxPE, &buf));
623: for (PetscInt kk = 0; kk < nloc; kk++) lid_max_pe[kk] = (PetscMPIInt)PetscRealPart(buf[kk]); // the MAX proc of each local vertex now
624: PetscCall(VecRestoreArrayRead(locMaxPE, &buf));
625: }
626: /* setup sorted list of edges, and make 'Edges' */
627: PetscCall(PetscMalloc1(nEdges0, &Edges));
628: nEdges = 0;
629: for (PetscInt kk = 0, n; kk < nloc; kk++) {
630: const PetscInt lid = kk;
631: PetscReal tt;
633: ii = matA->i;
634: n = ii[lid + 1] - ii[lid];
635: aj = PetscSafePointerPlusOffset(matA->j, ii[lid]);
636: ap = PetscSafePointerPlusOffset(matA->a, ii[lid]);
637: for (PetscInt jj = 0; jj < n; jj++) {
638: PetscInt lidj = aj[jj];
640: if ((tt = PetscRealPart(ap[jj])) > threshold && lidj != lid) {
641: if (lidj > lid) {
642: Edges[nEdges].lid0 = lid;
643: Edges[nEdges].gid1 = lidj + Istart;
644: Edges[nEdges].ghost1_idx = -1;
645: Edges[nEdges].weight = tt;
646: nEdges++;
647: }
648: }
649: }
650: if ((ix = lid_cprowID[lid]) != -1) { /* if I have any ghost neighbor */
651: ii = matB->compressedrow.i;
652: n = ii[ix + 1] - ii[ix];
653: ap = matB->a + ii[ix];
654: aj = matB->j + ii[ix];
655: for (PetscInt jj = 0; jj < n; jj++) {
656: if ((tt = PetscRealPart(ap[jj])) > threshold) {
657: Edges[nEdges].lid0 = lid;
658: Edges[nEdges].gid1 = lghost_gid[aj[jj]];
659: Edges[nEdges].ghost1_idx = aj[jj];
660: Edges[nEdges].weight = tt;
661: nEdges++;
662: }
663: }
664: }
665: }
666: PetscCheck(nEdges == nEdges0, PETSC_COMM_SELF, PETSC_ERR_SUP, "nEdges != nEdges0: %" PetscInt_FMT " %" PetscInt_FMT, nEdges0, nEdges);
667: if (Edges) qsort(Edges, nEdges, sizeof(Edge), gamg_hem_compare);
669: PetscCall(PetscInfo(info_is, "[%d] HEM iteration %" PetscInt_FMT " with %" PetscInt_FMT " edges\n", rank, iter, nEdges));
671: /* projection matrix */
672: PetscCall(MatCreate(comm, &P));
673: PetscCall(MatSetType(P, MATAIJ));
674: PetscCall(MatSetSizes(P, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
675: PetscCall(MatMPIAIJSetPreallocation(P, 1, NULL, 1, NULL));
676: PetscCall(MatSeqAIJSetPreallocation(P, 1, NULL));
677: PetscCall(MatSetUp(P));
678: /* process - communicate - process */
679: for (PetscInt sub_it = 0, old_num_edge = 0; /* sub_it < n_sub_its */; /* sub_it++ */) {
680: PetscInt nactive_edges = 0, n_act_n[3], gn_act_n[3];
681: PetscMPIInt tag1, tag2;
683: PetscCall(VecGetArrayRead(locMaxEdge, &lid_max_ew));
684: if (isMPI) {
685: PetscCall(VecGetArrayRead(ghostMaxEdge, &lghost_max_ew));
686: PetscCall(PetscCommGetNewTag(comm, &tag1));
687: PetscCall(PetscCommGetNewTag(comm, &tag2));
688: }
689: for (PetscInt kk = 0; kk < nEdges; kk++) {
690: const Edge *e = &Edges[kk];
691: const PetscInt lid0 = e->lid0, gid1 = e->gid1, ghost1_idx = e->ghost1_idx, gid0 = lid0 + Istart, lid1 = gid1 - Istart;
692: PetscBool isOK = PETSC_TRUE, print = PETSC_FALSE;
694: if (print)
695: PetscCall(PetscSynchronizedPrintf(comm, "\t[%d] edge (%" PetscInt_FMT " %" PetscInt_FMT "), %s %s %s\n", rank, gid0, gid1, lid_matched[lid0] ? "true" : "false", (ghost1_idx != -1 && lghost_matched[ghost1_idx]) ? "true" : "false", (ghost1_idx == -1 && lid_matched[lid1]) ? "true" : "false"));
696: /* skip if either vertex is matched already */
697: if (lid_matched[lid0] || (ghost1_idx != -1 && lghost_matched[ghost1_idx]) || (ghost1_idx == -1 && lid_matched[lid1])) continue;
699: nactive_edges++;
700: PetscCheck(PetscRealPart(lid_max_ew[lid0]) >= e->weight - MY_MEPS, PETSC_COMM_SELF, PETSC_ERR_SUP, "edge weight %e > max %e", (double)e->weight, (double)PetscRealPart(lid_max_ew[lid0]));
701: if (print) PetscCall(PetscSynchronizedPrintf(comm, "\t[%d] active edge (%" PetscInt_FMT " %" PetscInt_FMT "), diff0 = %10.4e\n", rank, gid0, gid1, (double)(PetscRealPart(lid_max_ew[lid0]) - e->weight)));
702: // e0: skip if this is not lid0's max edge (lid_max_ew gets updated as matching proceeds)
703: if (PetscRealPart(lid_max_ew[lid0]) > e->weight + MY_MEPS) {
704: if (print)
705: PetscCall(PetscSynchronizedPrintf(comm, "\t\t[%d] 1) e0 SKIPPING small edge %20.14e edge (%" PetscInt_FMT " %" PetscInt_FMT "), diff = %10.4e to proc %d. max = %20.14e, w = %20.14e\n", rank, (double)e->weight, gid0, gid1, (double)(PetscRealPart(lid_max_ew[lid0]) - e->weight), ghost1_idx != -1 ? lghost_pe[ghost1_idx] : rank, (double)PetscRealPart(lid_max_ew[lid0]),
706: (double)e->weight));
707: continue; // we are basically filtering edges here
708: }
709: // e1 - local
710: if (ghost1_idx == -1) {
711: if (PetscRealPart(lid_max_ew[lid1]) > e->weight + MY_MEPS) {
712: if (print)
713: PetscCall(PetscSynchronizedPrintf(comm, "\t\t%c[%d] 2) e1 SKIPPING small local edge %20.14e edge (%" PetscInt_FMT " %" PetscInt_FMT "), diff = %10.4e\n", ghost1_idx != -1 ? '\t' : ' ', rank, (double)e->weight, gid0, gid1, (double)(PetscRealPart(lid_max_ew[lid1]) - e->weight)));
714: continue; // we are basically filtering edges here
715: }
716: } else { // e1 - ghost
717: /* see if edge might get matched on other proc */
718: PetscReal g_max_e1 = PetscRealPart(lghost_max_ew[ghost1_idx]);
720: if (print)
721: PetscCall(PetscSynchronizedPrintf(comm, "\t\t\t[%d] CHECK GHOST e1, edge (%" PetscInt_FMT " %" PetscInt_FMT "), E0 MAX EDGE WEIGHT = %10.4e, EDGE WEIGHT = %10.4e, diff1 = %10.4e, ghost proc %d with max pe %d on e0 and %d on e1\n", rank, gid0, gid1, (double)PetscRealPart(lid_max_ew[lid0]),
722: (double)e->weight, (double)(PetscRealPart(lghost_max_ew[ghost1_idx]) - e->weight), lghost_pe[ghost1_idx], lid_max_pe[lid0], lghost_max_pe[ghost1_idx]));
723: if (g_max_e1 > e->weight + MY_MEPS) {
724: /* PetscCall(PetscSynchronizedPrintf(comm,"\t\t\t\t[%d] 3) ghost e1 SKIPPING small edge (%d %d), diff = %10.4e from proc %d with max pe %d. max = %20.14e, w = %20.14e\n", rank, gid0, gid1, g_max_e1 - e->weight, lghost_pe[ghost1_idx], lghost_max_pe[ghost1_idx], g_max_e1, e->weight )); */
725: continue;
726: } else if (g_max_e1 >= e->weight - MY_MEPS && lghost_pe[ghost1_idx] > rank) { // is 'lghost_max_pe[ghost1_idx] > rank' needed?
727: /* check for max_e1 equal to this edge weight and a larger processor that will deal with it */
728: if (print)
729: PetscCall(PetscSynchronizedPrintf(comm, "\t\t\t[%d] ghost e1 SKIPPING EQUAL (%" PetscInt_FMT " %" PetscInt_FMT "), diff = %10.4e from larger proc %d with max pe %d. max = %20.14e, w = %20.14e\n", rank, gid0, gid1, (double)(PetscRealPart(lid_max_ew[lid0]) - e->weight), lghost_pe[ghost1_idx], lghost_max_pe[ghost1_idx], (double)g_max_e1,
730: (double)e->weight));
731: continue;
732: } else {
733: /* PetscCall(PetscSynchronizedPrintf(comm,"\t[%d] Edge (%d %d) passes gid0 tests, diff = %10.4e from proc %d with max pe %d. max = %20.14e, w = %20.14e\n", rank, gid0, gid1, g_max_e1 - e->weight, lghost_pe[ghost1_idx], lghost_max_pe[ghost1_idx], g_max_e1, e->weight )); */
734: }
735: }
736: /* check ghost for v0 */
737: if (isOK) {
738: PetscReal max_e, ew;
740: if ((ix = lid_cprowID[lid0]) != -1) { /* if I have any ghost neighbors */
741: PetscInt n;
743: ii = matB->compressedrow.i;
744: n = ii[ix + 1] - ii[ix];
745: ap = matB->a + ii[ix];
746: aj = matB->j + ii[ix];
747: for (PetscInt jj = 0; jj < n && isOK; jj++) {
748: PetscInt lidj = aj[jj];
750: if (lghost_matched[lidj]) continue;
751: ew = PetscRealPart(ap[jj]);
752: if (ew <= threshold) continue;
753: max_e = PetscRealPart(lghost_max_ew[lidj]);
755: /* check for max_e equal to this edge weight and a larger processor that will deal with it */
756: if (ew >= PetscRealPart(lid_max_ew[lid0]) - MY_MEPS && lghost_max_pe[lidj] > rank) isOK = PETSC_FALSE;
757: PetscCheck(ew <= max_e + MY_MEPS, PETSC_COMM_SELF, PETSC_ERR_SUP, "edge weight %e > max %e. ncols = %" PetscInt_FMT ", gid0 = %" PetscInt_FMT ", gid1 = %" PetscInt_FMT, (double)PetscRealPart(ew), (double)PetscRealPart(max_e), n, lid0 + Istart, lghost_gid[lidj]);
758: if (print)
759: PetscCall(PetscSynchronizedPrintf(comm, "\t\t\t\t[%d] e0: looked at ghost adj (%" PetscInt_FMT " %" PetscInt_FMT "), diff = %10.4e, ghost on proc %d (max %d). isOK = %d, %d %d %d; ew = %e, lid0 max ew = %e, diff = %e, eps = %e\n", rank, gid0, lghost_gid[lidj], (double)(max_e - ew), lghost_pe[lidj], lghost_max_pe[lidj], isOK, (double)ew >= (double)(max_e - MY_MEPS), ew >= PetscRealPart(lid_max_ew[lid0]) - MY_MEPS, lghost_pe[lidj] > rank, (double)ew, (double)PetscRealPart(lid_max_ew[lid0]), (double)(ew - PetscRealPart(lid_max_ew[lid0])), (double)MY_MEPS));
760: }
761: if (!isOK && print) PetscCall(PetscSynchronizedPrintf(comm, "\t\t[%d] skip edge (%" PetscInt_FMT " %" PetscInt_FMT ") from ghost inspection\n", rank, gid0, gid1));
762: }
763: /* check local v1 */
764: if (ghost1_idx == -1) {
765: if ((ix = lid_cprowID[lid1]) != -1) { /* if I have any ghost neighbors */
766: PetscInt n;
768: ii = matB->compressedrow.i;
769: n = ii[ix + 1] - ii[ix];
770: ap = matB->a + ii[ix];
771: aj = matB->j + ii[ix];
772: for (PetscInt jj = 0; jj < n && isOK; jj++) {
773: PetscInt lidj = aj[jj];
775: if (lghost_matched[lidj]) continue;
776: ew = PetscRealPart(ap[jj]);
777: if (ew <= threshold) continue;
778: max_e = PetscRealPart(lghost_max_ew[lidj]);
779: /* check for max_e equal to this edge weight and a larger processor that will deal with it */
780: if (ew >= PetscRealPart(lid_max_ew[lid1]) - MY_MEPS && lghost_max_pe[lidj] > rank) isOK = PETSC_FALSE;
781: PetscCheck(ew <= max_e + MY_MEPS, PETSC_COMM_SELF, PETSC_ERR_SUP, "edge weight %e > max %e", (double)PetscRealPart(ew), (double)PetscRealPart(max_e));
782: if (print)
783: PetscCall(PetscSynchronizedPrintf(comm, "\t\t\t\t\t[%d] e1: looked at ghost adj (%" PetscInt_FMT " %" PetscInt_FMT "), diff = %10.4e, ghost on proc %d (max %d)\n", rank, gid0, lghost_gid[lidj], (double)(max_e - ew), lghost_pe[lidj], lghost_max_pe[lidj]));
784: }
785: }
786: if (!isOK && print) PetscCall(PetscSynchronizedPrintf(comm, "\t\t[%d] skip edge (%" PetscInt_FMT " %" PetscInt_FMT ") from ghost inspection\n", rank, gid0, gid1));
787: }
788: }
789: PetscReal e1_max_w = (ghost1_idx == -1 ? PetscRealPart(lid_max_ew[lid0]) : PetscRealPart(lghost_max_ew[ghost1_idx]));
790: if (print)
791: PetscCall(PetscSynchronizedPrintf(comm, "\t[%d] MATCHING (%" PetscInt_FMT " %" PetscInt_FMT ") e1 max weight = %e, e1 weight diff %e, %s. isOK = %d\n", rank, gid0, gid1, (double)e1_max_w, (double)(e1_max_w - e->weight), ghost1_idx == -1 ? "local" : "ghost", isOK));
792: /* do it */
793: if (isOK) {
794: if (ghost1_idx == -1) {
795: PetscCheck(!lid_matched[lid1], PETSC_COMM_SELF, PETSC_ERR_SUP, "local %" PetscInt_FMT " is matched", gid1);
796: lid_matched[lid1] = PETSC_TRUE; /* keep track of what we've done this round */
797: PetscCall(PetscCDMoveAppend(agg_llists, lid0, lid1)); // takes lid1's list and appends to lid0's
798: } else {
799: /* add gid1 to the list of ghosts deleted by me -- I need their children */
800: PetscMPIInt proc = lghost_pe[ghost1_idx];
801: PetscCheck(!lghost_matched[ghost1_idx], PETSC_COMM_SELF, PETSC_ERR_SUP, "ghost %" PetscInt_FMT " is matched", lghost_gid[ghost1_idx]);
802: lghost_matched[ghost1_idx] = PETSC_TRUE;
803: PetscCall(PetscCDAppendID(ghost_deleted_list, proc, ghost1_idx)); /* cache to send messages */
804: PetscCall(PetscCDAppendID(ghost_deleted_list, proc, lid0));
805: }
806: lid_matched[lid0] = PETSC_TRUE; /* keep track of what we've done this round */
807: /* set projection */
808: PetscCall(MatSetValues(P, 1, &gid0, 1, &gid0, &one, INSERT_VALUES));
809: PetscCall(MatSetValues(P, 1, &gid1, 1, &gid0, &one, INSERT_VALUES));
810: //PetscCall(PetscPrintf(comm,"\t %" PetscInt_FMT ".%" PetscInt_FMT ") match active EDGE %" PetscInt_FMT " : (%" PetscInt_FMT " %" PetscInt_FMT ")\n",iter,sub_it, nactive_edges, gid0, gid1));
811: } /* matched */
812: } /* edge loop */
813: PetscCall(PetscSynchronizedFlush(comm, PETSC_STDOUT));
814: if (isMPI) PetscCall(VecRestoreArrayRead(ghostMaxEdge, &lghost_max_ew));
815: PetscCall(VecRestoreArrayRead(locMaxEdge, &lid_max_ew));
816: // count active edges for the stopping test; later, update deleted ghosts
817: n_act_n[0] = nactive_edges;
818: if (ghost_deleted_list) PetscCall(PetscCDCount(ghost_deleted_list, &n_act_n[2]));
819: else n_act_n[2] = 0;
820: PetscCall(PetscCDCount(agg_llists, &n_act_n[1]));
821: PetscCallMPI(MPIU_Allreduce(n_act_n, gn_act_n, 3, MPIU_INT, MPI_SUM, comm));
822: PetscCall(PetscInfo(info_is, "[%d] %" PetscInt_FMT ".%" PetscInt_FMT ") nactive edges=%" PetscInt_FMT ", ncomm_procs=%d, nEdges=%" PetscInt_FMT ", %" PetscInt_FMT " deleted ghosts, N=%" PetscInt_FMT "\n", rank, iter, sub_it, gn_act_n[0], ncomm_procs, nEdges, gn_act_n[2], gn_act_n[1]));
823: /* deal with deleted ghosts */
824: if (isMPI) {
825: PetscCDIntNd *pos;
826: PetscInt *sbuffs1[REQ_BF_SIZE], ndel;
827: PetscInt *sbuffs2[REQ_BF_SIZE];
828: MPI_Status status;
830: /* send deleted ghosts */
831: for (PetscInt proc_idx = 0; proc_idx < ncomm_procs; proc_idx++) {
832: const PetscMPIInt proc = comm_procs[proc_idx];
833: PetscInt *sbuff, *pt, scount;
834: MPI_Request *request;
836: /* count ghosts */
837: PetscCall(PetscCDCountAt(ghost_deleted_list, proc, &ndel));
838: ndel /= 2; // two entries for each deleted ghost
839: scount = 2 + 2 * ndel;
840: PetscCall(PetscMalloc1(scount + request_size, &sbuff));
841: /* save requests */
842: sbuffs1[proc_idx] = sbuff;
843: request = (MPI_Request *)sbuff;
844: sbuff = pt = sbuff + request_size;
845: /* write [ndel, proc, ndel*[gid1,gid0]] */
846: *pt++ = ndel; // number of deleted to send
847: *pt++ = rank; // proc (not used)
848: PetscCall(PetscCDGetHeadPos(ghost_deleted_list, proc, &pos));
849: while (pos) {
850: PetscInt lid0, ghost_idx, gid1;
852: PetscCall(PetscCDIntNdGetID(pos, &ghost_idx));
853: gid1 = lghost_gid[ghost_idx];
854: PetscCall(PetscCDGetNextPos(ghost_deleted_list, proc, &pos));
855: PetscCall(PetscCDIntNdGetID(pos, &lid0));
856: PetscCall(PetscCDGetNextPos(ghost_deleted_list, proc, &pos));
857: *pt++ = gid1;
858: *pt++ = lid0 + Istart; // gid0
859: }
860: PetscCheck(pt - sbuff == scount, PETSC_COMM_SELF, PETSC_ERR_SUP, "pt-sbuff != scount: %zu", pt - sbuff);
861: /* MPIU_Isend: tag1 [ndel, proc, n*[gid1,gid0] ] */
862: PetscCallMPI(MPIU_Isend(sbuff, scount, MPIU_INT, proc, tag1, comm, request));
863: PetscCall(PetscCDRemoveAllAt(ghost_deleted_list, proc)); // done with this list
864: }
865: /* receive deleted, send back partial aggregates, clear lists */
866: for (PetscInt proc_idx = 0; proc_idx < ncomm_procs; proc_idx++) {
867: PetscCallMPI(MPI_Probe(comm_procs[proc_idx] /* MPI_ANY_SOURCE */, tag1, comm, &status));
868: {
869: PetscInt *pt, *pt2, *pt3, *sbuff, tmp;
870: MPI_Request *request;
871: PetscMPIInt rcount, scount;
872: const PetscMPIInt proc = status.MPI_SOURCE;
874: PetscCallMPI(MPI_Get_count(&status, MPIU_INT, &rcount));
875: if (rcount > rbuff_sz) {
876: if (rbuff) PetscCall(PetscFree(rbuff));
877: PetscCall(PetscMalloc1(rcount, &rbuff));
878: rbuff_sz = rcount;
879: }
880: /* MPI_Recv: tag1 [ndel, proc, ndel*[gid1,gid0] ] */
881: PetscCallMPI(MPI_Recv(rbuff, rcount, MPIU_INT, proc, tag1, comm, &status));
882: /* read the received buffer and count the reply sends *[gid0, n, n*[gid]] */
883: pt = rbuff;
884: scount = 0;
885: ndel = *pt++; // number of deleted to recv
886: tmp = *pt++; // proc (not used)
887: while (ndel--) {
888: PetscInt gid1 = *pt++, lid1 = gid1 - Istart;
889: PetscInt gh_gid0 = *pt++; // gid on other proc (not used here to count)
891: PetscCheck(lid1 >= 0 && lid1 < nloc, PETSC_COMM_SELF, PETSC_ERR_SUP, "received ghost deleted %" PetscInt_FMT, gid1);
892: PetscCheck(!lid_matched[lid1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "%" PetscInt_FMT ") received matched local gid %" PetscInt_FMT ",%" PetscInt_FMT ", with ghost (lid) %" PetscInt_FMT " from proc %d", sub_it, gid1, gh_gid0, tmp, proc);
893: lid_matched[lid1] = PETSC_TRUE; /* keep track of what we've done this round */
894: PetscCall(PetscCDCountAt(agg_llists, lid1, &tmp)); // n
895: scount += tmp + 2; // lid0, n, n*[gid]
896: }
897: PetscCheck((pt - rbuff) == (ptrdiff_t)rcount, PETSC_COMM_SELF, PETSC_ERR_SUP, "receive buffer size != num read: %zu; rcount: %d", pt - rbuff, rcount);
898: /* send tag2: *[gid0, n, n*[gid] ] */
899: PetscCall(PetscMalloc1(scount + request_size, &sbuff));
900: sbuffs2[proc_idx] = sbuff; /* cache request */
901: request = (MPI_Request *)sbuff;
902: pt2 = sbuff = sbuff + request_size;
903: // read again: n, proc, n*[gid1,gid0]
904: pt = rbuff;
905: ndel = *pt++;
906: tmp = *pt++; // proc (not used)
907: while (ndel--) {
908: PetscInt gid1 = *pt++, lid1 = gid1 - Istart, gh_gid0 = *pt++;
910: /* write [gid0, aggSz, aggSz[gid] ] */
911: *pt2++ = gh_gid0;
912: pt3 = pt2++; /* save pointer for later */
913: PetscCall(PetscCDGetHeadPos(agg_llists, lid1, &pos));
914: while (pos) {
915: PetscInt gid;
917: PetscCall(PetscCDIntNdGetID(pos, &gid));
918: PetscCall(PetscCDGetNextPos(agg_llists, lid1, &pos));
919: *pt2++ = gid;
920: }
921: PetscCall(PetscIntCast(pt2 - pt3 - 1, pt3));
922: /* clear list */
923: PetscCall(PetscCDRemoveAllAt(agg_llists, lid1));
924: }
925: PetscCheck((pt2 - sbuff) == (ptrdiff_t)scount, PETSC_COMM_SELF, PETSC_ERR_SUP, "buffer size != num write: %zu %d", pt2 - sbuff, scount);
926: /* MPIU_Isend: requested data tag2 *[gid0, n, n*[gid]] */
927: PetscCallMPI(MPIU_Isend(sbuff, scount, MPIU_INT, proc, tag2, comm, request));
928: }
929: } // proc_idx
930: /* receive tag2 *[gid0, n, n*[gid] ] */
931: for (PetscMPIInt proc_idx = 0; proc_idx < ncomm_procs; proc_idx++) {
932: PetscMPIInt proc;
933: PetscInt *pt;
934: int rcount;
936: PetscCallMPI(MPI_Probe(comm_procs[proc_idx] /* MPI_ANY_SOURCE */, tag2, comm, &status));
937: PetscCallMPI(MPI_Get_count(&status, MPIU_INT, &rcount));
938: if (rcount > rbuff_sz) {
939: if (rbuff) PetscCall(PetscFree(rbuff));
940: PetscCall(PetscMalloc1(rcount, &rbuff));
941: rbuff_sz = rcount;
942: }
943: proc = status.MPI_SOURCE;
944: /* MPI_Recv: tag2 *[gid0, n, n*[gid]] */
945: PetscCallMPI(MPI_Recv(rbuff, rcount, MPIU_INT, proc, tag2, comm, &status));
946: pt = rbuff;
947: while (pt - rbuff < rcount) {
948: PetscInt gid0 = *pt++, n = *pt++;
950: while (n--) {
951: PetscInt gid1 = *pt++;
953: PetscCall(PetscCDAppendID(agg_llists, gid0 - Istart, gid1));
954: }
955: }
956: PetscCheck((pt - rbuff) == (ptrdiff_t)rcount, PETSC_COMM_SELF, PETSC_ERR_SUP, "recv buffer size != num read: %zu %d", pt - rbuff, rcount);
957: }
958: /* wait for tag1 isends */
959: for (PetscMPIInt proc_idx = 0; proc_idx < ncomm_procs; proc_idx++) {
960: MPI_Request *request = (MPI_Request *)sbuffs1[proc_idx];
962: PetscCallMPI(MPI_Wait(request, &status));
963: PetscCall(PetscFree(sbuffs1[proc_idx]));
964: }
965: /* wait for tag2 isends */
966: for (PetscMPIInt proc_idx = 0; proc_idx < ncomm_procs; proc_idx++) {
967: MPI_Request *request = (MPI_Request *)sbuffs2[proc_idx];
969: PetscCallMPI(MPI_Wait(request, &status));
970: PetscCall(PetscFree(sbuffs2[proc_idx]));
971: }
972: } /* MPI */
973: /* set 'lghost_matched' - use locMaxEdge, ghostMaxEdge (recomputed next) */
974: if (isMPI) {
975: const PetscScalar *sbuff;
977: for (PetscInt kk = 0, gid = Istart; kk < nloc; kk++, gid++) {
978: PetscScalar vval = lid_matched[kk] ? 1.0 : 0.0;
980: PetscCall(VecSetValues(locMaxEdge, 1, &gid, &vval, INSERT_VALUES)); /* set with GID */
981: }
982: PetscCall(VecAssemblyBegin(locMaxEdge));
983: PetscCall(VecAssemblyEnd(locMaxEdge));
984: PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD));
985: PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD));
986: PetscCall(VecGetArrayRead(ghostMaxEdge, &sbuff));
987: for (PetscInt kk = 0; kk < num_ghosts; kk++) { lghost_matched[kk] = (PetscBool)(PetscRealPart(sbuff[kk]) != 0.0); }
988: PetscCall(VecRestoreArrayRead(ghostMaxEdge, &sbuff));
989: }
990: /* compute 'locMaxEdge' inside sub iteration b/c max weight can drop as neighbors are matched */
991: for (PetscInt kk = 0, gid = Istart; kk < nloc; kk++, gid++) {
992: PetscReal max_e = 0., tt;
993: PetscScalar vval;
994: const PetscInt lid = kk;
995: PetscMPIInt max_pe = rank, pe, n;
997: ii = matA->i;
998: PetscCall(PetscMPIIntCast(ii[lid + 1] - ii[lid], &n));
999: aj = PetscSafePointerPlusOffset(matA->j, ii[lid]);
1000: ap = PetscSafePointerPlusOffset(matA->a, ii[lid]);
1001: for (PetscMPIInt jj = 0; jj < n; jj++) {
1002: PetscInt lidj = aj[jj];
1004: if (lid_matched[lidj]) continue; /* this is new - can change local max */
1005: if (lidj != lid && PetscRealPart(ap[jj]) > max_e) max_e = PetscRealPart(ap[jj]);
1006: }
1007: if (lid_cprowID && (ix = lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
1008: ii = matB->compressedrow.i;
1009: PetscCall(PetscMPIIntCast(ii[ix + 1] - ii[ix], &n));
1010: ap = matB->a + ii[ix];
1011: aj = matB->j + ii[ix];
1012: for (PetscMPIInt jj = 0; jj < n; jj++) {
1013: PetscInt lidj = aj[jj];
1015: if (lghost_matched[lidj]) continue;
1016: if ((tt = PetscRealPart(ap[jj])) > max_e) max_e = tt;
1017: }
1018: }
1019: vval = max_e;
1020: PetscCall(VecSetValues(locMaxEdge, 1, &gid, &vval, INSERT_VALUES)); /* set with GID */
1021: // max PE with max edge
1022: if (lid_cprowID && (ix = lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
1023: ii = matB->compressedrow.i;
1024: PetscCall(PetscMPIIntCast(ii[ix + 1] - ii[ix], &n));
1025: ap = matB->a + ii[ix];
1026: aj = matB->j + ii[ix];
1027: for (PetscInt jj = 0; jj < n; jj++) {
1028: PetscInt lidj = aj[jj];
1030: if (lghost_matched[lidj]) continue;
1031: if ((pe = lghost_pe[aj[jj]]) > max_pe && PetscRealPart(ap[jj]) >= max_e - MY_MEPS) { max_pe = pe; }
1032: }
1033: }
1034: vval = max_pe;
1035: PetscCall(VecSetValues(locMaxPE, 1, &gid, &vval, INSERT_VALUES));
1036: }
1037: PetscCall(VecAssemblyBegin(locMaxEdge));
1038: PetscCall(VecAssemblyEnd(locMaxEdge));
1039: PetscCall(VecAssemblyBegin(locMaxPE));
1040: PetscCall(VecAssemblyEnd(locMaxPE));
1041: /* compute 'lghost_max_ew' and 'lghost_max_pe' to get ready for the next iteration */
1042: if (isMPI) {
1043: const PetscScalar *buf;
1045: PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD));
1046: PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD));
1047: PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxPE, ghostMaxPE, INSERT_VALUES, SCATTER_FORWARD));
1048: PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxPE, ghostMaxPE, INSERT_VALUES, SCATTER_FORWARD));
1049: PetscCall(VecGetArrayRead(ghostMaxPE, &buf));
1050: for (PetscInt kk = 0; kk < num_ghosts; kk++) {
1051: lghost_max_pe[kk] = (PetscMPIInt)PetscRealPart(buf[kk]); // the MAX proc of the ghost now
1052: }
1053: PetscCall(VecRestoreArrayRead(ghostMaxPE, &buf));
1054: }
1055: // if no active edges, stop
1056: if (gn_act_n[0] < 1) break;
1057: // increment and check (self-stopping iteration)
1058: PetscCheck(old_num_edge != gn_act_n[0], PETSC_COMM_SELF, PETSC_ERR_SUP, "HEM stalled step %" PetscInt_FMT "/%" PetscInt_FMT, sub_it + 1, n_sub_its);
1059: sub_it++;
1060: PetscCheck(sub_it < n_sub_its, PETSC_COMM_SELF, PETSC_ERR_SUP, "failed to finish HEM step %" PetscInt_FMT "/%" PetscInt_FMT, sub_it + 1, n_sub_its);
1061: old_num_edge = gn_act_n[0];
1062: } /* sub_it loop */
1063: /* clean up iteration */
1064: PetscCall(PetscFree(Edges));
1065: if (isMPI) { // can be hoisted
1066: PetscCall(VecRestoreArrayRead(ghostMaxEdge, &lghost_max_ew));
1067: PetscCall(VecDestroy(&ghostMaxEdge));
1068: PetscCall(VecDestroy(&ghostMaxPE));
1069: PetscCall(PetscFree4(lghost_matched, lghost_pe, lghost_gid, lghost_max_pe));
1070: }
1071: PetscCall(VecDestroy(&locMaxEdge));
1072: PetscCall(VecDestroy(&locMaxPE));
1073: /* create next graph */
1074: {
1075: Vec diag;
1077: /* add identity for unmatched vertices so they stay alive */
1078: for (PetscInt kk = 0, gid1, gid = Istart; kk < nloc; kk++, gid++) {
1079: if (!lid_matched[kk]) {
1080: const PetscInt lid = kk;
1081: PetscCDIntNd *pos;
1083: PetscCall(PetscCDGetHeadPos(agg_llists, lid, &pos));
1084: PetscCheck(pos, PETSC_COMM_SELF, PETSC_ERR_PLIB, "empty list in singleton: %" PetscInt_FMT, gid);
1085: PetscCall(PetscCDIntNdGetID(pos, &gid1));
1086: PetscCheck(gid1 == gid, PETSC_COMM_SELF, PETSC_ERR_PLIB, "first in list (%" PetscInt_FMT ") in singleton not %" PetscInt_FMT, gid1, gid);
1087: PetscCall(MatSetValues(P, 1, &gid, 1, &gid, &one, INSERT_VALUES));
1088: }
1089: }
1090: PetscCall(MatAssemblyBegin(P, MAT_FINAL_ASSEMBLY));
1091: PetscCall(MatAssemblyEnd(P, MAT_FINAL_ASSEMBLY));
1093: /* project to make new graph with collapsed edges */
1094: PetscCall(MatPtAP(cMat, P, MAT_INITIAL_MATRIX, 1.0, &tMat));
1095: PetscCall(MatDestroy(&P));
1096: PetscCall(MatDestroy(&cMat));
1097: cMat = tMat;
1098: PetscCall(MatCreateVecs(cMat, &diag, NULL));
1099: PetscCall(MatGetDiagonal(cMat, diag));
1100: PetscCall(VecReciprocal(diag));
1101: PetscCall(VecSqrtAbs(diag));
1102: PetscCall(MatDiagonalScale(cMat, diag, diag));
1103: PetscCall(VecDestroy(&diag));
1104: }
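/* In other words (illustrative): with A = P^T cMat P and D = diag(A), the next
   graph is D^{-1/2} A D^{-1/2}, so its diagonal is 1 and the off-diagonal edge
   weights remain comparable to 'threshold' on the next matching iteration. */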
1105: } /* coarsen iterator */
1107: /* make a fake matrix with only the off-process (Mat->B) couplings, for smoothed aggregation QR. Needed if we make an auxiliary graph (i.e., PtAP) with k > 1 */
1108: if (size > 1) {
1109: Mat mat;
1110: PetscCDIntNd *pos;
1111: PetscInt NN, MM, jj = 0, mxsz = 0;
1113: for (PetscInt kk = 0; kk < nloc; kk++) {
1114: PetscCall(PetscCDCountAt(agg_llists, kk, &jj));
1115: if (jj > mxsz) mxsz = jj;
1116: }
1117: PetscCall(MatGetSize(a_Gmat, &MM, &NN));
1118: if (mxsz > MM - nloc) mxsz = MM - nloc;
1119: /* matrix of ghost adj for square graph */
1120: PetscCall(MatCreateAIJ(comm, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE, 0, NULL, mxsz, NULL, &mat));
1121: for (PetscInt lid = 0, gid = Istart; lid < nloc; lid++, gid++) {
1122: PetscCall(PetscCDGetHeadPos(agg_llists, lid, &pos));
1123: while (pos) {
1124: PetscInt gid1;
1126: PetscCall(PetscCDIntNdGetID(pos, &gid1));
1127: PetscCall(PetscCDGetNextPos(agg_llists, lid, &pos));
1128: if (gid1 < Istart || gid1 >= Istart + nloc) PetscCall(MatSetValues(mat, 1, &gid, 1, &gid1, &one, ADD_VALUES));
1129: }
1130: }
1131: PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
1132: PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
1133: PetscCall(PetscCDSetMat(agg_llists, mat));
1134: PetscCall(PetscCDDestroy(ghost_deleted_list));
1135: if (rbuff_sz) PetscCall(PetscFree(rbuff)); // always true
1136: }
1137: // move the cached BC vertices into the bc_agg aggregate
1138: if (bc_list) {
1139: PetscCDIntNd *pos;
1141: PetscCall(PetscCDGetHeadPos(bc_list, 0, &pos));
1142: while (pos) {
1143: PetscInt gid1;
1145: PetscCall(PetscCDIntNdGetID(pos, &gid1));
1146: PetscCall(PetscCDGetNextPos(bc_list, 0, &pos));
1147: PetscCall(PetscCDAppendID(agg_llists, bc_agg, gid1));
1148: }
1149: PetscCall(PetscCDRemoveAllAt(bc_list, 0));
1150: PetscCall(PetscCDDestroy(bc_list));
1151: }
1152: {
1153: // check sizes -- every vertex must end up in some aggregate
1154: PetscInt sz, globalsz, MM;
1156: PetscCall(MatGetSize(a_Gmat, &MM, NULL));
1157: PetscCall(PetscCDCount(agg_llists, &sz));
1158: PetscCallMPI(MPIU_Allreduce(&sz, &globalsz, 1, MPIU_INT, MPI_SUM, comm));
1159: PetscCheck(MM == globalsz, comm, PETSC_ERR_SUP, "lost %" PetscInt_FMT " equations ?", MM - globalsz);
1160: }
1161: // cleanup
1162: PetscCall(MatDestroy(&cMat));
1163: PetscCall(PetscFree3(lid_matched, lid_cprowID, lid_max_pe));
1164: PetscCall(ISDestroy(&info_is));
1165: PetscFunctionReturn(PETSC_SUCCESS);
1166: }
1168: /*
1169: HEM coarsen, simple greedy.
1170: */
1171: static PetscErrorCode MatCoarsenApply_HEM(MatCoarsen coarse)
1172: {
1173: Mat mat = coarse->graph;
1175: PetscFunctionBegin;
1176: PetscCall(MatCoarsenApply_HEM_private(mat, coarse->max_it, coarse->threshold, &coarse->agg_lists));
1177: PetscFunctionReturn(PETSC_SUCCESS);
1178: }
1180: static PetscErrorCode MatCoarsenView_HEM(MatCoarsen coarse, PetscViewer viewer)
1181: {
1182: PetscMPIInt rank;
1183: PetscBool iascii;
1185: PetscFunctionBegin;
1186: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)coarse), &rank));
1187: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1188: if (iascii) {
1189: PetscCDIntNd *pos, *pos2;
1190: PetscViewerFormat format;
1192: PetscCall(PetscViewerASCIIPrintf(viewer, "%" PetscInt_FMT " matching steps with threshold = %g\n", coarse->max_it, (double)coarse->threshold));
1193: PetscCall(PetscViewerGetFormat(viewer, &format));
1194: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1195: if (coarse->agg_lists) {
1196: PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1197: for (PetscInt kk = 0; kk < coarse->agg_lists->size; kk++) {
1198: PetscCall(PetscCDGetHeadPos(coarse->agg_lists, kk, &pos));
1199: if ((pos2 = pos)) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "selected local %" PetscInt_FMT ": ", kk));
1200: while (pos) {
1201: PetscInt gid1;
1203: PetscCall(PetscCDIntNdGetID(pos, &gid1));
1204: PetscCall(PetscCDGetNextPos(coarse->agg_lists, kk, &pos));
1205: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " %" PetscInt_FMT " ", gid1));
1206: }
1207: if (pos2) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "\n"));
1208: }
1209: PetscCall(PetscViewerFlush(viewer));
1210: PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1211: } else {
1212: PetscCall(PetscViewerASCIIPrintf(viewer, " HEM aggregator lists are not available\n"));
1213: }
1214: }
1215: }
1216: PetscFunctionReturn(PETSC_SUCCESS);
1217: }
1219: /*MC
1220: MATCOARSENHEM - A coarsener that uses HEM, a simple greedy heavy edge matching coarsener
1222: Level: beginner
1224: .seealso: `MatCoarsen`, `MatCoarsenMISKSetDistance()`, `MatCoarsenApply()`, `MatCoarsenSetType()`, `MatCoarsenType`, `MatCoarsenCreate()`, `MATCOARSENMISK`, `MATCOARSENMIS`
1225: M*/
1227: PETSC_EXTERN PetscErrorCode MatCoarsenCreate_HEM(MatCoarsen coarse)
1228: {
1229: PetscFunctionBegin;
1230: coarse->ops->apply = MatCoarsenApply_HEM;
1231: coarse->ops->view = MatCoarsenView_HEM;
1232: coarse->max_it = 4;
1233: PetscFunctionReturn(PETSC_SUCCESS);
1234: }
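/* Usage sketch (illustrative only, not part of hem.c): driving the HEM coarsener
   from user code on a symmetric graph matrix 'G' with positive edge weights (an
   assumed input); MatCoarsenGetData(), as used by PCGAMG, transfers ownership of
   the aggregate lists built above to the caller.

     MatCoarsen        crs;
     PetscCoarsenData *agg_lists;

     PetscCall(MatCoarsenCreate(PetscObjectComm((PetscObject)G), &crs));
     PetscCall(MatCoarsenSetType(crs, MATCOARSENHEM));
     PetscCall(MatCoarsenSetAdjacency(crs, G));
     PetscCall(MatCoarsenSetFromOptions(crs)); // e.g. number of matching iterations
     PetscCall(MatCoarsenApply(crs));
     PetscCall(MatCoarsenGetData(crs, &agg_lists)); // lists of vertices per aggregate
     PetscCall(MatCoarsenDestroy(&crs));
     // ... use agg_lists, then PetscCDDestroy(agg_lists) when done ...
*/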