Actual source code: sbaijov.c
1: /*
2: Routines to compute overlapping regions of a parallel MPI matrix.
3: Used for finding submatrices that are shared across processors.
4: */
5: #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
6: #include <petscbt.h>
8: static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Once(Mat, PetscInt, IS *);
9: static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Local(Mat, PetscInt *, PetscInt, PetscInt *, PetscBT *);
11: PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat C, PetscInt is_max, IS is[], PetscInt ov)
12: {
13: PetscInt i, N = C->cmap->N, bs = C->rmap->bs, M = C->rmap->N, Mbs = M / bs, *nidx, isz, iov;
14: IS *is_new, *is_row;
15: Mat *submats;
16: Mat_MPISBAIJ *c = (Mat_MPISBAIJ *)C->data;
17: Mat_SeqSBAIJ *asub_i;
18: PetscBT table;
19: PetscInt *ai, brow, nz, nis, l, nmax, nstages_local, nstages, max_no, pos;
20: const PetscInt *idx;
21: PetscBool flg;
23: PetscFunctionBegin;
24: PetscCall(PetscMalloc1(is_max, &is_new));
25: /* Convert the indices into block format */
26: PetscCall(ISCompressIndicesGeneral(N, C->rmap->n, bs, is_max, is, is_new));
27: PetscCheck(ov >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative overlap specified");
29: /* check whether the previous non-scalable implementation was requested */
30: flg = PETSC_FALSE;
31: PetscCall(PetscOptionsHasName(NULL, NULL, "-IncreaseOverlap_old", &flg));
32: if (flg) { /* previous non-scalable implementation */
33: printf("use previous non-scalable implementation...\n");
34: for (i = 0; i < ov; ++i) PetscCall(MatIncreaseOverlap_MPISBAIJ_Once(C, is_max, is_new));
35: } else { /* implementation using modified BAIJ routines */
37: PetscCall(PetscMalloc1(Mbs + 1, &nidx));
38: PetscCall(PetscBTCreate(Mbs, &table)); /* for column search */
40: /* Create is_row */
41: PetscCall(PetscMalloc1(is_max, &is_row));
42: PetscCall(ISCreateStride(PETSC_COMM_SELF, Mbs, 0, 1, &is_row[0]));
44: for (i = 1; i < is_max; i++) { is_row[i] = is_row[0]; /* reuse is_row[0] */ }
46: /* Allocate memory to hold all the submatrices - Modified from MatCreateSubMatrices_MPIBAIJ() */
47: PetscCall(PetscMalloc1(is_max + 1, &submats));
49: /* Determine the number of stages through which submatrices are done */
50: nmax = 20 * 1000000 / (c->Nbs * sizeof(PetscInt));
51: if (!nmax) nmax = 1;
52: nstages_local = is_max / nmax + ((is_max % nmax) ? 1 : 0);
54: /* Make sure every processor loops through the same number of stages */
55: PetscCallMPI(MPIU_Allreduce(&nstages_local, &nstages, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)C)));
57: {
58: const PetscObject obj = (PetscObject)c->A;
59: size_t new_len, cur_len, max_len;
61: PetscCall(PetscStrlen(MATSEQBAIJ, &new_len));
62: PetscCall(PetscStrlen(MATSEQSBAIJ, &cur_len));
63: max_len = PetscMax(cur_len, new_len) + 1;
64: PetscCall(PetscRealloc(max_len * sizeof(*obj->type_name), &obj->type_name));
65: /* The resulting submatrices should be BAIJ, not SBAIJ, hence we change this value to
66: trigger that */
67: for (iov = 0; iov < ov; ++iov) {
68: /* 1) Get submats for column search */
69: PetscCall(PetscStrncpy(obj->type_name, MATSEQBAIJ, max_len));
70: for (i = 0, pos = 0; i < nstages; i++) {
71: if (pos + nmax <= is_max) max_no = nmax;
72: else if (pos == is_max) max_no = 0;
73: else max_no = is_max - pos;
74: c->ijonly = PETSC_TRUE; /* only matrix data structures are requested */
76: PetscCall(MatCreateSubMatrices_MPIBAIJ_local(C, max_no, is_row + pos, is_new + pos, MAT_INITIAL_MATRIX, submats + pos, PETSC_TRUE));
77: pos += max_no;
78: }
79: PetscCall(PetscStrncpy(obj->type_name, MATSEQSBAIJ, max_len));
81: /* 2) Row search */
82: PetscCall(MatIncreaseOverlap_MPIBAIJ_Once(C, is_max, is_new));
84: /* 3) Column search */
85: for (i = 0; i < is_max; i++) {
86: asub_i = (Mat_SeqSBAIJ *)submats[i]->data;
87: ai = asub_i->i;
89: /* put is_new obtained from MatIncreaseOverlap_MPIBAIJ_Once() into the table */
90: PetscCall(PetscBTMemzero(Mbs, table));
92: PetscCall(ISGetIndices(is_new[i], &idx));
93: PetscCall(ISGetLocalSize(is_new[i], &nis));
94: for (l = 0; l < nis; l++) {
95: PetscCall(PetscBTSet(table, idx[l]));
96: nidx[l] = idx[l];
97: }
98: isz = nis;
100: /* add column entries to table */
101: for (brow = 0; brow < Mbs; brow++) {
102: nz = ai[brow + 1] - ai[brow];
103: if (nz) {
104: if (!PetscBTLookupSet(table, brow)) nidx[isz++] = brow;
105: }
106: }
107: PetscCall(ISRestoreIndices(is_new[i], &idx));
108: PetscCall(ISDestroy(&is_new[i]));
110: /* create updated is_new */
111: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, isz, nidx, PETSC_COPY_VALUES, is_new + i));
112: }
114: /* Free tmp spaces */
115: for (i = 0; i < is_max; i++) PetscCall(MatDestroy(&submats[i]));
116: }
118: PetscCall(PetscBTDestroy(&table));
119: PetscCall(PetscFree(submats));
120: PetscCall(ISDestroy(&is_row[0]));
121: PetscCall(PetscFree(is_row));
122: PetscCall(PetscFree(nidx));
123: }
124: }
125: for (i = 0; i < is_max; i++) {
126: PetscCall(ISDestroy(&is[i]));
127: PetscCall(ISGetLocalSize(is_new[i], &nis));
128: PetscCall(ISGetIndices(is_new[i], &idx));
129: PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)is_new[i]), bs, nis, idx, PETSC_COPY_VALUES, &is[i]));
130: PetscCall(ISDestroy(&is_new[i]));
131: }
132: PetscCall(PetscFree(is_new));
133: PetscFunctionReturn(PETSC_SUCCESS);
134: }
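/*
   A minimal usage sketch of the public entry point MatIncreaseOverlap(), which dispatches to
   the routine above for MATMPISBAIJ matrices. The variable names and the overlap value ovlp
   are illustrative assumptions, not taken from this file; C is assumed to be an assembled
   MATMPISBAIJ matrix.

     IS       is;
     Mat     *sub;
     PetscInt rstart, rend, ovlp = 2;

     PetscCall(MatGetOwnershipRange(C, &rstart, &rend));
     PetscCall(ISCreateStride(PETSC_COMM_SELF, rend - rstart, rstart, 1, &is));   // start from the locally owned rows
     PetscCall(MatIncreaseOverlap(C, 1, &is, ovlp));                              // grow the index set by ovlp levels of overlap
     PetscCall(MatCreateSubMatrices(C, 1, &is, &is, MAT_INITIAL_MATRIX, &sub));   // extract the corresponding overlapping submatrix
     PetscCall(MatDestroySubMatrices(1, &sub));
     PetscCall(ISDestroy(&is));
*/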
136: typedef enum {
137: MINE,
138: OTHER
139: } WhoseOwner;
140: /* data1, odata1 and odata2 are packed in the format (for communication):
141: data[0] = is_max, no of is
142: data[1] = size of is[0]
143: ...
144: data[is_max] = size of is[is_max-1]
145: data[is_max + 1] = data(is[0])
146: ...
147: data[is_max+1+sum(size of is[k]), k=0,...,i-1] = data(is[i])
148: ...
149: data2 is packed in the format (for creating output is[]):
150: data[0] = is_max, no of is
151: data[1] = size of is[0]
152: ...
153: data[is_max] = size of is[is_max-1]
154: data[is_max + 1] = data(is[0])
155: ...
156: data[is_max + 1 + Mbs*i] = data(is[i])
157: ...
158: */
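/*
   A small worked example of the two layouts above (values chosen for illustration only):
   with is_max = 2, is[0] = {3,7,9}, is[1] = {2,5} and Mbs = 10,

     data1 = { 2,  3, 2,  3, 7, 9,  2, 5 }
     i.e. data1[0] = is_max, data1[1..2] = the sizes {3, 2}, followed by the entries of is[0]
     and is[1] stored back to back;

     data2 has the same header, but the entries of is[i] start at the fixed offset
     is_max + 1 + Mbs*i (here 3 for is[0] and 13 for is[1]), so each set has room to grow up
     to Mbs entries as indices are added.
*/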
159: static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Once(Mat C, PetscInt is_max, IS is[])
160: {
161: Mat_MPISBAIJ *c = (Mat_MPISBAIJ *)C->data;
162: PetscMPIInt proc_end = 0, size, rank, tag1, tag2, *len_s, nrqr, nrqs, *id_r1, *len_r1, flag, *iwork;
163: const PetscInt *idx_i;
164: PetscInt idx, isz, col, *n, *data1, **data1_start, *data2, *data2_i, *data, *data_i, len;
165: PetscInt Mbs, i, j, k, *odata1, *odata2;
166: PetscInt **odata2_ptr, *ctable = NULL, *btable, len_max, len_est;
167: PetscInt len_unused, nodata2;
168: PetscInt ois_max; /* max no. of is[] over all processors */
169: char *t_p;
170: MPI_Comm comm;
171: MPI_Request *s_waits1, *s_waits2, r_req;
172: MPI_Status *s_status, r_status;
173: PetscBT *table; /* mark indices of this processor's is[] */
174: PetscBT table_i;
175: PetscBT otable; /* mark indices of other processors' is[] */
176: PetscInt bs = C->rmap->bs, Bn = c->B->cmap->n, Bnbs = Bn / bs, *Bowners;
177: IS garray_local, garray_gl;
179: PetscFunctionBegin;
180: PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
181: size = c->size;
182: rank = c->rank;
183: Mbs = c->Mbs;
185: PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag1));
186: PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag2));
188: /* create tables used in
189: step 1: table[i] - mark c->garray of proc [i]
190: step 3: table[i] - mark indices of is[i] when whose=MINE
191: table[0] - mark indices of is[] when whose=OTHER */
192: len = PetscMax(is_max, size);
193: PetscCall(PetscMalloc2(len, &table, (Mbs / PETSC_BITS_PER_BYTE + 1) * len, &t_p));
194: for (i = 0; i < len; i++) table[i] = t_p + (Mbs / PETSC_BITS_PER_BYTE + 1) * i;
196: PetscCallMPI(MPIU_Allreduce(&is_max, &ois_max, 1, MPIU_INT, MPI_MAX, comm));
198: /* 1. Send this processor's is[] to other processors */
199: /* allocate spaces */
200: PetscCall(PetscMalloc1(is_max, &n));
201: len = 0;
202: for (i = 0; i < is_max; i++) {
203: PetscCall(ISGetLocalSize(is[i], &n[i]));
204: len += n[i];
205: }
206: if (!len) {
207: is_max = 0;
208: } else {
209: len += 1 + is_max; /* max length of data1 for one processor */
210: }
212: PetscCall(PetscMalloc1(size * len + 1, &data1));
213: PetscCall(PetscMalloc1(size, &data1_start));
214: for (i = 0; i < size; i++) data1_start[i] = data1 + i * len;
216: PetscCall(PetscMalloc4(size, &len_s, size, &btable, size, &iwork, size + 1, &Bowners));
218: /* gather c->garray from all processors */
219: PetscCall(ISCreateGeneral(comm, Bnbs, c->garray, PETSC_COPY_VALUES, &garray_local));
220: PetscCall(ISAllGather(garray_local, &garray_gl));
221: PetscCall(ISDestroy(&garray_local));
222: PetscCallMPI(MPI_Allgather(&Bnbs, 1, MPIU_INT, Bowners + 1, 1, MPIU_INT, comm));
224: Bowners[0] = 0;
225: for (i = 0; i < size; i++) Bowners[i + 1] += Bowners[i];
227: if (is_max) {
228: /* ctable maps each block row to its owner proc_id */
229: PetscCall(PetscMalloc1(Mbs, &ctable));
230: j = 0;
231: for (PetscMPIInt proc_id = 0; proc_id < size; proc_id++) {
232: for (; j < C->rmap->range[proc_id + 1] / bs; j++) ctable[j] = proc_id;
233: }
235: /* hash tables marking c->garray */
236: PetscCall(ISGetIndices(garray_gl, &idx_i));
237: for (i = 0; i < size; i++) {
238: table_i = table[i];
239: PetscCall(PetscBTMemzero(Mbs, table_i));
240: for (j = Bowners[i]; j < Bowners[i + 1]; j++) { /* go through B cols of proc[i]*/
241: PetscCall(PetscBTSet(table_i, idx_i[j]));
242: }
243: }
244: PetscCall(ISRestoreIndices(garray_gl, &idx_i));
245: } /* if (is_max) */
246: PetscCall(ISDestroy(&garray_gl));
248: /* evaluate communication - message destinations, lengths, and buffer space */
249: for (i = 0; i < size; i++) len_s[i] = 0;
251: /* header of data1 */
252: for (PetscMPIInt proc_id = 0; proc_id < size; proc_id++) {
253: iwork[proc_id] = 0;
254: *data1_start[proc_id] = is_max;
255: data1_start[proc_id]++;
256: for (j = 0; j < is_max; j++) {
257: if (proc_id == rank) {
258: *data1_start[proc_id] = n[j];
259: } else {
260: *data1_start[proc_id] = 0;
261: }
262: data1_start[proc_id]++;
263: }
264: }
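  /* In the loop below, proc_end = ctable[idx] is the owner of block row idx. Since only the
     upper triangular part is stored, only a process with rank < proc_end can reference idx as
     an off-diagonal column (detected via table[proc_id], built from that process's garray);
     hence idx is sent to its owner and, among lower ranks, only to those whose garray contains it. */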
266: for (i = 0; i < is_max; i++) {
267: PetscCall(ISGetIndices(is[i], &idx_i));
268: for (j = 0; j < n[i]; j++) {
269: idx = idx_i[j];
270: *data1_start[rank] = idx;
271: data1_start[rank]++; /* for local processing */
272: PetscCall(PetscMPIIntCast(ctable[idx], &proc_end));
273: for (PetscMPIInt proc_id = 0; proc_id <= proc_end; proc_id++) { /* for others to process */
274: if (proc_id == rank) continue; /* done before this loop */
275: if (proc_id < proc_end && !PetscBTLookup(table[proc_id], idx)) continue; /* no need for sending idx to [proc_id] */
276: *data1_start[proc_id] = idx;
277: data1_start[proc_id]++;
278: len_s[proc_id]++;
279: }
280: }
281: /* update header data */
282: for (PetscMPIInt proc_id = 0; proc_id < size; proc_id++) {
283: if (proc_id == rank) continue;
284: *(data1 + proc_id * len + 1 + i) = len_s[proc_id] - iwork[proc_id];
285: iwork[proc_id] = len_s[proc_id];
286: }
287: PetscCall(ISRestoreIndices(is[i], &idx_i));
288: }
290: nrqs = 0;
291: nrqr = 0;
292: for (i = 0; i < size; i++) {
293: data1_start[i] = data1 + i * len;
294: if (len_s[i]) {
295: nrqs++;
296: len_s[i] += 1 + is_max; /* add no. of header msg */
297: }
298: }
300: for (i = 0; i < is_max; i++) PetscCall(ISDestroy(&is[i]));
301: PetscCall(PetscFree(n));
302: PetscCall(PetscFree(ctable));
304: /* Determine the number of messages to expect, their lengths, and from-ids */
305: PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &nrqr));
306: PetscCall(PetscGatherMessageLengths(comm, nrqs, nrqr, len_s, &id_r1, &len_r1));
308: /* Now post the sends */
309: PetscCall(PetscMalloc2(size, &s_waits1, size, &s_waits2));
310: k = 0;
311: for (PetscMPIInt proc_id = 0; proc_id < size; proc_id++) { /* send data1 to processor [proc_id] */
312: if (len_s[proc_id]) {
313: PetscCallMPI(MPIU_Isend(data1_start[proc_id], len_s[proc_id], MPIU_INT, proc_id, tag1, comm, s_waits1 + k));
314: k++;
315: }
316: }
318: /* 2. Receive other processors' is[], process them, then send the results back */
319: len = 0;
320: for (i = 0; i < nrqr; i++) {
321: if (len_r1[i] > len) len = len_r1[i];
322: }
323: PetscCall(PetscFree(len_r1));
324: PetscCall(PetscFree(id_r1));
326: for (PetscMPIInt proc_id = 0; proc_id < size; proc_id++) len_s[proc_id] = iwork[proc_id] = 0;
328: PetscCall(PetscMalloc1(len + 1, &odata1));
329: PetscCall(PetscMalloc1(size, &odata2_ptr));
330: PetscCall(PetscBTCreate(Mbs, &otable));
332: len_max = ois_max * (Mbs + 1); /* max space storing all is[] for each receive */
333: len_est = 2 * len_max; /* estimated space for storing is[] from all received messages */
334: PetscCall(PetscMalloc1(len_est + 1, &odata2));
335: nodata2 = 0; /* nodata2+1 = number of PetscMalloc() calls made for odata2_ptr[] */
337: odata2_ptr[nodata2] = odata2;
339: len_unused = len_est; /* unused space in the array odata2_ptr[nodata2]; must remain >= len_max */
341: k = 0;
342: while (k < nrqr) {
343: PetscMPIInt ilen;
345: /* Receive messages */
346: PetscCallMPI(MPI_Iprobe(MPI_ANY_SOURCE, tag1, comm, &flag, &r_status));
347: if (flag) {
348: PetscMPIInt proc_id;
350: PetscCallMPI(MPI_Get_count(&r_status, MPIU_INT, &ilen));
351: proc_id = r_status.MPI_SOURCE;
352: PetscCallMPI(MPIU_Irecv(odata1, ilen, MPIU_INT, proc_id, r_status.MPI_TAG, comm, &r_req));
353: PetscCallMPI(MPI_Wait(&r_req, &r_status));
355: /* Process messages */
356: /* make sure there is enough unused space in odata2 array */
357: if (len_unused < len_max) { /* allocate more space for odata2 */
358: PetscCall(PetscMalloc1(len_est + 1, &odata2));
359: odata2_ptr[++nodata2] = odata2;
360: len_unused = len_est;
361: }
363: PetscCall(MatIncreaseOverlap_MPISBAIJ_Local(C, odata1, OTHER, odata2, &otable));
364: len = 1 + odata2[0];
365: for (i = 0; i < odata2[0]; i++) len += odata2[1 + i];
367: /* Send messages back */
368: PetscCallMPI(MPIU_Isend(odata2, len, MPIU_INT, proc_id, tag2, comm, s_waits2 + k));
369: k++;
370: odata2 += len;
371: len_unused -= len;
372: PetscCall(PetscMPIIntCast(len, &len_s[proc_id])); /* length of message sending back to proc_id */
373: }
374: }
375: PetscCall(PetscFree(odata1));
376: PetscCall(PetscBTDestroy(&otable));
378: /* 3. Do local work on this processor's is[] */
379: /* make sure there is enough unused space in odata2(=data) array */
380: len_max = is_max * (Mbs + 1); /* max space storing all is[] for this processor */
381: if (len_unused < len_max) { /* allocate more space for odata2 */
382: PetscCall(PetscMalloc1(len_est + 1, &odata2));
384: odata2_ptr[++nodata2] = odata2;
385: }
387: data = odata2;
388: PetscCall(MatIncreaseOverlap_MPISBAIJ_Local(C, data1_start[rank], MINE, data, table));
389: PetscCall(PetscFree(data1_start));
391: /* 4. Receive work done on other processors, then merge */
392: /* get max number of messages that this processor expects to recv */
393: PetscCallMPI(MPIU_Allreduce(len_s, iwork, size, MPI_INT, MPI_MAX, comm));
394: PetscCall(PetscMalloc1(iwork[rank] + 1, &data2));
395: PetscCall(PetscFree4(len_s, btable, iwork, Bowners));
397: k = 0;
398: while (k < nrqs) {
399: /* Receive messages */
400: PetscCallMPI(MPI_Iprobe(MPI_ANY_SOURCE, tag2, comm, &flag, &r_status));
401: if (flag) {
402: PetscMPIInt proc_id, ilen;
403: PetscCallMPI(MPI_Get_count(&r_status, MPIU_INT, &ilen));
404: proc_id = r_status.MPI_SOURCE;
405: PetscCallMPI(MPIU_Irecv(data2, ilen, MPIU_INT, proc_id, r_status.MPI_TAG, comm, &r_req));
406: PetscCallMPI(MPI_Wait(&r_req, &r_status));
407: if (ilen > 1 + is_max) { /* Add data2 into data */
408: data2_i = data2 + 1 + is_max;
409: for (i = 0; i < is_max; i++) {
410: table_i = table[i];
411: data_i = data + 1 + is_max + Mbs * i;
412: isz = data[1 + i];
413: for (j = 0; j < data2[1 + i]; j++) {
414: col = data2_i[j];
415: if (!PetscBTLookupSet(table_i, col)) data_i[isz++] = col;
416: }
417: data[1 + i] = isz;
418: if (i < is_max - 1) data2_i += data2[1 + i];
419: }
420: }
421: k++;
422: }
423: }
424: PetscCall(PetscFree(data2));
425: PetscCall(PetscFree2(table, t_p));
427: /* wait for phase 1 sends to complete, then free the send buffer */
428: PetscCall(PetscMalloc1(size, &s_status));
429: if (nrqs) PetscCallMPI(MPI_Waitall(nrqs, s_waits1, s_status));
430: PetscCall(PetscFree(data1));
432: /* wait for phase 2 sends to complete */
433: if (nrqr) PetscCallMPI(MPI_Waitall(nrqr, s_waits2, s_status));
434: PetscCall(PetscFree2(s_waits1, s_waits2));
435: PetscCall(PetscFree(s_status));
437: /* 5. Create new is[] */
438: for (i = 0; i < is_max; i++) {
439: data_i = data + 1 + is_max + Mbs * i;
440: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, data[1 + i], data_i, PETSC_COPY_VALUES, is + i));
441: }
442: for (k = 0; k <= nodata2; k++) PetscCall(PetscFree(odata2_ptr[k]));
443: PetscCall(PetscFree(odata2_ptr));
444: PetscFunctionReturn(PETSC_SUCCESS);
445: }
447: /*
448: MatIncreaseOverlap_MPISBAIJ_Local - Called by MatIncreaseOverlap_MPISBAIJ_Once() to do
449: the work on the local processor.
451: Inputs:
452: C - MAT_MPISBAIJ;
453: data - holds is[]. See MatIncreaseOverlap_MPISBAIJ_Once() for the format.
454: whose - whose is[] to be processed,
455: MINE: this processor's is[]
456: OTHER: other processor's is[]
457: Output:
458: nidx - whose = MINE:
459: holds the input and the newly found indices, packed in the data2 format (fixed stride Mbs per is[i])
460: whose = OTHER:
461: only holds the newly found indices
462: table - table[i]: marks the indices of is[i], i=0,...,is_max-1. Used only when whose=MINE.
463: */
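/*
   For example (illustrative sizes): with is_max = 2 and Mbs = 10, a whose=MINE call writes the
   result for is[i] starting at nidx[3 + 10*i] and stores its length in nidx[1+i], while a
   whose=OTHER call packs only the newly found indices back to back starting at nidx[3] - the
   contiguous layout that MatIncreaseOverlap_MPISBAIJ_Once() sends back to the requesting process.
*/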
464: /* Would computation be reduced by swapping the loop 'for each is' and 'for each row'? */
465: static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Local(Mat C, PetscInt *data, PetscInt whose, PetscInt *nidx, PetscBT *table)
466: {
467: Mat_MPISBAIJ *c = (Mat_MPISBAIJ *)C->data;
468: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)c->A->data;
469: Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)c->B->data;
470: PetscInt row, mbs, Mbs, *nidx_i, col, col_max, isz, isz0, *ai, *aj, *bi, *bj, *garray, rstart, l;
471: PetscInt a_start, a_end, b_start, b_end, i, j, k, is_max, *idx_i, n;
472: PetscBT table0; /* mark the indices of input is[] for look up */
473: PetscBT table_i; /* points to i-th table. When whose=OTHER, a single table is used for all is[] */
475: PetscFunctionBegin;
476: Mbs = c->Mbs;
477: mbs = a->mbs;
478: ai = a->i;
479: aj = a->j;
480: bi = b->i;
481: bj = b->j;
482: garray = c->garray;
483: rstart = c->rstartbs;
484: is_max = data[0];
486: PetscCall(PetscBTCreate(Mbs, &table0));
488: nidx[0] = is_max;
489: idx_i = data + is_max + 1; /* ptr to input is[0] array */
490: nidx_i = nidx + is_max + 1; /* ptr to output is[0] array */
491: for (i = 0; i < is_max; i++) { /* for each is */
492: isz = 0;
493: n = data[1 + i]; /* size of input is[i] */
495: /* initialize and set table_i (marks idx and nidx) and table0 (marks idx only) */
496: if (whose == MINE) { /* process this processor's is[] */
497: table_i = table[i];
498: nidx_i = nidx + 1 + is_max + Mbs * i;
499: } else { /* process other processor's is[] - only use one temp table */
500: table_i = table[0];
501: }
502: PetscCall(PetscBTMemzero(Mbs, table_i));
503: PetscCall(PetscBTMemzero(Mbs, table0));
504: if (n == 0) {
505: nidx[1 + i] = 0; /* size of new is[i] */
506: continue;
507: }
509: isz0 = 0;
510: col_max = 0;
511: for (j = 0; j < n; j++) {
512: col = idx_i[j];
513: PetscCheck(col < Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "index col %" PetscInt_FMT " >= Mbs %" PetscInt_FMT, col, Mbs);
514: if (!PetscBTLookupSet(table_i, col)) {
515: PetscCall(PetscBTSet(table0, col));
516: if (whose == MINE) nidx_i[isz0] = col;
517: if (col_max < col) col_max = col;
518: isz0++;
519: }
520: }
522: if (whose == MINE) isz = isz0;
523: k = 0; /* no. of indices from input is[i] that have been examined */
524: for (row = 0; row < mbs; row++) {
525: a_start = ai[row];
526: a_end = ai[row + 1];
527: b_start = bi[row];
528: b_end = bi[row + 1];
529: if (PetscBTLookup(table0, row + rstart)) { /* row is on input is[i]:
530: do row search: collect all col in this row */
531: for (l = a_start; l < a_end; l++) { /* Amat */
532: col = aj[l] + rstart;
533: if (!PetscBTLookupSet(table_i, col)) nidx_i[isz++] = col;
534: }
535: for (l = b_start; l < b_end; l++) { /* Bmat */
536: col = garray[bj[l]];
537: if (!PetscBTLookupSet(table_i, col)) nidx_i[isz++] = col;
538: }
539: k++;
540: if (k >= isz0) break; /* for (row=0; row<mbs; row++) */
541: } else { /* row is not on input is[i]:
542: do col search: add row onto nidx_i if this row has a stored col marked in table0 (i.e., in the input is[i]) */
543: for (l = a_start; l < a_end; l++) { /* Amat */
544: col = aj[l] + rstart;
545: if (col > col_max) break;
546: if (PetscBTLookup(table0, col)) {
547: if (!PetscBTLookupSet(table_i, row + rstart)) nidx_i[isz++] = row + rstart;
548: break; /* for (l = a_start; l < a_end; l++) */
549: }
550: }
551: for (l = b_start; l < b_end; l++) { /* Bmat */
552: col = garray[bj[l]];
553: if (col > col_max) break;
554: if (PetscBTLookup(table0, col)) {
555: if (!PetscBTLookupSet(table_i, row + rstart)) nidx_i[isz++] = row + rstart;
556: break; /* for (l = b_start; l < b_end; l++) */
557: }
558: }
559: }
560: }
562: if (i < is_max - 1) {
563: idx_i += n; /* ptr to input is[i+1] array */
564: nidx_i += isz; /* ptr to output is[i+1] array */
565: }
566: nidx[1 + i] = isz; /* size of new is[i] */
567: } /* for each is */
568: PetscCall(PetscBTDestroy(&table0));
569: PetscFunctionReturn(PETSC_SUCCESS);
570: }
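/*
   An illustrative trace of the row/column search above (assumed values, bs = 1 for simplicity):
   on one process with Mbs = mbs = 4 and only the upper triangle stored, suppose the pattern is

     row 0: cols {0, 2},  row 1: cols {1, 3},  row 2: cols {2},  row 3: cols {3}

   and the input is is[0] = {1}. The row search on row 1 (marked in table0) appends its stored
   columns, giving nidx_i = {1, 3}; the column search on row 0 finds no stored column of row 0
   in is[0], so nothing more is added. Had row 0 contained column 1, the column search would
   also have appended row 0, recovering the coupling that storing only the upper triangle
   leaves implicit.
*/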