/* mmaij.c */

  1: /*
  2:    Support for the parallel AIJ matrix vector multiply
  3: */
  4: #include <../src/mat/impls/aij/mpi/mpiaij.h>
  5: #include <petsc/private/vecimpl.h>
  6: #include <petsc/private/isimpl.h>

  8: PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat)
  9: {
 10:   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
 11:   Mat_SeqAIJ *B   = (Mat_SeqAIJ *)aij->B->data;
 12:   PetscInt    i, j, *aj = B->j, *garray;
 13:   PetscInt    ec = 0; /* Number of nonzero external columns */
 14:   IS          from, to;
 15:   Vec         gvec;
 16: #if defined(PETSC_USE_CTABLE)
 17:   PetscHMapI    gid1_lid1 = NULL;
 18:   PetscHashIter tpos;
 19:   PetscInt      gid, lid;
 20: #else
 21:   PetscInt N = mat->cmap->N, *indices;
 22: #endif

 24:   PetscFunctionBegin;
 25:   if (!aij->garray) {
 26:     PetscCheck(aij->B, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing B mat");
 27: #if defined(PETSC_USE_CTABLE)
 28:     /* use a table */
 29:     PetscCall(PetscHMapICreateWithSize(aij->B->rmap->n, &gid1_lid1));
 30:     for (i = 0; i < aij->B->rmap->n; i++) {
 31:       for (j = 0; j < B->ilen[i]; j++) {
 32:         PetscInt data, gid1 = aj[B->i[i] + j] + 1;
 33:         PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &data));
 34:         if (!data) {
 35:           /* one based table */
 36:           PetscCall(PetscHMapISet(gid1_lid1, gid1, ++ec));
 37:         }
 38:       }
 39:     }
 40:     /* form array of columns we need */
 41:     PetscCall(PetscMalloc1(ec, &garray));
 42:     PetscHashIterBegin(gid1_lid1, tpos);
 43:     while (!PetscHashIterAtEnd(gid1_lid1, tpos)) {
 44:       PetscHashIterGetKey(gid1_lid1, tpos, gid);
 45:       PetscHashIterGetVal(gid1_lid1, tpos, lid);
 46:       PetscHashIterNext(gid1_lid1, tpos);
 47:       gid--;
 48:       lid--;
 49:       garray[lid] = gid;
 50:     }
 51:     PetscCall(PetscSortInt(ec, garray)); /* sort, and rebuild */
 52:     PetscCall(PetscHMapIClear(gid1_lid1));
 53:     for (i = 0; i < ec; i++) PetscCall(PetscHMapISet(gid1_lid1, garray[i] + 1, i + 1));
 54:     /* compact out the extra columns in B */
 55:     for (i = 0; i < aij->B->rmap->n; i++) {
 56:       for (j = 0; j < B->ilen[i]; j++) {
 57:         PetscInt gid1 = aj[B->i[i] + j] + 1;
 58:         PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &lid));
 59:         lid--;
 60:         aj[B->i[i] + j] = lid;
 61:       }
 62:     }
 63:     PetscCall(PetscLayoutDestroy(&aij->B->cmap));
 64:     PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)aij->B), ec, ec, 1, &aij->B->cmap));
 65:     PetscCall(PetscHMapIDestroy(&gid1_lid1));
 66: #else
 67:     /* Make an array as long as the number of columns */
 68:     /* mark those columns that are in aij->B */
 69:     PetscCall(PetscCalloc1(N, &indices));
 70:     for (i = 0; i < aij->B->rmap->n; i++) {
 71:       for (j = 0; j < B->ilen[i]; j++) {
 72:         if (!indices[aj[B->i[i] + j]]) ec++;
 73:         indices[aj[B->i[i] + j]] = 1;
 74:       }
 75:     }

 77:     /* form array of columns we need */
 78:     PetscCall(PetscMalloc1(ec, &garray));
 79:     ec = 0;
 80:     for (i = 0; i < N; i++) {
 81:       if (indices[i]) garray[ec++] = i;
 82:     }

 84:     /* make indices now point into garray */
 85:     for (i = 0; i < ec; i++) indices[garray[i]] = i;

 87:     /* compact out the extra columns in B */
 88:     for (i = 0; i < aij->B->rmap->n; i++) {
 89:       for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
 90:     }
 91:     PetscCall(PetscLayoutDestroy(&aij->B->cmap));
 92:     PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)aij->B), ec, ec, 1, &aij->B->cmap));
 93:     PetscCall(PetscFree(indices));
 94: #endif
 95:     // If we did rewrite B->j[] and B->cmap, increase the state. Device matrices detect this state change to sync the device copy.
 96:     if (ec) aij->B->nonzerostate++;
 97:   } else {
 98:     garray = aij->garray;
 99:   }

101:   if (!aij->lvec) {
102:     PetscCheck(aij->B, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing B mat");
103:     PetscCall(MatCreateVecs(aij->B, &aij->lvec, NULL));
104:   }
105:   PetscCall(VecGetSize(aij->lvec, &ec));

107:   /* create two temporary Index sets for build scatter gather */
108:   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ec, garray, PETSC_COPY_VALUES, &from));
109:   PetscCall(ISCreateStride(PETSC_COMM_SELF, ec, 0, 1, &to));

111:   /* create temporary global vector to generate scatter context */
112:   /* This does not allocate the array's memory so is efficient */
113:   PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));

115:   /* generate the scatter context */
116:   PetscCall(VecScatterDestroy(&aij->Mvctx));
117:   PetscCall(VecScatterCreate(gvec, from, aij->lvec, to, &aij->Mvctx));
118:   PetscCall(VecScatterViewFromOptions(aij->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view"));
119:   aij->garray = garray;

121:   PetscCall(ISDestroy(&from));
122:   PetscCall(ISDestroy(&to));
123:   PetscCall(VecDestroy(&gvec));
124:   PetscFunctionReturn(PETSC_SUCCESS);
125: }

127: /* Disassemble the off-diag portion of the MPIAIJXxx matrix.
128:    In other words, change the B from reduced format using local col ids
129:    to expanded format using global col ids, which will make it easier to
130:    insert new nonzeros (with global col ids) into the matrix.
131:    The off-diag B determines communication in the matrix vector multiply.
   use_preallocation determines whether to use the preallocation or
133:    existing non-zero structure when creating the global form of B
134: */
PetscErrorCode MatDisAssemble_MPIAIJ(Mat A, PetscBool use_preallocation)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)A->data;
  Mat         B = aij->B, Bnew = NULL; /* if there is no existing B, aij->B ends up NULL below */

  PetscFunctionBegin;
  /* free stuff related to matrix-vec multiply */
  PetscCall(VecDestroy(&aij->lvec));
  if (aij->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDestroy(&aij->colmap));
#else
    PetscCall(PetscFree(aij->colmap));
#endif
  }

  if (B) {
    Mat_SeqAIJ        *Baij = (Mat_SeqAIJ *)B->data;
    PetscInt           i, j, m = B->rmap->n, n = A->cmap->N, col, ct = 0, *garray = aij->garray, *nz;
    PetscScalar        v;
    const PetscScalar *ba;

    /* make sure that B is assembled so we can access its values */
    PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

    /* invent new B and copy stuff over; per-row sizes come either from the
       original preallocation (ipre) or from the current nonzero counts */
    PetscCall(PetscMalloc1(m + 1, &nz));
    if (use_preallocation)
      for (i = 0; i < m; i++) nz[i] = Baij->ipre[i];
    else
      for (i = 0; i < m; i++) nz[i] = Baij->i[i + 1] - Baij->i[i];
    PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew));
    PetscCall(MatSetSizes(Bnew, m, n, m, n)); /* Bnew now uses A->cmap->N as its col size */
    PetscCall(MatSetBlockSizesFromMats(Bnew, A, A));
    PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
    PetscCall(MatSeqAIJSetPreallocation(Bnew, 0, nz));

    if (Baij->nonew >= 0) { /* Inherit insertion error options (if positive). */
      ((Mat_SeqAIJ *)Bnew->data)->nonew = Baij->nonew;
    }

    /*
     Ensure that B's nonzerostate is monotonically increasing.
     Or should this follow the MatSetValues() loop to preserve B's nonzerostate across a MatDisAssemble() call?
     */
    Bnew->nonzerostate = B->nonzerostate;

    PetscCall(PetscFree(nz));
    PetscCall(MatSeqAIJGetArrayRead(B, &ba));
    /* copy every entry of B into Bnew, translating local column ids back to
       global ones via garray; ct walks B's values/column arrays linearly */
    for (i = 0; i < m; i++) {
      for (j = Baij->i[i]; j < Baij->i[i + 1]; j++) {
        col = garray[Baij->j[ct]];
        v   = ba[ct++];
        /* reuse B's insertmode so pending ADD_VALUES/INSERT_VALUES semantics are preserved */
        PetscCall(MatSetValues(Bnew, 1, &i, 1, &col, &v, B->insertmode));
      }
    }
    PetscCall(MatSeqAIJRestoreArrayRead(B, &ba));
    PetscCall(MatDestroy(&B));
  }
  PetscCall(PetscFree(aij->garray));

  /* mark the matrix unassembled so subsequent MatSetValues()/assembly rebuild everything */
  aij->B           = Bnew;
  A->was_assembled = PETSC_FALSE;
  A->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Ugly stuff added for Glenn; someday we should fix this up.
   File-scope scratch state shared between MatMPIAIJDiagonalScaleLocalSetUp() and
   MatDiagonalScaleLocal_MPIAIJ(); it is freed/destroyed at the end of the latter,
   so the setup is redone on each scaling call. */

static PetscInt *auglyrmapd = NULL, *auglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
static Vec       auglydd = NULL, auglyoo = NULL;        /* work vectors used to scale the two parts of the local matrix */

208: static PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA, Vec scale)
209: {
210:   Mat_MPIAIJ *ina = (Mat_MPIAIJ *)inA->data; /*access private part of matrix */
211:   PetscInt    i, j, n, nt, cstart, cend, no, *garray = ina->garray, *lindices, bs = inA->rmap->bs;
212:   PetscInt   *r_rmapd, *r_rmapo;

214:   PetscFunctionBegin;
215:   PetscCall(MatGetOwnershipRange(inA, &cstart, &cend));
216:   PetscCall(MatGetSize(ina->A, NULL, &n));
217:   PetscCall(PetscCalloc1(inA->rmap->mapping->n, &r_rmapd));
218:   nt = 0;
219:   for (i = 0; i < inA->rmap->mapping->n; i++) {
220:     if (inA->rmap->mapping->indices[i] * bs >= cstart && inA->rmap->mapping->indices[i] * bs < cend) {
221:       nt++;
222:       r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
223:     }
224:   }
225:   PetscCheck(nt * bs == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt*bs %" PetscInt_FMT " n %" PetscInt_FMT, nt * bs, n);
226:   PetscCall(PetscMalloc1(n, &auglyrmapd));
227:   for (i = 0; i < inA->rmap->mapping->n; i++) {
228:     if (r_rmapd[i]) {
229:       for (j = 0; j < bs; j++) auglyrmapd[(r_rmapd[i] - 1) * bs + j - cstart] = i * bs + j;
230:     }
231:   }
232:   PetscCall(PetscFree(r_rmapd));
233:   PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &auglydd));

235:   /* This loop handling the setup for the off-diagonal local portion of the matrix is extremely similar to
236:      its counterpart in the MPIBAIJ version of this code.
237:      The tricky difference is that lindices[] uses block-based indexing (just as in the BAIJ code),
238:      but garray[] uses element-based indexing; hence the multiplications / divisions involving bs. */
239:   PetscCall(PetscCalloc1(inA->cmap->N / bs, &lindices));
240:   for (i = 0; i < ina->B->cmap->n / bs; i++) lindices[garray[i * bs] / bs] = i + 1;
241:   no = inA->rmap->mapping->n - nt;
242:   PetscCall(PetscCalloc1(inA->rmap->mapping->n, &r_rmapo));
243:   nt = 0;
244:   for (i = 0; i < inA->rmap->mapping->n; i++) {
245:     if (lindices[inA->rmap->mapping->indices[i]]) {
246:       nt++;
247:       r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
248:     }
249:   }
250:   PetscCheck(nt <= no, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT, nt, n);
251:   PetscCall(PetscFree(lindices));
252:   PetscCall(PetscMalloc1(nt * bs, &auglyrmapo));
253:   for (i = 0; i < inA->rmap->mapping->n; i++) {
254:     if (r_rmapo[i]) {
255:       for (j = 0; j < bs; j++) auglyrmapo[(r_rmapo[i] - 1) * bs + j] = i * bs + j;
256:     }
257:   }
258:   PetscCall(PetscFree(r_rmapo));
259:   PetscCall(VecCreateSeq(PETSC_COMM_SELF, nt * bs, &auglyoo));
260:   PetscFunctionReturn(PETSC_SUCCESS);
261: }

263: PetscErrorCode MatDiagonalScaleLocal_MPIAIJ(Mat A, Vec scale)
264: {
265:   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data; /*access private part of matrix */
266:   PetscInt           n, i;
267:   PetscScalar       *d, *o;
268:   const PetscScalar *s;

270:   PetscFunctionBegin;
271:   if (!auglyrmapd) PetscCall(MatMPIAIJDiagonalScaleLocalSetUp(A, scale));

273:   PetscCall(VecGetArrayRead(scale, &s));

275:   PetscCall(VecGetLocalSize(auglydd, &n));
276:   PetscCall(VecGetArray(auglydd, &d));
277:   for (i = 0; i < n; i++) d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
278:   PetscCall(VecRestoreArray(auglydd, &d));
279:   /* column scale "diagonal" portion of local matrix */
280:   PetscCall(MatDiagonalScale(a->A, NULL, auglydd));

282:   PetscCall(VecGetLocalSize(auglyoo, &n));
283:   PetscCall(VecGetArray(auglyoo, &o));
284:   for (i = 0; i < n; i++) o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
285:   PetscCall(VecRestoreArrayRead(scale, &s));
286:   PetscCall(VecRestoreArray(auglyoo, &o));
287:   /* column scale "off-diagonal" portion of local matrix */
288:   PetscCall(MatDiagonalScale(a->B, NULL, auglyoo));
289:   PetscCall(PetscFree(auglyrmapd));
290:   PetscCall(PetscFree(auglyrmapo));
291:   PetscCall(VecDestroy(&auglydd));
292:   PetscCall(VecDestroy(&auglyoo));
293:   PetscFunctionReturn(PETSC_SUCCESS);
294: }