Actual source code: mmsbaij.c

/*
   Support for the parallel SBAIJ matrix vector multiply
*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>

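/*
   Builds the data structures needed to apply an MPISBAIJ matrix:
     - garray: the global block-column indices of the columns of B (the off-diagonal part),
       used to compact B's column indices to 0..ec-1
     - lvec/Mvctx: a conventional ghost vector and scatter (not used by MatMult_MPISBAIJ()
       itself, but by routines such as MatDiagonalScale_MPISBAIJ())
     - slvec0/slvec1/sMvctx: the symmetric-multiply work vectors and scatter; as the "x" and
       "b" index comments below indicate, a single scatter moves both the needed off-process
       entries of x and the locally computed contributions to off-process rows
*/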
PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ *)mat->data;
  Mat_SeqBAIJ    *B     = (Mat_SeqBAIJ *)sbaij->B->data;
  PetscInt        i, j, *aj = B->j, ec = 0, *garray, *sgarray;
  PetscInt        bs = mat->rmap->bs, *stmp, mbs = sbaij->mbs, vec_size, nt;
  IS              from, to;
  Vec             gvec;
  PetscMPIInt     rank   = sbaij->rank, lsize;
  PetscInt       *owners = sbaij->rangebs, *ec_owner, k;
  const PetscInt *sowners;
  PetscScalar    *ptr;
#if defined(PETSC_USE_CTABLE)
  PetscHMapI    gid1_lid1 = NULL; /* one-based gid to lid table */
  PetscHashIter tpos;
  PetscInt      gid, lid;
#else
  PetscInt  Nbs = sbaij->Nbs;
  PetscInt *indices;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapICreateWithSize(mbs, &gid1_lid1));
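  /* count the distinct block columns referenced by B: keys are one-based global
     block-column ids and values are one-based local ids, so 0 can mean "not seen" */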
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      PetscInt data, gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &data));
      if (!data) PetscCall(PetscHMapISet(gid1_lid1, gid1, ++ec));
    }
  }
  /* form array of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  PetscHashIterBegin(gid1_lid1, tpos);
  while (!PetscHashIterAtEnd(gid1_lid1, tpos)) {
    PetscHashIterGetKey(gid1_lid1, tpos, gid);
    PetscHashIterGetVal(gid1_lid1, tpos, lid);
    PetscHashIterNext(gid1_lid1, tpos);
    gid--;
    lid--;
    garray[lid] = gid;
  }
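  /* sort the needed global block columns and rebuild the hash so that local ids
     increase with global column order */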
  PetscCall(PetscSortInt(ec, garray));
  PetscCall(PetscHMapIClear(gid1_lid1));
  for (i = 0; i < ec; i++) PetscCall(PetscHMapISet(gid1_lid1, garray[i] + 1, i + 1));
  /* compact out the extra columns in B */
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &lid));
      lid--;
      aj[B->i[i] + j] = lid;
    }
  }
  PetscCall(PetscHMapIDestroy(&gid1_lid1));
  PetscCall(PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner));
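  /* for each needed block column record the rank that owns it; owners[] holds the
     block-row ownership ranges and garray is sorted, so one sweep suffices */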
  for (i = j = 0; i < ec; i++) {
    while (garray[i] >= owners[j + 1]) j++;
    ec_owner[i] = j;
  }
#else
  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in sbaij->B */
  PetscCall(PetscCalloc1(Nbs, &indices));
  for (i = 0; i < mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form arrays of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  PetscCall(PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner));

  ec = 0;
  for (j = 0; j < sbaij->size; j++) {
    for (i = owners[j]; i < owners[j + 1]; i++) {
      if (indices[i]) {
        garray[ec]   = i;
        ec_owner[ec] = j;
        ec++;
      }
    }
  }

  /* make indices now point into garray */
  for (i = 0; i < ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B */
  for (i = 0; i < mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
  }
  PetscCall(PetscFree(indices));
#endif
  B->nbs = ec;
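  /* reset B's column layout to the compacted width ec*bs */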
  PetscCall(PetscLayoutDestroy(&sbaij->B->cmap));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sbaij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &sbaij->B->cmap));

  PetscCall(VecScatterDestroy(&sbaij->sMvctx));
  /* create local vector that is used to scatter into */
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec * bs, &sbaij->lvec));

  /* create two temporary index sets for building scatter-gather */
  PetscCall(PetscMalloc1(2 * ec, &stmp));
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from));
  for (i = 0; i < ec; i++) stmp[i] = i;
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_COPY_VALUES, &to));

  /* generate the scatter context
     -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but have other uses, such as in MatDiagonalScale_MPISBAIJ */
  PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));
  PetscCall(VecScatterCreate(gvec, from, sbaij->lvec, to, &sbaij->Mvctx));
  PetscCall(VecDestroy(&gvec));

  sbaij->garray = garray;

  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));

  /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
  lsize = (mbs + ec) * bs;
  PetscCall(VecCreateMPI(PetscObjectComm((PetscObject)mat), lsize, PETSC_DETERMINE, &sbaij->slvec0));
  PetscCall(VecDuplicate(sbaij->slvec0, &sbaij->slvec1));
  PetscCall(VecGetSize(sbaij->slvec0, &vec_size));

  PetscCall(VecGetOwnershipRanges(sbaij->slvec0, &sowners));

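  /* each process's slice of slvec0/slvec1 is (mbs + ec)*bs long: the first mbs blocks
     mirror the local part of x (the "x" indices), the trailing ec blocks hold the ghost
     values (the "b" indices); sgarray expresses the needed columns in this stacked global
     numbering, hence the sowners[j]/bs - owners[j] shift */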
  /* x index in the IS sfrom */
  for (i = 0; i < ec; i++) {
    j          = ec_owner[i];
    sgarray[i] = garray[i] + (sowners[j] / bs - owners[j]);
  }
  /* b index in the IS sfrom */
  k = sowners[rank] / bs + mbs;
  for (i = ec, j = 0; i < 2 * ec; i++, j++) sgarray[i] = k + j;
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, sgarray, PETSC_COPY_VALUES, &from));

  /* x index in the IS sto */
  k = sowners[rank] / bs + mbs;
  for (i = 0; i < ec; i++) stmp[i] = (k + i);
  /* b index in the IS sto */
  for (i = ec; i < 2 * ec; i++) stmp[i] = sgarray[i - ec];

  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, stmp, PETSC_COPY_VALUES, &to));

  PetscCall(VecScatterCreate(sbaij->slvec0, from, sbaij->slvec1, to, &sbaij->sMvctx));
  PetscCall(VecScatterViewFromOptions(sbaij->sMvctx, (PetscObject)mat, "-matmult_vecscatter_view"));

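  /* slvec1a and slvec1b are sequential views aliasing the front (local x part) and back
     (ghost part) of slvec1's array; slvec0b likewise aliases the back of slvec0 */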
  PetscCall(VecGetLocalSize(sbaij->slvec1, &nt));
  PetscCall(VecGetArray(sbaij->slvec1, &ptr));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, ptr, &sbaij->slvec1a));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, PetscSafePointerPlusOffset(ptr, bs * mbs), &sbaij->slvec1b));
  PetscCall(VecRestoreArray(sbaij->slvec1, &ptr));

  PetscCall(VecGetArray(sbaij->slvec0, &ptr));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, PetscSafePointerPlusOffset(ptr, bs * mbs), &sbaij->slvec0b));
  PetscCall(VecRestoreArray(sbaij->slvec0, &ptr));

  PetscCall(PetscFree(stmp));

  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));
  PetscCall(PetscFree2(sgarray, ec_owner));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Takes the local part of an already assembled MPISBAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix vector multiply.
   Thus certain data structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *baij  = (Mat_MPISBAIJ *)A->data;
  Mat           B     = baij->B, Bnew;
  Mat_SeqBAIJ  *Bbaij = (Mat_SeqBAIJ *)B->data;
  PetscInt      i, j, mbs = Bbaij->mbs, n = A->cmap->N, col, *garray = baij->garray;
  PetscInt      k, bs = A->rmap->bs, bs2 = baij->bs2, *rvals, *nz, m = A->rmap->n;
  MatScalar    *a = Bbaij->a;
  PetscScalar  *atmp;
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscInt l;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscCall(PetscMalloc1(A->rmap->bs, &atmp));
#endif
  /* free stuff related to matrix-vec multiply */
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));

  PetscCall(VecDestroy(&baij->slvec0));
  PetscCall(VecDestroy(&baij->slvec0b));
  PetscCall(VecDestroy(&baij->slvec1));
  PetscCall(VecDestroy(&baij->slvec1a));
  PetscCall(VecDestroy(&baij->slvec1b));

  if (baij->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
    PetscCall(PetscFree(baij->colmap));
#endif
  }

  /* make sure that B is assembled so we can access its values */
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  /* invent new B and copy stuff over */
  PetscCall(PetscMalloc1(mbs, &nz));
  for (i = 0; i < mbs; i++) nz[i] = Bbaij->i[i + 1] - Bbaij->i[i];
  PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew));
  PetscCall(MatSetSizes(Bnew, m, n, m, n));
  PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
  PetscCall(MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz));
  PetscCall(PetscFree(nz));

  if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */
    ((Mat_SeqSBAIJ *)Bnew->data)->nonew = Bbaij->nonew;
  }

  /*
   Ensure that B's nonzerostate is monotonically increasing.
   Or should this follow the MatSetValues() loop to preserve B's nonzerostate across a MatDisAssemble() call?
   */
  Bnew->nonzerostate = B->nonzerostate;
  PetscCall(PetscMalloc1(bs, &rvals));
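  /* copy each block of B into Bnew one block column at a time, translating the
     compacted column ids back to global indices through garray */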
  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * i;
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) {
      col = garray[Bbaij->j[j]] * bs;
      for (k = 0; k < bs; k++) {
#if defined(PETSC_USE_REAL_MAT_SINGLE)
        for (l = 0; l < bs; l++) atmp[l] = a[j * bs2 + k * bs + l]; /* offset by k*bs so each column is copied, matching the branch below */
#else
        atmp = a + j * bs2 + k * bs;
#endif
        PetscCall(MatSetValues_SeqSBAIJ(Bnew, bs, rvals, 1, &col, atmp, B->insertmode));
        col++;
      }
    }
  }
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscCall(PetscFree(atmp));
#endif
  PetscCall(PetscFree(baij->garray));

  baij->garray = NULL;

  PetscCall(PetscFree(rvals));
  PetscCall(MatDestroy(&B));

  baij->B = Bnew;

  A->was_assembled = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
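
Both routines above are internal: MatSetUpMultiply_MPISBAIJ() runs while an MPISBAIJ matrix is assembled, and disassembly is triggered when new nonzeros force the communication pattern to be rebuilt, as the comment before MatDisAssemble_MPISBAIJ() explains. A minimal sketch of the public-API path that exercises them follows; the block size, local dimension, and preallocation counts are illustrative assumptions, not values taken from this file.

#include <petscmat.h>

int main(int argc, char **argv)
{
  Mat      A;
  Vec      x, y;
  PetscInt bs = 2, n = 8; /* assumed block size and local dimension; n must be divisible by bs */

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &argv, NULL, NULL));
  PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
  PetscCall(MatSetSizes(A, n, n, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(A, MATMPISBAIJ));
  PetscCall(MatMPISBAIJSetPreallocation(A, bs, 2, NULL, 1, NULL));
  /* ... MatSetValues() on blocks of the upper triangle (only that part is stored) ... */
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); /* assembly is where MatSetUpMultiply_MPISBAIJ() runs */
  PetscCall(MatCreateVecs(A, &x, &y));
  PetscCall(VecSet(x, 1.0));
  PetscCall(MatMult(A, x, y)); /* uses the slvec0/slvec1 work vectors and sMvctx scatter built above */
  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&y));
  PetscCall(MatDestroy(&A));
  PetscCall(PetscFinalize());
  return 0;
}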