Actual source code: mmsbaij.c
1: /*
2: Support for the parallel SBAIJ matrix vector multiply
3: */
4: #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
6: PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
7: {
8: Mat_MPISBAIJ *sbaij = (Mat_MPISBAIJ *)mat->data;
9: Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)sbaij->B->data;
10: PetscInt i, j, *aj = B->j, ec = 0, *garray, *sgarray;
11: PetscInt bs = mat->rmap->bs, *stmp, mbs = sbaij->mbs, vec_size, nt;
12: IS from, to;
13: Vec gvec;
14: PetscMPIInt rank = sbaij->rank, lsize;
15: PetscInt *owners = sbaij->rangebs, *ec_owner, k;
16: const PetscInt *sowners;
17: PetscScalar *ptr;
18: #if defined(PETSC_USE_CTABLE)
19: PetscHMapI gid1_lid1 = NULL; /* one-based gid to lid table */
20: PetscHashIter tpos;
21: PetscInt gid, lid;
22: #else
23: PetscInt Nbs = sbaij->Nbs;
24: PetscInt *indices;
25: #endif
27: PetscFunctionBegin;
28: #if defined(PETSC_USE_CTABLE)
29: PetscCall(PetscHMapICreateWithSize(mbs, &gid1_lid1));
30: for (i = 0; i < B->mbs; i++) {
31: for (j = 0; j < B->ilen[i]; j++) {
32: PetscInt data, gid1 = aj[B->i[i] + j] + 1;
33: PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &data));
34: if (!data) PetscCall(PetscHMapISet(gid1_lid1, gid1, ++ec));
35: }
36: }
37: /* form array of columns we need */
38: PetscCall(PetscMalloc1(ec, &garray));
39: PetscHashIterBegin(gid1_lid1, tpos);
40: while (!PetscHashIterAtEnd(gid1_lid1, tpos)) {
41: PetscHashIterGetKey(gid1_lid1, tpos, gid);
42: PetscHashIterGetVal(gid1_lid1, tpos, lid);
43: PetscHashIterNext(gid1_lid1, tpos);
44: gid--;
45: lid--;
46: garray[lid] = gid;
47: }
48: PetscCall(PetscSortInt(ec, garray));
49: PetscCall(PetscHMapIClear(gid1_lid1));
50: for (i = 0; i < ec; i++) PetscCall(PetscHMapISet(gid1_lid1, garray[i] + 1, i + 1));
51: /* compact out the extra columns in B */
52: for (i = 0; i < B->mbs; i++) {
53: for (j = 0; j < B->ilen[i]; j++) {
54: PetscInt gid1 = aj[B->i[i] + j] + 1;
55: PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &lid));
56: lid--;
57: aj[B->i[i] + j] = lid;
58: }
59: }
60: PetscCall(PetscHMapIDestroy(&gid1_lid1));
61: PetscCall(PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner));
62: for (i = j = 0; i < ec; i++) {
63: while (garray[i] >= owners[j + 1]) j++;
64: ec_owner[i] = j;
65: }
66: #else
67: /* For the first stab we make an array as long as the number of columns */
68: /* mark those columns that are in sbaij->B */
69: PetscCall(PetscCalloc1(Nbs, &indices));
70: for (i = 0; i < mbs; i++) {
71: for (j = 0; j < B->ilen[i]; j++) {
72: if (!indices[aj[B->i[i] + j]]) ec++;
73: indices[aj[B->i[i] + j]] = 1;
74: }
75: }
77: /* form arrays of columns we need */
78: PetscCall(PetscMalloc1(ec, &garray));
79: PetscCall(PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner));
81: ec = 0;
82: for (j = 0; j < sbaij->size; j++) {
83: for (i = owners[j]; i < owners[j + 1]; i++) {
84: if (indices[i]) {
85: garray[ec] = i;
86: ec_owner[ec] = j;
87: ec++;
88: }
89: }
90: }
92: /* make indices now point into garray */
93: for (i = 0; i < ec; i++) indices[garray[i]] = i;
95: /* compact out the extra columns in B */
96: for (i = 0; i < mbs; i++) {
97: for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
98: }
99: PetscCall(PetscFree(indices));
100: #endif
101: B->nbs = ec;
102: PetscCall(PetscLayoutDestroy(&sbaij->B->cmap));
103: PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sbaij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &sbaij->B->cmap));
105: PetscCall(VecScatterDestroy(&sbaij->sMvctx));
106: /* create local vector that is used to scatter into */
107: PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec * bs, &sbaij->lvec));
109: /* create two temporary index sets for building scatter-gather */
110: PetscCall(PetscMalloc1(2 * ec, &stmp));
111: PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from));
112: for (i = 0; i < ec; i++) stmp[i] = i;
113: PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_COPY_VALUES, &to));
115: /* generate the scatter context
116: -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but have other uses, such as in MatDiagonalScale_MPISBAIJ */
117: PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));
118: PetscCall(VecScatterCreate(gvec, from, sbaij->lvec, to, &sbaij->Mvctx));
119: PetscCall(VecDestroy(&gvec));
121: sbaij->garray = garray;
123: PetscCall(ISDestroy(&from));
124: PetscCall(ISDestroy(&to));
126: /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
127: lsize = (mbs + ec) * bs;
128: PetscCall(VecCreateMPI(PetscObjectComm((PetscObject)mat), lsize, PETSC_DETERMINE, &sbaij->slvec0));
129: PetscCall(VecDuplicate(sbaij->slvec0, &sbaij->slvec1));
130: PetscCall(VecGetSize(sbaij->slvec0, &vec_size));
132: PetscCall(VecGetOwnershipRanges(sbaij->slvec0, &sowners));
134: /* x index in the IS sfrom */
135: for (i = 0; i < ec; i++) {
136: j = ec_owner[i];
137: sgarray[i] = garray[i] + (sowners[j] / bs - owners[j]);
138: }
139: /* b index in the IS sfrom */
140: k = sowners[rank] / bs + mbs;
141: for (i = ec, j = 0; i < 2 * ec; i++, j++) sgarray[i] = k + j;
142: PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, sgarray, PETSC_COPY_VALUES, &from));
144: /* x index in the IS sto */
145: k = sowners[rank] / bs + mbs;
146: for (i = 0; i < ec; i++) stmp[i] = (k + i);
147: /* b index in the IS sto */
148: for (i = ec; i < 2 * ec; i++) stmp[i] = sgarray[i - ec];
150: PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, stmp, PETSC_COPY_VALUES, &to));
152: PetscCall(VecScatterCreate(sbaij->slvec0, from, sbaij->slvec1, to, &sbaij->sMvctx));
153: PetscCall(VecScatterViewFromOptions(sbaij->sMvctx, (PetscObject)mat, "-matmult_vecscatter_view"));
155: PetscCall(VecGetLocalSize(sbaij->slvec1, &nt));
156: PetscCall(VecGetArray(sbaij->slvec1, &ptr));
157: PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, ptr, &sbaij->slvec1a));
158: PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, PetscSafePointerPlusOffset(ptr, bs * mbs), &sbaij->slvec1b));
159: PetscCall(VecRestoreArray(sbaij->slvec1, &ptr));
161: PetscCall(VecGetArray(sbaij->slvec0, &ptr));
162: PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, PetscSafePointerPlusOffset(ptr, bs * mbs), &sbaij->slvec0b));
163: PetscCall(VecRestoreArray(sbaij->slvec0, &ptr));
165: PetscCall(PetscFree(stmp));
167: PetscCall(ISDestroy(&from));
168: PetscCall(ISDestroy(&to));
169: PetscCall(PetscFree2(sgarray, ec_owner));
170: PetscFunctionReturn(PETSC_SUCCESS);
171: }
173: /*
174: Takes the local part of an already assembled MPISBAIJ matrix
175: and disassembles it. This is to allow new nonzeros into the matrix
176: that require more communication in the matrix vector multiply.
177: Thus certain data-structures must be rebuilt.
179: Kind of slow! But that's what application programmers get when
180: they are sloppy.
181: */
182: PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A)
183: {
184: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)A->data;
185: Mat B = baij->B, Bnew;
186: Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ *)B->data;
187: PetscInt i, j, mbs = Bbaij->mbs, n = A->cmap->N, col, *garray = baij->garray;
188: PetscInt k, bs = A->rmap->bs, bs2 = baij->bs2, *rvals, *nz, m = A->rmap->n;
189: MatScalar *a = Bbaij->a;
190: PetscScalar *atmp;
191: #if defined(PETSC_USE_REAL_MAT_SINGLE)
192: PetscInt l;
193: #endif
195: PetscFunctionBegin;
196: #if defined(PETSC_USE_REAL_MAT_SINGLE)
197: PetscCall(PetscMalloc1(A->rmap->bs, &atmp));
198: #endif
199: /* free stuff related to matrix-vec multiply */
200: PetscCall(VecDestroy(&baij->lvec));
201: PetscCall(VecScatterDestroy(&baij->Mvctx));
203: PetscCall(VecDestroy(&baij->slvec0));
204: PetscCall(VecDestroy(&baij->slvec0b));
205: PetscCall(VecDestroy(&baij->slvec1));
206: PetscCall(VecDestroy(&baij->slvec1a));
207: PetscCall(VecDestroy(&baij->slvec1b));
209: if (baij->colmap) {
210: #if defined(PETSC_USE_CTABLE)
211: PetscCall(PetscHMapIDestroy(&baij->colmap));
212: #else
213: PetscCall(PetscFree(baij->colmap));
214: #endif
215: }
217: /* make sure that B is assembled so we can access its values */
218: PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
219: PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
221: /* invent new B and copy stuff over */
222: PetscCall(PetscMalloc1(mbs, &nz));
223: for (i = 0; i < mbs; i++) nz[i] = Bbaij->i[i + 1] - Bbaij->i[i];
224: PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew));
225: PetscCall(MatSetSizes(Bnew, m, n, m, n));
226: PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
227: PetscCall(MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz));
228: PetscCall(PetscFree(nz));
230: if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */
231: ((Mat_SeqSBAIJ *)Bnew->data)->nonew = Bbaij->nonew;
232: }
234: /*
235: Ensure that B's nonzerostate is monotonically increasing.
236: Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call?
237: */
238: Bnew->nonzerostate = B->nonzerostate;
239: PetscCall(PetscMalloc1(bs, &rvals));
240: for (i = 0; i < mbs; i++) {
241: rvals[0] = bs * i;
242: for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
243: for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) {
244: col = garray[Bbaij->j[j]] * bs;
245: for (k = 0; k < bs; k++) {
246: #if defined(PETSC_USE_REAL_MAT_SINGLE)
247: for (l = 0; l < bs; l++) atmp[l] = a[j * bs2 + l];
248: #else
249: atmp = a + j * bs2 + k * bs;
250: #endif
251: PetscCall(MatSetValues_SeqSBAIJ(Bnew, bs, rvals, 1, &col, atmp, B->insertmode));
252: col++;
253: }
254: }
255: }
256: #if defined(PETSC_USE_REAL_MAT_SINGLE)
257: PetscCall(PetscFree(atmp));
258: #endif
259: PetscCall(PetscFree(baij->garray));
261: baij->garray = NULL;
263: PetscCall(PetscFree(rvals));
264: PetscCall(MatDestroy(&B));
266: baij->B = Bnew;
268: A->was_assembled = PETSC_FALSE;
269: PetscFunctionReturn(PETSC_SUCCESS);
270: }