/* mmaij.c */
1: /*
2: Support for the parallel AIJ matrix vector multiply
3: */
4: #include <../src/mat/impls/aij/mpi/mpiaij.h>
5: #include <petsc/private/vecimpl.h>
6: #include <petsc/private/isimpl.h>
/*
  MatSetUpMultiply_MPIAIJ - Sets up the machinery for the parallel AIJ matrix-vector
  multiply: compacts the off-diagonal block B from global to local column indices,
  records the referenced global column ids in aij->garray, creates the local work
  vector aij->lvec, and builds the scatter aij->Mvctx that gathers the required
  off-process vector entries.

  On entry (when aij->garray is NULL) B->j holds global column ids; on exit it holds
  local indices into garray[], where garray[lid] = global column id, sorted increasing.
*/
PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ *B   = (Mat_SeqAIJ *)aij->B->data;
  PetscInt    i, j, *aj = B->j, *garray;
  PetscInt    ec = 0; /* Number of nonzero external columns */
  IS          from, to;
  Vec         gvec;
#if defined(PETSC_USE_CTABLE)
  PetscHMapI    gid1_lid1 = NULL;
  PetscHashIter tpos;
  PetscInt      gid, lid;
#else
  PetscInt N = mat->cmap->N, *indices;
#endif

  PetscFunctionBegin;
  if (!aij->garray) {
    PetscCheck(aij->B, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing B mat");
#if defined(PETSC_USE_CTABLE)
    /* use a table: map each referenced global column id (stored 1-based so 0 means
       "absent") to a 1-based local id, assigned in first-seen order */
    PetscCall(PetscHMapICreateWithSize(aij->B->rmap->n, &gid1_lid1));
    for (i = 0; i < aij->B->rmap->n; i++) {
      for (j = 0; j < B->ilen[i]; j++) {
        PetscInt data, gid1 = aj[B->i[i] + j] + 1;
        PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &data));
        if (!data) {
          /* one based table */
          PetscCall(PetscHMapISet(gid1_lid1, gid1, ++ec));
        }
      }
    }
    /* form array of columns we need */
    PetscCall(PetscMalloc1(ec, &garray));
    PetscHashIterBegin(gid1_lid1, tpos);
    while (!PetscHashIterAtEnd(gid1_lid1, tpos)) {
      PetscHashIterGetKey(gid1_lid1, tpos, gid);
      PetscHashIterGetVal(gid1_lid1, tpos, lid);
      PetscHashIterNext(gid1_lid1, tpos);
      gid--; /* back to 0-based global id */
      lid--; /* back to 0-based local id */
      garray[lid] = gid;
    }
    PetscCall(PetscSortInt(ec, garray)); /* sort, and rebuild */
    PetscCall(PetscHMapIClear(gid1_lid1));
    /* rebuild the table so local ids follow the sorted (increasing global id) order */
    for (i = 0; i < ec; i++) PetscCall(PetscHMapISet(gid1_lid1, garray[i] + 1, i + 1));
    /* compact out the extra columns in B: rewrite B->j from global ids to local ids */
    for (i = 0; i < aij->B->rmap->n; i++) {
      for (j = 0; j < B->ilen[i]; j++) {
        PetscInt gid1 = aj[B->i[i] + j] + 1;
        PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &lid));
        lid--;
        aj[B->i[i] + j] = lid;
      }
    }
    /* shrink B's column layout to just the ec referenced external columns */
    PetscCall(PetscLayoutDestroy(&aij->B->cmap));
    PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)aij->B), ec, ec, 1, &aij->B->cmap));
    PetscCall(PetscHMapIDestroy(&gid1_lid1));
#else
    /* Make an array as long as the number of columns */
    /* mark those columns that are in aij->B */
    PetscCall(PetscCalloc1(N, &indices));
    for (i = 0; i < aij->B->rmap->n; i++) {
      for (j = 0; j < B->ilen[i]; j++) {
        if (!indices[aj[B->i[i] + j]]) ec++;
        indices[aj[B->i[i] + j]] = 1;
      }
    }

    /* form array of columns we need; scanning 0..N-1 yields garray sorted increasing */
    PetscCall(PetscMalloc1(ec, &garray));
    ec = 0;
    for (i = 0; i < N; i++) {
      if (indices[i]) garray[ec++] = i;
    }

    /* make indices now point into garray */
    for (i = 0; i < ec; i++) indices[garray[i]] = i;

    /* compact out the extra columns in B */
    for (i = 0; i < aij->B->rmap->n; i++) {
      for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
    /* shrink B's column layout to just the ec referenced external columns */
    PetscCall(PetscLayoutDestroy(&aij->B->cmap));
    PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)aij->B), ec, ec, 1, &aij->B->cmap));
    PetscCall(PetscFree(indices));
#endif
    // If we did rewrite B->j[] and B->cmap, increase the state. Device matrices detect this state change to sync the device copy.
    if (ec) aij->B->nonzerostate++;
  } else {
    /* garray already built by a previous call; reuse it */
    garray = aij->garray;
  }

  if (!aij->lvec) {
    PetscCheck(aij->B, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing B mat");
    PetscCall(MatCreateVecs(aij->B, &aij->lvec, NULL));
  }
  /* ec = size of lvec = number of external columns (reloaded here since the
     compaction branch above may have been skipped) */
  PetscCall(VecGetSize(aij->lvec, &ec));

  /* create two temporary Index sets for build scatter gather */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ec, garray, PETSC_COPY_VALUES, &from));
  PetscCall(ISCreateStride(PETSC_COMM_SELF, ec, 0, 1, &to));

  /* create temporary global vector to generate scatter context */
  /* This does not allocate the array's memory so is efficient */
  PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));

  /* generate the scatter context: global positions garray[] -> lvec positions 0..ec-1 */
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(VecScatterCreate(gvec, from, aij->lvec, to, &aij->Mvctx));
  PetscCall(VecScatterViewFromOptions(aij->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view"));
  aij->garray = garray;

  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));
  PetscCall(VecDestroy(&gvec));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* Disassemble the off-diag portion of the MPIAIJXxx matrix.
   In other words, change the B from reduced format using local col ids
   to expanded format using global col ids, which will make it easier to
   insert new nonzeros (with global col ids) into the matrix.
   The off-diag B determines communication in the matrix vector multiply.
   use_preallocation determines whether to use the preallocation or the
   existing non-zero structure when creating the global form of B.

   Side effects: destroys aij->lvec, aij->colmap, aij->garray and the old B;
   replaces aij->B with the expanded matrix and marks A as not assembled.
*/
PetscErrorCode MatDisAssemble_MPIAIJ(Mat A, PetscBool use_preallocation)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)A->data;
  Mat         B = aij->B, Bnew = NULL;

  PetscFunctionBegin;
  /* free stuff related to matrix-vec multiply */
  PetscCall(VecDestroy(&aij->lvec));
  if (aij->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDestroy(&aij->colmap));
#else
    PetscCall(PetscFree(aij->colmap));
#endif
  }

  if (B) {
    Mat_SeqAIJ        *Baij = (Mat_SeqAIJ *)B->data;
    PetscInt           i, j, m = B->rmap->n, n = A->cmap->N, col, ct = 0, *garray = aij->garray, *nz;
    PetscScalar        v;
    const PetscScalar *ba;

    /* make sure that B is assembled so we can access its values */
    PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

    /* invent new B and copy stuff over; per-row nonzero counts come either from
       the original preallocation (ipre) or from the current structure (i[]) */
    PetscCall(PetscMalloc1(m + 1, &nz));
    if (use_preallocation)
      for (i = 0; i < m; i++) nz[i] = Baij->ipre[i];
    else
      for (i = 0; i < m; i++) nz[i] = Baij->i[i + 1] - Baij->i[i];
    PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew));
    PetscCall(MatSetSizes(Bnew, m, n, m, n)); /* Bnew now uses A->cmap->N as its col size */
    PetscCall(MatSetBlockSizesFromMats(Bnew, A, A));
    PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
    PetscCall(MatSeqAIJSetPreallocation(Bnew, 0, nz));

    if (Baij->nonew >= 0) { /* Inherit insertion error options (if positive). */
      ((Mat_SeqAIJ *)Bnew->data)->nonew = Baij->nonew;
    }

    /*
      Ensure that B's nonzerostate is monotonically increasing.
      Or should this follow the MatSetValues() loop to preserve B's nonzerostate across a MatDisAssemble() call?
    */
    Bnew->nonzerostate = B->nonzerostate;

    PetscCall(PetscFree(nz));
    PetscCall(MatSeqAIJGetArrayRead(B, &ba));
    /* copy every stored entry, translating local column ids back to global ids via garray[];
       ct walks B's values in CSR order */
    for (i = 0; i < m; i++) {
      for (j = Baij->i[i]; j < Baij->i[i + 1]; j++) {
        col = garray[Baij->j[ct]];
        v   = ba[ct++];
        PetscCall(MatSetValues(Bnew, 1, &i, 1, &col, &v, B->insertmode));
      }
    }
    PetscCall(MatSeqAIJRestoreArrayRead(B, &ba));
    PetscCall(MatDestroy(&B));
  }
  PetscCall(PetscFree(aij->garray));

  aij->B = Bnew; /* NULL if there was no B to disassemble */
  A->was_assembled = PETSC_FALSE;
  A->assembled = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
203: /* ugly stuff added for Glenn someday we should fix this up */
205: static PetscInt *auglyrmapd = NULL, *auglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
206: static Vec auglydd = NULL, auglyoo = NULL; /* work vectors used to scale the two parts of the local matrix */
208: static PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA, Vec scale)
209: {
210: Mat_MPIAIJ *ina = (Mat_MPIAIJ *)inA->data; /*access private part of matrix */
211: PetscInt i, j, n, nt, cstart, cend, no, *garray = ina->garray, *lindices, bs = inA->rmap->bs;
212: PetscInt *r_rmapd, *r_rmapo;
214: PetscFunctionBegin;
215: PetscCall(MatGetOwnershipRange(inA, &cstart, &cend));
216: PetscCall(MatGetSize(ina->A, NULL, &n));
217: PetscCall(PetscCalloc1(inA->rmap->mapping->n, &r_rmapd));
218: nt = 0;
219: for (i = 0; i < inA->rmap->mapping->n; i++) {
220: if (inA->rmap->mapping->indices[i] * bs >= cstart && inA->rmap->mapping->indices[i] * bs < cend) {
221: nt++;
222: r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
223: }
224: }
225: PetscCheck(nt * bs == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt*bs %" PetscInt_FMT " n %" PetscInt_FMT, nt * bs, n);
226: PetscCall(PetscMalloc1(n, &auglyrmapd));
227: for (i = 0; i < inA->rmap->mapping->n; i++) {
228: if (r_rmapd[i]) {
229: for (j = 0; j < bs; j++) auglyrmapd[(r_rmapd[i] - 1) * bs + j - cstart] = i * bs + j;
230: }
231: }
232: PetscCall(PetscFree(r_rmapd));
233: PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &auglydd));
235: /* This loop handling the setup for the off-diagonal local portion of the matrix is extremely similar to
236: its counterpart in the MPIBAIJ version of this code.
237: The tricky difference is that lindices[] uses block-based indexing (just as in the BAIJ code),
238: but garray[] uses element-based indexing; hence the multiplications / divisions involving bs. */
239: PetscCall(PetscCalloc1(inA->cmap->N / bs, &lindices));
240: for (i = 0; i < ina->B->cmap->n / bs; i++) lindices[garray[i * bs] / bs] = i + 1;
241: no = inA->rmap->mapping->n - nt;
242: PetscCall(PetscCalloc1(inA->rmap->mapping->n, &r_rmapo));
243: nt = 0;
244: for (i = 0; i < inA->rmap->mapping->n; i++) {
245: if (lindices[inA->rmap->mapping->indices[i]]) {
246: nt++;
247: r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
248: }
249: }
250: PetscCheck(nt <= no, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT, nt, n);
251: PetscCall(PetscFree(lindices));
252: PetscCall(PetscMalloc1(nt * bs, &auglyrmapo));
253: for (i = 0; i < inA->rmap->mapping->n; i++) {
254: if (r_rmapo[i]) {
255: for (j = 0; j < bs; j++) auglyrmapo[(r_rmapo[i] - 1) * bs + j] = i * bs + j;
256: }
257: }
258: PetscCall(PetscFree(r_rmapo));
259: PetscCall(VecCreateSeq(PETSC_COMM_SELF, nt * bs, &auglyoo));
260: PetscFunctionReturn(PETSC_SUCCESS);
261: }
263: PetscErrorCode MatDiagonalScaleLocal_MPIAIJ(Mat A, Vec scale)
264: {
265: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; /*access private part of matrix */
266: PetscInt n, i;
267: PetscScalar *d, *o;
268: const PetscScalar *s;
270: PetscFunctionBegin;
271: if (!auglyrmapd) PetscCall(MatMPIAIJDiagonalScaleLocalSetUp(A, scale));
273: PetscCall(VecGetArrayRead(scale, &s));
275: PetscCall(VecGetLocalSize(auglydd, &n));
276: PetscCall(VecGetArray(auglydd, &d));
277: for (i = 0; i < n; i++) d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
278: PetscCall(VecRestoreArray(auglydd, &d));
279: /* column scale "diagonal" portion of local matrix */
280: PetscCall(MatDiagonalScale(a->A, NULL, auglydd));
282: PetscCall(VecGetLocalSize(auglyoo, &n));
283: PetscCall(VecGetArray(auglyoo, &o));
284: for (i = 0; i < n; i++) o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
285: PetscCall(VecRestoreArrayRead(scale, &s));
286: PetscCall(VecRestoreArray(auglyoo, &o));
287: /* column scale "off-diagonal" portion of local matrix */
288: PetscCall(MatDiagonalScale(a->B, NULL, auglyoo));
289: PetscCall(PetscFree(auglyrmapd));
290: PetscCall(PetscFree(auglyrmapo));
291: PetscCall(VecDestroy(&auglydd));
292: PetscCall(VecDestroy(&auglyoo));
293: PetscFunctionReturn(PETSC_SUCCESS);
294: }