Actual source code: matelem.cxx

  1: #include <petsc/private/petscelemental.h>

  3: const char       ElementalCitation[] = "@Article{Elemental2012,\n"
  4:                                        "  author  = {Jack Poulson and Bryan Marker and Jeff R. Hammond and Nichols A. Romero and Robert {v}an~{d}e~{G}eijn},\n"
  5:                                        "  title   = {Elemental: A New Framework for Distributed Memory Dense Matrix Computations},\n"
  6:                                        "  journal = {{ACM} Transactions on Mathematical Software},\n"
  7:                                        "  volume  = {39},\n"
  8:                                        "  number  = {2},\n"
  9:                                        "  year    = {2013}\n"
 10:                                        "}\n";
 11: static PetscBool ElementalCite       = PETSC_FALSE;

 13: /*
 14:     The variable Petsc_Elemental_keyval is used to indicate an MPI attribute that
 15:   is attached to a communicator, in this case the attribute is a Mat_Elemental_Grid
 16: */
 17: static PetscMPIInt Petsc_Elemental_keyval = MPI_KEYVAL_INVALID;

 19: static PetscErrorCode MatView_Elemental(Mat A, PetscViewer viewer)
 20: {
 21:   Mat_Elemental *a = (Mat_Elemental *)A->data;
 22:   PetscBool      iascii;

 24:   PetscFunctionBegin;
 25:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
 26:   if (iascii) {
 27:     PetscViewerFormat format;
 28:     PetscCall(PetscViewerGetFormat(viewer, &format));
 29:     if (format == PETSC_VIEWER_ASCII_INFO) {
 30:       /* call elemental viewing function */
 31:       PetscCall(PetscViewerASCIIPrintf(viewer, "Elemental run parameters:\n"));
 32:       PetscCall(PetscViewerASCIIPrintf(viewer, "  allocated entries=%zu\n", (*a->emat).AllocatedMemory()));
 33:       PetscCall(PetscViewerASCIIPrintf(viewer, "  grid height=%d, grid width=%d\n", (*a->emat).Grid().Height(), (*a->emat).Grid().Width()));
 34:       if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
 35:         /* call elemental viewing function */
 36:         PetscCall(PetscPrintf(PetscObjectComm((PetscObject)viewer), "test matview_elemental 2\n"));
 37:       }

 39:     } else if (format == PETSC_VIEWER_DEFAULT) {
 40:       PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
 41:       El::Print(*a->emat, "Elemental matrix (cyclic ordering)");
 42:       PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
 43:       if (A->factortype == MAT_FACTOR_NONE) {
 44:         Mat Adense;
 45:         PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense));
 46:         PetscCall(MatView(Adense, viewer));
 47:         PetscCall(MatDestroy(&Adense));
 48:       }
 49:     } else SETERRQ(PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Format");
 50:   } else {
 51:     /* convert to dense format and call MatView() */
 52:     Mat Adense;
 53:     PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense));
 54:     PetscCall(MatView(Adense, viewer));
 55:     PetscCall(MatDestroy(&Adense));
 56:   }
 57:   PetscFunctionReturn(PETSC_SUCCESS);
 58: }

 60: static PetscErrorCode MatGetInfo_Elemental(Mat A, MatInfoType flag, MatInfo *info)
 61: {
 62:   Mat_Elemental *a = (Mat_Elemental *)A->data;

 64:   PetscFunctionBegin;
 65:   info->block_size = 1.0;

 67:   if (flag == MAT_LOCAL) {
 68:     info->nz_allocated = (*a->emat).AllocatedMemory(); /* locally allocated */
 69:     info->nz_used      = info->nz_allocated;
 70:   } else if (flag == MAT_GLOBAL_MAX) {
 71:     //PetscCallMPI(MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin)));
 72:     /* see MatGetInfo_MPIAIJ() for getting global info->nz_allocated! */
 73:     //SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP," MAT_GLOBAL_MAX not written yet");
 74:   } else if (flag == MAT_GLOBAL_SUM) {
 75:     //SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP," MAT_GLOBAL_SUM not written yet");
 76:     info->nz_allocated = (*a->emat).AllocatedMemory(); /* locally allocated */
 77:     info->nz_used      = info->nz_allocated;           /* assume Elemental does accurate allocation */
 78:     //PetscCallMPI(MPIU_Allreduce(isend,irecv,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
 79:     //PetscPrintf(PETSC_COMM_SELF,"    ... [%d] locally allocated %g\n",rank,info->nz_allocated);
 80:   }

 82:   info->nz_unneeded       = 0.0;
 83:   info->assemblies        = A->num_ass;
 84:   info->mallocs           = 0;
 85:   info->memory            = 0; /* REVIEW ME */
 86:   info->fill_ratio_given  = 0; /* determined by Elemental */
 87:   info->fill_ratio_needed = 0;
 88:   info->factor_mallocs    = 0;
 89:   PetscFunctionReturn(PETSC_SUCCESS);
 90: }

 92: static PetscErrorCode MatSetOption_Elemental(Mat A, MatOption op, PetscBool flg)
 93: {
 94:   Mat_Elemental *a = (Mat_Elemental *)A->data;

 96:   PetscFunctionBegin;
 97:   switch (op) {
 98:   case MAT_ROW_ORIENTED:
 99:     a->roworiented = flg;
100:     break;
101:   default:
102:     break;
103:   }
104:   PetscFunctionReturn(PETSC_SUCCESS);
105: }

107: static PetscErrorCode MatSetValues_Elemental(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode)
108: {
109:   Mat_Elemental *a = (Mat_Elemental *)A->data;
110:   PetscInt       i, j, rrank, ridx, crank, cidx, erow, ecol, numQueues = 0;

112:   PetscFunctionBegin;
113:   // TODO: Initialize matrix to all zeros?

115:   // Count the number of queues from this process
116:   if (a->roworiented) {
117:     for (i = 0; i < nr; i++) {
118:       if (rows[i] < 0) continue;
119:       P2RO(A, 0, rows[i], &rrank, &ridx);
120:       RO2E(A, 0, rrank, ridx, &erow);
121:       PetscCheck(rrank >= 0 && ridx >= 0 && erow >= 0, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Incorrect row translation");
122:       for (j = 0; j < nc; j++) {
123:         if (cols[j] < 0) continue;
124:         P2RO(A, 1, cols[j], &crank, &cidx);
125:         RO2E(A, 1, crank, cidx, &ecol);
126:         PetscCheck(crank >= 0 && cidx >= 0 && ecol >= 0, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Incorrect col translation");
127:         if (!a->emat->IsLocal(erow, ecol)) { /* off-proc entry */
128:           /* printf("Will later remotely update (%d,%d)\n",erow,ecol); */
129:           PetscCheck(imode == ADD_VALUES, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only ADD_VALUES to off-processor entry is supported");
130:           ++numQueues;
131:           continue;
132:         }
133:         /* printf("Locally updating (%d,%d)\n",erow,ecol); */
134:         switch (imode) {
135:         case INSERT_VALUES:
136:           a->emat->Set(erow, ecol, (PetscElemScalar)vals[i * nc + j]);
137:           break;
138:         case ADD_VALUES:
139:           a->emat->Update(erow, ecol, (PetscElemScalar)vals[i * nc + j]);
140:           break;
141:         default:
142:           SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode);
143:         }
144:       }
145:     }

147:     /* printf("numQueues=%d\n",numQueues); */
148:     a->emat->Reserve(numQueues);
149:     for (i = 0; i < nr; i++) {
150:       if (rows[i] < 0) continue;
151:       P2RO(A, 0, rows[i], &rrank, &ridx);
152:       RO2E(A, 0, rrank, ridx, &erow);
153:       for (j = 0; j < nc; j++) {
154:         if (cols[j] < 0) continue;
155:         P2RO(A, 1, cols[j], &crank, &cidx);
156:         RO2E(A, 1, crank, cidx, &ecol);
157:         if (!a->emat->IsLocal(erow, ecol)) { /*off-proc entry*/
158:           /* printf("Queueing remotely update of (%d,%d)\n",erow,ecol); */
159:           a->emat->QueueUpdate(erow, ecol, vals[i * nc + j]);
160:         }
161:       }
162:     }
163:   } else { /* column-oriented */
164:     for (j = 0; j < nc; j++) {
165:       if (cols[j] < 0) continue;
166:       P2RO(A, 1, cols[j], &crank, &cidx);
167:       RO2E(A, 1, crank, cidx, &ecol);
168:       PetscCheck(crank >= 0 && cidx >= 0 && ecol >= 0, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Incorrect col translation");
169:       for (i = 0; i < nr; i++) {
170:         if (rows[i] < 0) continue;
171:         P2RO(A, 0, rows[i], &rrank, &ridx);
172:         RO2E(A, 0, rrank, ridx, &erow);
173:         PetscCheck(rrank >= 0 && ridx >= 0 && erow >= 0, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Incorrect row translation");
174:         if (!a->emat->IsLocal(erow, ecol)) { /* off-proc entry */
175:           /* printf("Will later remotely update (%d,%d)\n",erow,ecol); */
176:           PetscCheck(imode == ADD_VALUES, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only ADD_VALUES to off-processor entry is supported");
177:           ++numQueues;
178:           continue;
179:         }
180:         /* printf("Locally updating (%d,%d)\n",erow,ecol); */
181:         switch (imode) {
182:         case INSERT_VALUES:
183:           a->emat->Set(erow, ecol, (PetscElemScalar)vals[i + j * nr]);
184:           break;
185:         case ADD_VALUES:
186:           a->emat->Update(erow, ecol, (PetscElemScalar)vals[i + j * nr]);
187:           break;
188:         default:
189:           SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode);
190:         }
191:       }
192:     }

194:     /* printf("numQueues=%d\n",numQueues); */
195:     a->emat->Reserve(numQueues);
196:     for (j = 0; j < nc; j++) {
197:       if (cols[j] < 0) continue;
198:       P2RO(A, 1, cols[j], &crank, &cidx);
199:       RO2E(A, 1, crank, cidx, &ecol);

201:       for (i = 0; i < nr; i++) {
202:         if (rows[i] < 0) continue;
203:         P2RO(A, 0, rows[i], &rrank, &ridx);
204:         RO2E(A, 0, rrank, ridx, &erow);
205:         if (!a->emat->IsLocal(erow, ecol)) { /*off-proc entry*/
206:           /* printf("Queueing remotely update of (%d,%d)\n",erow,ecol); */
207:           a->emat->QueueUpdate(erow, ecol, vals[i + j * nr]);
208:         }
209:       }
210:     }
211:   }
212:   PetscFunctionReturn(PETSC_SUCCESS);
213: }

215: static PetscErrorCode MatMult_Elemental(Mat A, Vec X, Vec Y)
216: {
217:   Mat_Elemental         *a = (Mat_Elemental *)A->data;
218:   const PetscElemScalar *x;
219:   PetscElemScalar       *y;
220:   PetscElemScalar        one = 1, zero = 0;

222:   PetscFunctionBegin;
223:   PetscCall(VecGetArrayRead(X, (const PetscScalar **)&x));
224:   PetscCall(VecGetArray(Y, (PetscScalar **)&y));
225:   { /* Scoping so that constructor is called before pointer is returned */
226:     El::DistMatrix<PetscElemScalar, El::VC, El::STAR> xe, ye;
227:     xe.LockedAttach(A->cmap->N, 1, *a->grid, 0, 0, x, A->cmap->n);
228:     ye.Attach(A->rmap->N, 1, *a->grid, 0, 0, y, A->rmap->n);
229:     El::Gemv(El::NORMAL, one, *a->emat, xe, zero, ye);
230:   }
231:   PetscCall(VecRestoreArrayRead(X, (const PetscScalar **)&x));
232:   PetscCall(VecRestoreArray(Y, (PetscScalar **)&y));
233:   PetscFunctionReturn(PETSC_SUCCESS);
234: }

236: static PetscErrorCode MatMultTranspose_Elemental(Mat A, Vec X, Vec Y)
237: {
238:   Mat_Elemental         *a = (Mat_Elemental *)A->data;
239:   const PetscElemScalar *x;
240:   PetscElemScalar       *y;
241:   PetscElemScalar        one = 1, zero = 0;

243:   PetscFunctionBegin;
244:   PetscCall(VecGetArrayRead(X, (const PetscScalar **)&x));
245:   PetscCall(VecGetArray(Y, (PetscScalar **)&y));
246:   { /* Scoping so that constructor is called before pointer is returned */
247:     El::DistMatrix<PetscElemScalar, El::VC, El::STAR> xe, ye;
248:     xe.LockedAttach(A->rmap->N, 1, *a->grid, 0, 0, x, A->rmap->n);
249:     ye.Attach(A->cmap->N, 1, *a->grid, 0, 0, y, A->cmap->n);
250:     El::Gemv(El::TRANSPOSE, one, *a->emat, xe, zero, ye);
251:   }
252:   PetscCall(VecRestoreArrayRead(X, (const PetscScalar **)&x));
253:   PetscCall(VecRestoreArray(Y, (PetscScalar **)&y));
254:   PetscFunctionReturn(PETSC_SUCCESS);
255: }

257: static PetscErrorCode MatMultAdd_Elemental(Mat A, Vec X, Vec Y, Vec Z)
258: {
259:   Mat_Elemental         *a = (Mat_Elemental *)A->data;
260:   const PetscElemScalar *x;
261:   PetscElemScalar       *z;
262:   PetscElemScalar        one = 1;

264:   PetscFunctionBegin;
265:   if (Y != Z) PetscCall(VecCopy(Y, Z));
266:   PetscCall(VecGetArrayRead(X, (const PetscScalar **)&x));
267:   PetscCall(VecGetArray(Z, (PetscScalar **)&z));
268:   { /* Scoping so that constructor is called before pointer is returned */
269:     El::DistMatrix<PetscElemScalar, El::VC, El::STAR> xe, ze;
270:     xe.LockedAttach(A->cmap->N, 1, *a->grid, 0, 0, x, A->cmap->n);
271:     ze.Attach(A->rmap->N, 1, *a->grid, 0, 0, z, A->rmap->n);
272:     El::Gemv(El::NORMAL, one, *a->emat, xe, one, ze);
273:   }
274:   PetscCall(VecRestoreArrayRead(X, (const PetscScalar **)&x));
275:   PetscCall(VecRestoreArray(Z, (PetscScalar **)&z));
276:   PetscFunctionReturn(PETSC_SUCCESS);
277: }

279: static PetscErrorCode MatMultTransposeAdd_Elemental(Mat A, Vec X, Vec Y, Vec Z)
280: {
281:   Mat_Elemental         *a = (Mat_Elemental *)A->data;
282:   const PetscElemScalar *x;
283:   PetscElemScalar       *z;
284:   PetscElemScalar        one = 1;

286:   PetscFunctionBegin;
287:   if (Y != Z) PetscCall(VecCopy(Y, Z));
288:   PetscCall(VecGetArrayRead(X, (const PetscScalar **)&x));
289:   PetscCall(VecGetArray(Z, (PetscScalar **)&z));
290:   { /* Scoping so that constructor is called before pointer is returned */
291:     El::DistMatrix<PetscElemScalar, El::VC, El::STAR> xe, ze;
292:     xe.LockedAttach(A->rmap->N, 1, *a->grid, 0, 0, x, A->rmap->n);
293:     ze.Attach(A->cmap->N, 1, *a->grid, 0, 0, z, A->cmap->n);
294:     El::Gemv(El::TRANSPOSE, one, *a->emat, xe, one, ze);
295:   }
296:   PetscCall(VecRestoreArrayRead(X, (const PetscScalar **)&x));
297:   PetscCall(VecRestoreArray(Z, (PetscScalar **)&z));
298:   PetscFunctionReturn(PETSC_SUCCESS);
299: }

301: PetscErrorCode MatMatMultNumeric_Elemental(Mat A, Mat B, Mat C)
302: {
303:   Mat_Elemental  *a   = (Mat_Elemental *)A->data;
304:   Mat_Elemental  *b   = (Mat_Elemental *)B->data;
305:   Mat_Elemental  *c   = (Mat_Elemental *)C->data;
306:   PetscElemScalar one = 1, zero = 0;

308:   PetscFunctionBegin;
309:   { /* Scoping so that constructor is called before pointer is returned */
310:     El::Gemm(El::NORMAL, El::NORMAL, one, *a->emat, *b->emat, zero, *c->emat);
311:   }
312:   C->assembled = PETSC_TRUE;
313:   PetscFunctionReturn(PETSC_SUCCESS);
314: }

316: PetscErrorCode MatMatMultSymbolic_Elemental(Mat A, Mat B, PetscReal, Mat Ce)
317: {
318:   PetscFunctionBegin;
319:   PetscCall(MatSetSizes(Ce, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
320:   PetscCall(MatSetType(Ce, MATELEMENTAL));
321:   PetscCall(MatSetUp(Ce));
322:   Ce->ops->matmultnumeric = MatMatMultNumeric_Elemental;
323:   PetscFunctionReturn(PETSC_SUCCESS);
324: }

326: static PetscErrorCode MatMatTransposeMultNumeric_Elemental(Mat A, Mat B, Mat C)
327: {
328:   Mat_Elemental  *a   = (Mat_Elemental *)A->data;
329:   Mat_Elemental  *b   = (Mat_Elemental *)B->data;
330:   Mat_Elemental  *c   = (Mat_Elemental *)C->data;
331:   PetscElemScalar one = 1, zero = 0;

333:   PetscFunctionBegin;
334:   { /* Scoping so that constructor is called before pointer is returned */
335:     El::Gemm(El::NORMAL, El::TRANSPOSE, one, *a->emat, *b->emat, zero, *c->emat);
336:   }
337:   C->assembled = PETSC_TRUE;
338:   PetscFunctionReturn(PETSC_SUCCESS);
339: }

341: static PetscErrorCode MatMatTransposeMultSymbolic_Elemental(Mat A, Mat B, PetscReal, Mat C)
342: {
343:   PetscFunctionBegin;
344:   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE));
345:   PetscCall(MatSetType(C, MATELEMENTAL));
346:   PetscCall(MatSetUp(C));
347:   PetscFunctionReturn(PETSC_SUCCESS);
348: }

350: static PetscErrorCode MatProductSetFromOptions_Elemental_AB(Mat C)
351: {
352:   PetscFunctionBegin;
353:   C->ops->matmultsymbolic = MatMatMultSymbolic_Elemental;
354:   C->ops->productsymbolic = MatProductSymbolic_AB;
355:   PetscFunctionReturn(PETSC_SUCCESS);
356: }

358: static PetscErrorCode MatProductSetFromOptions_Elemental_ABt(Mat C)
359: {
360:   PetscFunctionBegin;
361:   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_Elemental;
362:   C->ops->productsymbolic          = MatProductSymbolic_ABt;
363:   PetscFunctionReturn(PETSC_SUCCESS);
364: }

366: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_Elemental(Mat C)
367: {
368:   Mat_Product *product = C->product;

370:   PetscFunctionBegin;
371:   switch (product->type) {
372:   case MATPRODUCT_AB:
373:     PetscCall(MatProductSetFromOptions_Elemental_AB(C));
374:     break;
375:   case MATPRODUCT_ABt:
376:     PetscCall(MatProductSetFromOptions_Elemental_ABt(C));
377:     break;
378:   default:
379:     break;
380:   }
381:   PetscFunctionReturn(PETSC_SUCCESS);
382: }

384: static PetscErrorCode MatMatMultNumeric_Elemental_MPIDense(Mat A, Mat B, Mat C)
385: {
386:   Mat Be, Ce;

388:   PetscFunctionBegin;
389:   PetscCall(MatConvert(B, MATELEMENTAL, MAT_INITIAL_MATRIX, &Be));
390:   PetscCall(MatMatMult(A, Be, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &Ce));
391:   PetscCall(MatConvert(Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
392:   PetscCall(MatDestroy(&Be));
393:   PetscCall(MatDestroy(&Ce));
394:   PetscFunctionReturn(PETSC_SUCCESS);
395: }

397: static PetscErrorCode MatMatMultSymbolic_Elemental_MPIDense(Mat A, Mat B, PetscReal, Mat C)
398: {
399:   PetscFunctionBegin;
400:   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
401:   PetscCall(MatSetType(C, MATMPIDENSE));
402:   PetscCall(MatSetUp(C));
403:   C->ops->matmultnumeric = MatMatMultNumeric_Elemental_MPIDense;
404:   PetscFunctionReturn(PETSC_SUCCESS);
405: }

407: static PetscErrorCode MatProductSetFromOptions_Elemental_MPIDense_AB(Mat C)
408: {
409:   PetscFunctionBegin;
410:   C->ops->matmultsymbolic = MatMatMultSymbolic_Elemental_MPIDense;
411:   C->ops->productsymbolic = MatProductSymbolic_AB;
412:   PetscFunctionReturn(PETSC_SUCCESS);
413: }

415: static PetscErrorCode MatProductSetFromOptions_Elemental_MPIDense(Mat C)
416: {
417:   Mat_Product *product = C->product;

419:   PetscFunctionBegin;
420:   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_Elemental_MPIDense_AB(C));
421:   PetscFunctionReturn(PETSC_SUCCESS);
422: }

424: static PetscErrorCode MatGetDiagonal_Elemental(Mat A, Vec D)
425: {
426:   PetscInt        i, nrows, ncols, nD, rrank, ridx, crank, cidx;
427:   Mat_Elemental  *a = (Mat_Elemental *)A->data;
428:   PetscElemScalar v;
429:   MPI_Comm        comm;

431:   PetscFunctionBegin;
432:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
433:   PetscCall(MatGetSize(A, &nrows, &ncols));
434:   nD = nrows > ncols ? ncols : nrows;
435:   for (i = 0; i < nD; i++) {
436:     PetscInt erow, ecol;
437:     P2RO(A, 0, i, &rrank, &ridx);
438:     RO2E(A, 0, rrank, ridx, &erow);
439:     PetscCheck(rrank >= 0 && ridx >= 0 && erow >= 0, comm, PETSC_ERR_PLIB, "Incorrect row translation");
440:     P2RO(A, 1, i, &crank, &cidx);
441:     RO2E(A, 1, crank, cidx, &ecol);
442:     PetscCheck(crank >= 0 && cidx >= 0 && ecol >= 0, comm, PETSC_ERR_PLIB, "Incorrect col translation");
443:     v = a->emat->Get(erow, ecol);
444:     PetscCall(VecSetValues(D, 1, &i, (PetscScalar *)&v, INSERT_VALUES));
445:   }
446:   PetscCall(VecAssemblyBegin(D));
447:   PetscCall(VecAssemblyEnd(D));
448:   PetscFunctionReturn(PETSC_SUCCESS);
449: }

451: static PetscErrorCode MatDiagonalScale_Elemental(Mat X, Vec L, Vec R)
452: {
453:   Mat_Elemental         *x = (Mat_Elemental *)X->data;
454:   const PetscElemScalar *d;

456:   PetscFunctionBegin;
457:   if (R) {
458:     PetscCall(VecGetArrayRead(R, (const PetscScalar **)&d));
459:     El::DistMatrix<PetscElemScalar, El::VC, El::STAR> de;
460:     de.LockedAttach(X->cmap->N, 1, *x->grid, 0, 0, d, X->cmap->n);
461:     El::DiagonalScale(El::RIGHT, El::NORMAL, de, *x->emat);
462:     PetscCall(VecRestoreArrayRead(R, (const PetscScalar **)&d));
463:   }
464:   if (L) {
465:     PetscCall(VecGetArrayRead(L, (const PetscScalar **)&d));
466:     El::DistMatrix<PetscElemScalar, El::VC, El::STAR> de;
467:     de.LockedAttach(X->rmap->N, 1, *x->grid, 0, 0, d, X->rmap->n);
468:     El::DiagonalScale(El::LEFT, El::NORMAL, de, *x->emat);
469:     PetscCall(VecRestoreArrayRead(L, (const PetscScalar **)&d));
470:   }
471:   PetscFunctionReturn(PETSC_SUCCESS);
472: }

474: static PetscErrorCode MatMissingDiagonal_Elemental(Mat, PetscBool *missing, PetscInt *)
475: {
476:   PetscFunctionBegin;
477:   *missing = PETSC_FALSE;
478:   PetscFunctionReturn(PETSC_SUCCESS);
479: }

481: static PetscErrorCode MatScale_Elemental(Mat X, PetscScalar a)
482: {
483:   Mat_Elemental *x = (Mat_Elemental *)X->data;

485:   PetscFunctionBegin;
486:   El::Scale((PetscElemScalar)a, *x->emat);
487:   PetscFunctionReturn(PETSC_SUCCESS);
488: }

490: /*
491:   MatAXPY - Computes Y = a*X + Y.
492: */
493: static PetscErrorCode MatAXPY_Elemental(Mat Y, PetscScalar a, Mat X, MatStructure)
494: {
495:   Mat_Elemental *x = (Mat_Elemental *)X->data;
496:   Mat_Elemental *y = (Mat_Elemental *)Y->data;

498:   PetscFunctionBegin;
499:   El::Axpy((PetscElemScalar)a, *x->emat, *y->emat);
500:   PetscCall(PetscObjectStateIncrease((PetscObject)Y));
501:   PetscFunctionReturn(PETSC_SUCCESS);
502: }

504: static PetscErrorCode MatCopy_Elemental(Mat A, Mat B, MatStructure)
505: {
506:   Mat_Elemental *a = (Mat_Elemental *)A->data;
507:   Mat_Elemental *b = (Mat_Elemental *)B->data;

509:   PetscFunctionBegin;
510:   El::Copy(*a->emat, *b->emat);
511:   PetscCall(PetscObjectStateIncrease((PetscObject)B));
512:   PetscFunctionReturn(PETSC_SUCCESS);
513: }

515: static PetscErrorCode MatDuplicate_Elemental(Mat A, MatDuplicateOption op, Mat *B)
516: {
517:   Mat            Be;
518:   MPI_Comm       comm;
519:   Mat_Elemental *a = (Mat_Elemental *)A->data;

521:   PetscFunctionBegin;
522:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
523:   PetscCall(MatCreate(comm, &Be));
524:   PetscCall(MatSetSizes(Be, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
525:   PetscCall(MatSetType(Be, MATELEMENTAL));
526:   PetscCall(MatSetUp(Be));
527:   *B = Be;
528:   if (op == MAT_COPY_VALUES) {
529:     Mat_Elemental *b = (Mat_Elemental *)Be->data;
530:     El::Copy(*a->emat, *b->emat);
531:   }
532:   Be->assembled = PETSC_TRUE;
533:   PetscFunctionReturn(PETSC_SUCCESS);
534: }

536: static PetscErrorCode MatTranspose_Elemental(Mat A, MatReuse reuse, Mat *B)
537: {
538:   Mat            Be = *B;
539:   MPI_Comm       comm;
540:   Mat_Elemental *a = (Mat_Elemental *)A->data, *b;

542:   PetscFunctionBegin;
543:   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
544:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
545:   /* Only out-of-place supported */
546:   PetscCheck(reuse != MAT_INPLACE_MATRIX, comm, PETSC_ERR_SUP, "Only out-of-place supported");
547:   if (reuse == MAT_INITIAL_MATRIX) {
548:     PetscCall(MatCreate(comm, &Be));
549:     PetscCall(MatSetSizes(Be, A->cmap->n, A->rmap->n, PETSC_DECIDE, PETSC_DECIDE));
550:     PetscCall(MatSetType(Be, MATELEMENTAL));
551:     PetscCall(MatSetUp(Be));
552:     *B = Be;
553:   }
554:   b = (Mat_Elemental *)Be->data;
555:   El::Transpose(*a->emat, *b->emat);
556:   Be->assembled = PETSC_TRUE;
557:   PetscFunctionReturn(PETSC_SUCCESS);
558: }

560: static PetscErrorCode MatConjugate_Elemental(Mat A)
561: {
562:   Mat_Elemental *a = (Mat_Elemental *)A->data;

564:   PetscFunctionBegin;
565:   El::Conjugate(*a->emat);
566:   PetscFunctionReturn(PETSC_SUCCESS);
567: }

569: static PetscErrorCode MatHermitianTranspose_Elemental(Mat A, MatReuse reuse, Mat *B)
570: {
571:   Mat            Be = *B;
572:   MPI_Comm       comm;
573:   Mat_Elemental *a = (Mat_Elemental *)A->data, *b;

575:   PetscFunctionBegin;
576:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
577:   /* Only out-of-place supported */
578:   if (reuse == MAT_INITIAL_MATRIX) {
579:     PetscCall(MatCreate(comm, &Be));
580:     PetscCall(MatSetSizes(Be, A->cmap->n, A->rmap->n, PETSC_DECIDE, PETSC_DECIDE));
581:     PetscCall(MatSetType(Be, MATELEMENTAL));
582:     PetscCall(MatSetUp(Be));
583:     *B = Be;
584:   }
585:   b = (Mat_Elemental *)Be->data;
586:   El::Adjoint(*a->emat, *b->emat);
587:   Be->assembled = PETSC_TRUE;
588:   PetscFunctionReturn(PETSC_SUCCESS);
589: }

591: static PetscErrorCode MatSolve_Elemental(Mat A, Vec B, Vec X)
592: {
593:   Mat_Elemental   *a = (Mat_Elemental *)A->data;
594:   PetscElemScalar *x;
595:   PetscInt         pivoting = a->pivoting;

597:   PetscFunctionBegin;
598:   PetscCall(VecCopy(B, X));
599:   PetscCall(VecGetArray(X, (PetscScalar **)&x));

601:   El::DistMatrix<PetscElemScalar, El::VC, El::STAR> xe;
602:   xe.Attach(A->rmap->N, 1, *a->grid, 0, 0, x, A->rmap->n);
603:   El::DistMatrix<PetscElemScalar, El::MC, El::MR> xer(xe);
604:   switch (A->factortype) {
605:   case MAT_FACTOR_LU:
606:     if (pivoting == 0) {
607:       El::lu::SolveAfter(El::NORMAL, *a->emat, xer);
608:     } else if (pivoting == 1) {
609:       El::lu::SolveAfter(El::NORMAL, *a->emat, *a->P, xer);
610:     } else { /* pivoting == 2 */
611:       El::lu::SolveAfter(El::NORMAL, *a->emat, *a->P, *a->Q, xer);
612:     }
613:     break;
614:   case MAT_FACTOR_CHOLESKY:
615:     El::cholesky::SolveAfter(El::UPPER, El::NORMAL, *a->emat, xer);
616:     break;
617:   default:
618:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
619:     break;
620:   }
621:   El::Copy(xer, xe);

623:   PetscCall(VecRestoreArray(X, (PetscScalar **)&x));
624:   PetscFunctionReturn(PETSC_SUCCESS);
625: }

627: static PetscErrorCode MatSolveAdd_Elemental(Mat A, Vec B, Vec Y, Vec X)
628: {
629:   PetscFunctionBegin;
630:   PetscCall(MatSolve_Elemental(A, B, X));
631:   PetscCall(VecAXPY(X, 1, Y));
632:   PetscFunctionReturn(PETSC_SUCCESS);
633: }

635: static PetscErrorCode MatMatSolve_Elemental(Mat A, Mat B, Mat X)
636: {
637:   Mat_Elemental *a = (Mat_Elemental *)A->data;
638:   Mat_Elemental *x;
639:   Mat            C;
640:   PetscInt       pivoting = a->pivoting;
641:   PetscBool      flg;
642:   MatType        type;

644:   PetscFunctionBegin;
645:   PetscCall(MatGetType(X, &type));
646:   PetscCall(PetscStrcmp(type, MATELEMENTAL, &flg));
647:   if (!flg) {
648:     PetscCall(MatConvert(B, MATELEMENTAL, MAT_INITIAL_MATRIX, &C));
649:     x = (Mat_Elemental *)C->data;
650:   } else {
651:     x = (Mat_Elemental *)X->data;
652:     El::Copy(*((Mat_Elemental *)B->data)->emat, *x->emat);
653:   }
654:   switch (A->factortype) {
655:   case MAT_FACTOR_LU:
656:     if (pivoting == 0) {
657:       El::lu::SolveAfter(El::NORMAL, *a->emat, *x->emat);
658:     } else if (pivoting == 1) {
659:       El::lu::SolveAfter(El::NORMAL, *a->emat, *a->P, *x->emat);
660:     } else {
661:       El::lu::SolveAfter(El::NORMAL, *a->emat, *a->P, *a->Q, *x->emat);
662:     }
663:     break;
664:   case MAT_FACTOR_CHOLESKY:
665:     El::cholesky::SolveAfter(El::UPPER, El::NORMAL, *a->emat, *x->emat);
666:     break;
667:   default:
668:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
669:     break;
670:   }
671:   if (!flg) {
672:     PetscCall(MatConvert(C, type, MAT_REUSE_MATRIX, &X));
673:     PetscCall(MatDestroy(&C));
674:   }
675:   PetscFunctionReturn(PETSC_SUCCESS);
676: }

678: static PetscErrorCode MatLUFactor_Elemental(Mat A, IS, IS, const MatFactorInfo *)
679: {
680:   Mat_Elemental *a        = (Mat_Elemental *)A->data;
681:   PetscInt       pivoting = a->pivoting;

683:   PetscFunctionBegin;
684:   if (pivoting == 0) {
685:     El::LU(*a->emat);
686:   } else if (pivoting == 1) {
687:     El::LU(*a->emat, *a->P);
688:   } else {
689:     El::LU(*a->emat, *a->P, *a->Q);
690:   }
691:   A->factortype = MAT_FACTOR_LU;
692:   A->assembled  = PETSC_TRUE;

694:   PetscCall(PetscFree(A->solvertype));
695:   PetscCall(PetscStrallocpy(MATSOLVERELEMENTAL, &A->solvertype));
696:   PetscFunctionReturn(PETSC_SUCCESS);
697: }

699: static PetscErrorCode MatLUFactorNumeric_Elemental(Mat F, Mat A, const MatFactorInfo *info)
700: {
701:   PetscFunctionBegin;
702:   PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
703:   PetscCall(MatLUFactor_Elemental(F, nullptr, nullptr, info));
704:   PetscFunctionReturn(PETSC_SUCCESS);
705: }

707: static PetscErrorCode MatLUFactorSymbolic_Elemental(Mat, Mat, IS, IS, const MatFactorInfo *)
708: {
709:   PetscFunctionBegin;
710:   /* F is created and allocated by MatGetFactor_elemental_petsc(), skip this routine. */
711:   PetscFunctionReturn(PETSC_SUCCESS);
712: }

714: static PetscErrorCode MatCholeskyFactor_Elemental(Mat A, IS, const MatFactorInfo *)
715: {
716:   Mat_Elemental                                    *a = (Mat_Elemental *)A->data;
717:   El::DistMatrix<PetscElemScalar, El::MC, El::STAR> d;

719:   PetscFunctionBegin;
720:   El::Cholesky(El::UPPER, *a->emat);
721:   A->factortype = MAT_FACTOR_CHOLESKY;
722:   A->assembled  = PETSC_TRUE;

724:   PetscCall(PetscFree(A->solvertype));
725:   PetscCall(PetscStrallocpy(MATSOLVERELEMENTAL, &A->solvertype));
726:   PetscFunctionReturn(PETSC_SUCCESS);
727: }

729: static PetscErrorCode MatCholeskyFactorNumeric_Elemental(Mat F, Mat A, const MatFactorInfo *info)
730: {
731:   PetscFunctionBegin;
732:   PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
733:   PetscCall(MatCholeskyFactor_Elemental(F, nullptr, info));
734:   PetscFunctionReturn(PETSC_SUCCESS);
735: }

737: static PetscErrorCode MatCholeskyFactorSymbolic_Elemental(Mat, Mat, IS, const MatFactorInfo *)
738: {
739:   PetscFunctionBegin;
740:   /* F is created and allocated by MatGetFactor_elemental_petsc(), skip this routine. */
741:   PetscFunctionReturn(PETSC_SUCCESS);
742: }

744: static PetscErrorCode MatFactorGetSolverType_elemental_elemental(Mat, MatSolverType *type)
745: {
746:   PetscFunctionBegin;
747:   *type = MATSOLVERELEMENTAL;
748:   PetscFunctionReturn(PETSC_SUCCESS);
749: }

751: static PetscErrorCode MatGetFactor_elemental_elemental(Mat A, MatFactorType ftype, Mat *F)
752: {
753:   Mat B;

755:   PetscFunctionBegin;
756:   /* Create the factorization matrix */
757:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
758:   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
759:   PetscCall(MatSetType(B, MATELEMENTAL));
760:   PetscCall(MatSetUp(B));
761:   B->factortype      = ftype;
762:   B->trivialsymbolic = PETSC_TRUE;
763:   PetscCall(PetscFree(B->solvertype));
764:   PetscCall(PetscStrallocpy(MATSOLVERELEMENTAL, &B->solvertype));

766:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_elemental_elemental));
767:   *F = B;
768:   PetscFunctionReturn(PETSC_SUCCESS);
769: }

771: PETSC_INTERN PetscErrorCode MatSolverTypeRegister_Elemental(void)
772: {
773:   PetscFunctionBegin;
774:   PetscCall(MatSolverTypeRegister(MATSOLVERELEMENTAL, MATELEMENTAL, MAT_FACTOR_LU, MatGetFactor_elemental_elemental));
775:   PetscCall(MatSolverTypeRegister(MATSOLVERELEMENTAL, MATELEMENTAL, MAT_FACTOR_CHOLESKY, MatGetFactor_elemental_elemental));
776:   PetscFunctionReturn(PETSC_SUCCESS);
777: }

779: static PetscErrorCode MatNorm_Elemental(Mat A, NormType type, PetscReal *nrm)
780: {
781:   Mat_Elemental *a = (Mat_Elemental *)A->data;

783:   PetscFunctionBegin;
784:   switch (type) {
785:   case NORM_1:
786:     *nrm = El::OneNorm(*a->emat);
787:     break;
788:   case NORM_FROBENIUS:
789:     *nrm = El::FrobeniusNorm(*a->emat);
790:     break;
791:   case NORM_INFINITY:
792:     *nrm = El::InfinityNorm(*a->emat);
793:     break;
794:   default:
795:     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type");
796:   }
797:   PetscFunctionReturn(PETSC_SUCCESS);
798: }

800: static PetscErrorCode MatZeroEntries_Elemental(Mat A)
801: {
802:   Mat_Elemental *a = (Mat_Elemental *)A->data;

804:   PetscFunctionBegin;
805:   El::Zero(*a->emat);
806:   PetscFunctionReturn(PETSC_SUCCESS);
807: }

809: static PetscErrorCode MatGetOwnershipIS_Elemental(Mat A, IS *rows, IS *cols)
810: {
811:   Mat_Elemental *a = (Mat_Elemental *)A->data;
812:   PetscInt       i, m, shift, stride, *idx;

814:   PetscFunctionBegin;
815:   if (rows) {
816:     m      = a->emat->LocalHeight();
817:     shift  = a->emat->ColShift();
818:     stride = a->emat->ColStride();
819:     PetscCall(PetscMalloc1(m, &idx));
820:     for (i = 0; i < m; i++) {
821:       PetscInt rank, offset;
822:       E2RO(A, 0, shift + i * stride, &rank, &offset);
823:       RO2P(A, 0, rank, offset, &idx[i]);
824:     }
825:     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, rows));
826:   }
827:   if (cols) {
828:     m      = a->emat->LocalWidth();
829:     shift  = a->emat->RowShift();
830:     stride = a->emat->RowStride();
831:     PetscCall(PetscMalloc1(m, &idx));
832:     for (i = 0; i < m; i++) {
833:       PetscInt rank, offset;
834:       E2RO(A, 1, shift + i * stride, &rank, &offset);
835:       RO2P(A, 1, rank, offset, &idx[i]);
836:     }
837:     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, cols));
838:   }
839:   PetscFunctionReturn(PETSC_SUCCESS);
840: }

842: static PetscErrorCode MatConvert_Elemental_Dense(Mat A, MatType, MatReuse reuse, Mat *B)
843: {
844:   Mat             Bmpi;
845:   Mat_Elemental  *a = (Mat_Elemental *)A->data;
846:   MPI_Comm        comm;
847:   IS              isrows, iscols;
848:   PetscInt        rrank, ridx, crank, cidx, nrows, ncols, i, j, erow, ecol, elrow, elcol;
849:   const PetscInt *rows, *cols;
850:   PetscElemScalar v;
851:   const El::Grid &grid = a->emat->Grid();

853:   PetscFunctionBegin;
854:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));

856:   if (reuse == MAT_REUSE_MATRIX) {
857:     Bmpi = *B;
858:   } else {
859:     PetscCall(MatCreate(comm, &Bmpi));
860:     PetscCall(MatSetSizes(Bmpi, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
861:     PetscCall(MatSetType(Bmpi, MATDENSE));
862:     PetscCall(MatSetUp(Bmpi));
863:   }

865:   /* Get local entries of A */
866:   PetscCall(MatGetOwnershipIS(A, &isrows, &iscols));
867:   PetscCall(ISGetLocalSize(isrows, &nrows));
868:   PetscCall(ISGetIndices(isrows, &rows));
869:   PetscCall(ISGetLocalSize(iscols, &ncols));
870:   PetscCall(ISGetIndices(iscols, &cols));

872:   if (a->roworiented) {
873:     for (i = 0; i < nrows; i++) {
874:       P2RO(A, 0, rows[i], &rrank, &ridx); /* convert indices between PETSc <-> (Rank,Offset) <-> Elemental */
875:       RO2E(A, 0, rrank, ridx, &erow);
876:       PetscCheck(rrank >= 0 && ridx >= 0 && erow >= 0, comm, PETSC_ERR_PLIB, "Incorrect row translation");
877:       for (j = 0; j < ncols; j++) {
878:         P2RO(A, 1, cols[j], &crank, &cidx);
879:         RO2E(A, 1, crank, cidx, &ecol);
880:         PetscCheck(crank >= 0 && cidx >= 0 && ecol >= 0, comm, PETSC_ERR_PLIB, "Incorrect col translation");

882:         elrow = erow / grid.MCSize(); /* Elemental local row index */
883:         elcol = ecol / grid.MRSize(); /* Elemental local column index */
884:         v     = a->emat->GetLocal(elrow, elcol);
885:         PetscCall(MatSetValues(Bmpi, 1, &rows[i], 1, &cols[j], (PetscScalar *)&v, INSERT_VALUES));
886:       }
887:     }
888:   } else { /* column-oriented */
889:     for (j = 0; j < ncols; j++) {
890:       P2RO(A, 1, cols[j], &crank, &cidx);
891:       RO2E(A, 1, crank, cidx, &ecol);
892:       PetscCheck(crank >= 0 && cidx >= 0 && ecol >= 0, comm, PETSC_ERR_PLIB, "Incorrect col translation");
893:       for (i = 0; i < nrows; i++) {
894:         P2RO(A, 0, rows[i], &rrank, &ridx); /* convert indices between PETSc <-> (Rank,Offset) <-> Elemental */
895:         RO2E(A, 0, rrank, ridx, &erow);
896:         PetscCheck(rrank >= 0 && ridx >= 0 && erow >= 0, comm, PETSC_ERR_PLIB, "Incorrect row translation");

898:         elrow = erow / grid.MCSize(); /* Elemental local row index */
899:         elcol = ecol / grid.MRSize(); /* Elemental local column index */
900:         v     = a->emat->GetLocal(elrow, elcol);
901:         PetscCall(MatSetValues(Bmpi, 1, &rows[i], 1, &cols[j], (PetscScalar *)&v, INSERT_VALUES));
902:       }
903:     }
904:   }
905:   PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
906:   PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
907:   if (reuse == MAT_INPLACE_MATRIX) {
908:     PetscCall(MatHeaderReplace(A, &Bmpi));
909:   } else {
910:     *B = Bmpi;
911:   }
912:   PetscCall(ISDestroy(&isrows));
913:   PetscCall(ISDestroy(&iscols));
914:   PetscFunctionReturn(PETSC_SUCCESS);
915: }

917: PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_Elemental(Mat A, MatType, MatReuse reuse, Mat *newmat)
918: {
919:   Mat                mat_elemental;
920:   PetscInt           M = A->rmap->N, N = A->cmap->N, row, ncols;
921:   const PetscInt    *cols;
922:   const PetscScalar *vals;

924:   PetscFunctionBegin;
925:   if (reuse == MAT_REUSE_MATRIX) {
926:     mat_elemental = *newmat;
927:     PetscCall(MatZeroEntries(mat_elemental));
928:   } else {
929:     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
930:     PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, M, N));
931:     PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
932:     PetscCall(MatSetUp(mat_elemental));
933:   }
934:   for (row = 0; row < M; row++) {
935:     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
936:     /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
937:     PetscCall(MatSetValues(mat_elemental, 1, &row, ncols, cols, vals, ADD_VALUES));
938:     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
939:   }
940:   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
941:   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));

943:   if (reuse == MAT_INPLACE_MATRIX) {
944:     PetscCall(MatHeaderReplace(A, &mat_elemental));
945:   } else {
946:     *newmat = mat_elemental;
947:   }
948:   PetscFunctionReturn(PETSC_SUCCESS);
949: }

951: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat A, MatType, MatReuse reuse, Mat *newmat)
952: {
953:   Mat                mat_elemental;
954:   PetscInt           row, ncols, rstart = A->rmap->rstart, rend = A->rmap->rend, j;
955:   const PetscInt    *cols;
956:   const PetscScalar *vals;

958:   PetscFunctionBegin;
959:   if (reuse == MAT_REUSE_MATRIX) {
960:     mat_elemental = *newmat;
961:     PetscCall(MatZeroEntries(mat_elemental));
962:   } else {
963:     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
964:     PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
965:     PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
966:     PetscCall(MatSetUp(mat_elemental));
967:   }
968:   for (row = rstart; row < rend; row++) {
969:     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
970:     for (j = 0; j < ncols; j++) {
971:       /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
972:       PetscCall(MatSetValues(mat_elemental, 1, &row, 1, &cols[j], &vals[j], ADD_VALUES));
973:     }
974:     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
975:   }
976:   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
977:   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));

979:   if (reuse == MAT_INPLACE_MATRIX) {
980:     PetscCall(MatHeaderReplace(A, &mat_elemental));
981:   } else {
982:     *newmat = mat_elemental;
983:   }
984:   PetscFunctionReturn(PETSC_SUCCESS);
985: }

987: PETSC_INTERN PetscErrorCode MatConvert_SeqSBAIJ_Elemental(Mat A, MatType, MatReuse reuse, Mat *newmat)
988: {
989:   Mat                mat_elemental;
990:   PetscInt           M = A->rmap->N, N = A->cmap->N, row, ncols, j;
991:   const PetscInt    *cols;
992:   const PetscScalar *vals;

994:   PetscFunctionBegin;
995:   if (reuse == MAT_REUSE_MATRIX) {
996:     mat_elemental = *newmat;
997:     PetscCall(MatZeroEntries(mat_elemental));
998:   } else {
999:     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
1000:     PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, M, N));
1001:     PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
1002:     PetscCall(MatSetUp(mat_elemental));
1003:   }
1004:   PetscCall(MatGetRowUpperTriangular(A));
1005:   for (row = 0; row < M; row++) {
1006:     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
1007:     /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1008:     PetscCall(MatSetValues(mat_elemental, 1, &row, ncols, cols, vals, ADD_VALUES));
1009:     for (j = 0; j < ncols; j++) { /* lower triangular part */
1010:       PetscScalar v;
1011:       if (cols[j] == row) continue;
1012:       v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
1013:       PetscCall(MatSetValues(mat_elemental, 1, &cols[j], 1, &row, &v, ADD_VALUES));
1014:     }
1015:     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1016:   }
1017:   PetscCall(MatRestoreRowUpperTriangular(A));
1018:   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
1019:   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));

1021:   if (reuse == MAT_INPLACE_MATRIX) {
1022:     PetscCall(MatHeaderReplace(A, &mat_elemental));
1023:   } else {
1024:     *newmat = mat_elemental;
1025:   }
1026:   PetscFunctionReturn(PETSC_SUCCESS);
1027: }

1029: PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat A, MatType, MatReuse reuse, Mat *newmat)
1030: {
1031:   Mat                mat_elemental;
1032:   PetscInt           M = A->rmap->N, N = A->cmap->N, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend;
1033:   const PetscInt    *cols;
1034:   const PetscScalar *vals;

1036:   PetscFunctionBegin;
1037:   if (reuse == MAT_REUSE_MATRIX) {
1038:     mat_elemental = *newmat;
1039:     PetscCall(MatZeroEntries(mat_elemental));
1040:   } else {
1041:     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
1042:     PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, M, N));
1043:     PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
1044:     PetscCall(MatSetUp(mat_elemental));
1045:   }
1046:   PetscCall(MatGetRowUpperTriangular(A));
1047:   for (row = rstart; row < rend; row++) {
1048:     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
1049:     /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1050:     PetscCall(MatSetValues(mat_elemental, 1, &row, ncols, cols, vals, ADD_VALUES));
1051:     for (j = 0; j < ncols; j++) { /* lower triangular part */
1052:       PetscScalar v;
1053:       if (cols[j] == row) continue;
1054:       v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
1055:       PetscCall(MatSetValues(mat_elemental, 1, &cols[j], 1, &row, &v, ADD_VALUES));
1056:     }
1057:     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1058:   }
1059:   PetscCall(MatRestoreRowUpperTriangular(A));
1060:   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
1061:   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));

1063:   if (reuse == MAT_INPLACE_MATRIX) {
1064:     PetscCall(MatHeaderReplace(A, &mat_elemental));
1065:   } else {
1066:     *newmat = mat_elemental;
1067:   }
1068:   PetscFunctionReturn(PETSC_SUCCESS);
1069: }

1071: static PetscErrorCode MatDestroy_Elemental(Mat A)
1072: {
1073:   Mat_Elemental      *a = (Mat_Elemental *)A->data;
1074:   Mat_Elemental_Grid *commgrid;
1075:   PetscBool           flg;
1076:   MPI_Comm            icomm;

1078:   PetscFunctionBegin;
1079:   delete a->emat;
1080:   delete a->P;
1081:   delete a->Q;

1083:   El::mpi::Comm cxxcomm(PetscObjectComm((PetscObject)A));
1084:   PetscCall(PetscCommDuplicate(cxxcomm.comm, &icomm, nullptr));
1085:   PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_Elemental_keyval, (void **)&commgrid, (int *)&flg));
1086:   if (--commgrid->grid_refct == 0) {
1087:     delete commgrid->grid;
1088:     PetscCall(PetscFree(commgrid));
1089:     PetscCallMPI(MPI_Comm_free_keyval(&Petsc_Elemental_keyval));
1090:   }
1091:   PetscCall(PetscCommDestroy(&icomm));
1092:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", nullptr));
1093:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", nullptr));
1094:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_elemental_mpidense_C", nullptr));
1095:   PetscCall(PetscFree(A->data));
1096:   PetscFunctionReturn(PETSC_SUCCESS);
1097: }

1099: static PetscErrorCode MatSetUp_Elemental(Mat A)
1100: {
1101:   Mat_Elemental *a = (Mat_Elemental *)A->data;
1102:   MPI_Comm       comm;
1103:   PetscMPIInt    rsize, csize;
1104:   PetscInt       n;

1106:   PetscFunctionBegin;
1107:   PetscCall(PetscLayoutSetUp(A->rmap));
1108:   PetscCall(PetscLayoutSetUp(A->cmap));

1110:   /* Check if local row and column sizes are equally distributed.
1111:      Jed: Elemental uses "element" cyclic ordering so the sizes need to match that
1112:      exactly.  The strategy in MatElemental is for PETSc to implicitly permute to block ordering (like would be returned by
1113:      PetscSplitOwnership(comm,&n,&N), at which point Elemental matrices can act on PETSc vectors without redistributing the vectors. */
1114:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1115:   n = PETSC_DECIDE;
1116:   PetscCall(PetscSplitOwnership(comm, &n, &A->rmap->N));
1117:   PetscCheck(n == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Local row size %" PetscInt_FMT " of ELEMENTAL matrix must be equally distributed", A->rmap->n);

1119:   n = PETSC_DECIDE;
1120:   PetscCall(PetscSplitOwnership(comm, &n, &A->cmap->N));
1121:   PetscCheck(n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Local column size %" PetscInt_FMT " of ELEMENTAL matrix must be equally distributed", A->cmap->n);

1123:   a->emat->Resize(A->rmap->N, A->cmap->N);
1124:   El::Zero(*a->emat);

1126:   PetscCallMPI(MPI_Comm_size(A->rmap->comm, &rsize));
1127:   PetscCallMPI(MPI_Comm_size(A->cmap->comm, &csize));
1128:   PetscCheck(csize == rsize, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Cannot use row and column communicators of different sizes");
1129:   a->commsize = rsize;
1130:   a->mr[0]    = A->rmap->N % rsize;
1131:   if (!a->mr[0]) a->mr[0] = rsize;
1132:   a->mr[1] = A->cmap->N % csize;
1133:   if (!a->mr[1]) a->mr[1] = csize;
1134:   a->m[0] = A->rmap->N / rsize + (a->mr[0] != rsize);
1135:   a->m[1] = A->cmap->N / csize + (a->mr[1] != csize);
1136:   PetscFunctionReturn(PETSC_SUCCESS);
1137: }

1139: static PetscErrorCode MatAssemblyBegin_Elemental(Mat A, MatAssemblyType)
1140: {
1141:   Mat_Elemental *a = (Mat_Elemental *)A->data;

1143:   PetscFunctionBegin;
1144:   /* printf("Calling ProcessQueues\n"); */
1145:   a->emat->ProcessQueues();
1146:   /* printf("Finished ProcessQueues\n"); */
1147:   PetscFunctionReturn(PETSC_SUCCESS);
1148: }

1150: static PetscErrorCode MatAssemblyEnd_Elemental(Mat, MatAssemblyType)
1151: {
1152:   PetscFunctionBegin;
1153:   /* Currently does nothing */
1154:   PetscFunctionReturn(PETSC_SUCCESS);
1155: }

1157: static PetscErrorCode MatLoad_Elemental(Mat newMat, PetscViewer viewer)
1158: {
1159:   Mat      Adense, Ae;
1160:   MPI_Comm comm;

1162:   PetscFunctionBegin;
1163:   PetscCall(PetscObjectGetComm((PetscObject)newMat, &comm));
1164:   PetscCall(MatCreate(comm, &Adense));
1165:   PetscCall(MatSetType(Adense, MATDENSE));
1166:   PetscCall(MatLoad(Adense, viewer));
1167:   PetscCall(MatConvert(Adense, MATELEMENTAL, MAT_INITIAL_MATRIX, &Ae));
1168:   PetscCall(MatDestroy(&Adense));
1169:   PetscCall(MatHeaderReplace(newMat, &Ae));
1170:   PetscFunctionReturn(PETSC_SUCCESS);
1171: }

1173: static struct _MatOps MatOps_Values = {MatSetValues_Elemental,
1174:                                        nullptr,
1175:                                        nullptr,
1176:                                        MatMult_Elemental,
1177:                                        /* 4*/ MatMultAdd_Elemental,
1178:                                        MatMultTranspose_Elemental,
1179:                                        MatMultTransposeAdd_Elemental,
1180:                                        MatSolve_Elemental,
1181:                                        MatSolveAdd_Elemental,
1182:                                        nullptr,
1183:                                        /*10*/ nullptr,
1184:                                        MatLUFactor_Elemental,
1185:                                        MatCholeskyFactor_Elemental,
1186:                                        nullptr,
1187:                                        MatTranspose_Elemental,
1188:                                        /*15*/ MatGetInfo_Elemental,
1189:                                        nullptr,
1190:                                        MatGetDiagonal_Elemental,
1191:                                        MatDiagonalScale_Elemental,
1192:                                        MatNorm_Elemental,
1193:                                        /*20*/ MatAssemblyBegin_Elemental,
1194:                                        MatAssemblyEnd_Elemental,
1195:                                        MatSetOption_Elemental,
1196:                                        MatZeroEntries_Elemental,
1197:                                        /*24*/ nullptr,
1198:                                        MatLUFactorSymbolic_Elemental,
1199:                                        MatLUFactorNumeric_Elemental,
1200:                                        MatCholeskyFactorSymbolic_Elemental,
1201:                                        MatCholeskyFactorNumeric_Elemental,
1202:                                        /*29*/ MatSetUp_Elemental,
1203:                                        nullptr,
1204:                                        nullptr,
1205:                                        nullptr,
1206:                                        nullptr,
1207:                                        /*34*/ MatDuplicate_Elemental,
1208:                                        nullptr,
1209:                                        nullptr,
1210:                                        nullptr,
1211:                                        nullptr,
1212:                                        /*39*/ MatAXPY_Elemental,
1213:                                        nullptr,
1214:                                        nullptr,
1215:                                        nullptr,
1216:                                        MatCopy_Elemental,
1217:                                        /*44*/ nullptr,
1218:                                        MatScale_Elemental,
1219:                                        MatShift_Basic,
1220:                                        nullptr,
1221:                                        nullptr,
1222:                                        /*49*/ nullptr,
1223:                                        nullptr,
1224:                                        nullptr,
1225:                                        nullptr,
1226:                                        nullptr,
1227:                                        /*54*/ nullptr,
1228:                                        nullptr,
1229:                                        nullptr,
1230:                                        nullptr,
1231:                                        nullptr,
1232:                                        /*59*/ nullptr,
1233:                                        MatDestroy_Elemental,
1234:                                        MatView_Elemental,
1235:                                        nullptr,
1236:                                        nullptr,
1237:                                        /*64*/ nullptr,
1238:                                        nullptr,
1239:                                        nullptr,
1240:                                        nullptr,
1241:                                        nullptr,
1242:                                        /*69*/ nullptr,
1243:                                        nullptr,
1244:                                        MatConvert_Elemental_Dense,
1245:                                        nullptr,
1246:                                        nullptr,
1247:                                        /*74*/ nullptr,
1248:                                        nullptr,
1249:                                        nullptr,
1250:                                        nullptr,
1251:                                        nullptr,
1252:                                        /*79*/ nullptr,
1253:                                        nullptr,
1254:                                        nullptr,
1255:                                        nullptr,
1256:                                        MatLoad_Elemental,
1257:                                        /*84*/ nullptr,
1258:                                        nullptr,
1259:                                        nullptr,
1260:                                        nullptr,
1261:                                        nullptr,
1262:                                        /*89*/ nullptr,
1263:                                        nullptr,
1264:                                        MatMatMultNumeric_Elemental,
1265:                                        nullptr,
1266:                                        nullptr,
1267:                                        /*94*/ nullptr,
1268:                                        nullptr,
1269:                                        nullptr,
1270:                                        MatMatTransposeMultNumeric_Elemental,
1271:                                        nullptr,
1272:                                        /*99*/ MatProductSetFromOptions_Elemental,
1273:                                        nullptr,
1274:                                        nullptr,
1275:                                        MatConjugate_Elemental,
1276:                                        nullptr,
1277:                                        /*104*/ nullptr,
1278:                                        nullptr,
1279:                                        nullptr,
1280:                                        nullptr,
1281:                                        nullptr,
1282:                                        /*109*/ MatMatSolve_Elemental,
1283:                                        nullptr,
1284:                                        nullptr,
1285:                                        nullptr,
1286:                                        MatMissingDiagonal_Elemental,
1287:                                        /*114*/ nullptr,
1288:                                        nullptr,
1289:                                        nullptr,
1290:                                        nullptr,
1291:                                        nullptr,
1292:                                        /*119*/ nullptr,
1293:                                        MatHermitianTranspose_Elemental,
1294:                                        nullptr,
1295:                                        nullptr,
1296:                                        nullptr,
1297:                                        /*124*/ nullptr,
1298:                                        nullptr,
1299:                                        nullptr,
1300:                                        nullptr,
1301:                                        nullptr,
1302:                                        /*129*/ nullptr,
1303:                                        nullptr,
1304:                                        nullptr,
1305:                                        nullptr,
1306:                                        nullptr,
1307:                                        /*134*/ nullptr,
1308:                                        nullptr,
1309:                                        nullptr,
1310:                                        nullptr,
1311:                                        nullptr,
1312:                                        nullptr,
1313:                                        /*140*/ nullptr,
1314:                                        nullptr,
1315:                                        nullptr,
1316:                                        nullptr,
1317:                                        nullptr,
1318:                                        /*145*/ nullptr,
1319:                                        nullptr,
1320:                                        nullptr,
1321:                                        nullptr,
1322:                                        nullptr,
1323:                                        /*150*/ nullptr,
1324:                                        nullptr,
1325:                                        nullptr,
1326:                                        nullptr,
1327:                                        nullptr,
1328:                                        /*155*/ nullptr,
1329:                                        nullptr};

1331: /*MC
1332:    MATELEMENTAL = "elemental" - A matrix type for dense matrices using the Elemental package

1334:   Use ./configure --download-elemental to install PETSc to use Elemental

1336:    Options Database Keys:
1337: + -mat_type elemental - sets the matrix type to "elemental" during a call to MatSetFromOptions()
1338: . -pc_factor_mat_solver_type elemental - to use this direct solver with the option -pc_type lu
1339: - -mat_elemental_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix

1341:   Level: beginner

1343:   Note:
1344:    Note unlike most matrix formats, this format does not store all the matrix entries for a contiguous
1345:    range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on
1346:    the given rank.

1348: .seealso: `MATDENSE`, `MATSCALAPACK`, `MatGetOwnershipIS()`
1349: M*/
1350: #if defined(__clang__)
1351:   #pragma clang diagnostic push
1352:   #pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
1353: #endif
1354: PETSC_EXTERN PetscErrorCode MatCreate_Elemental(Mat A)
1355: {
1356:   Mat_Elemental      *a;
1357:   PetscBool           flg, flg1;
1358:   Mat_Elemental_Grid *commgrid;
1359:   MPI_Comm            icomm;
1360:   PetscInt            optv1;

1362:   PetscFunctionBegin;
1363:   A->ops[0]     = MatOps_Values;
1364:   A->insertmode = NOT_SET_VALUES;

1366:   PetscCall(PetscNew(&a));
1367:   A->data = (void *)a;

1369:   /* Set up the elemental matrix */
1370:   El::mpi::Comm cxxcomm(PetscObjectComm((PetscObject)A));

1372:   /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1373:   if (Petsc_Elemental_keyval == MPI_KEYVAL_INVALID) {
1374:     PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_Elemental_keyval, nullptr));
1375:     PetscCall(PetscCitationsRegister(ElementalCitation, &ElementalCite));
1376:   }
1377:   PetscCall(PetscCommDuplicate(cxxcomm.comm, &icomm, NULL));
1378:   PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_Elemental_keyval, (void **)&commgrid, (int *)&flg));
1379:   if (!flg) {
1380:     PetscCall(PetscNew(&commgrid));

1382:     PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "Elemental Options", "Mat");
1383:     /* displayed default grid sizes (CommSize,1) are set by us arbitrarily until El::Grid() is called */
1384:     PetscCall(PetscOptionsInt("-mat_elemental_grid_height", "Grid Height", "None", El::mpi::Size(cxxcomm), &optv1, &flg1));
1385:     if (flg1) {
1386:       PetscCheck((El::mpi::Size(cxxcomm) % optv1) == 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Grid Height %" PetscInt_FMT " must evenly divide CommSize %" PetscInt_FMT, optv1, (PetscInt)El::mpi::Size(cxxcomm));
1387:       commgrid->grid = new El::Grid(cxxcomm, optv1); /* use user-provided grid height */
1388:     } else {
1389:       commgrid->grid = new El::Grid(cxxcomm); /* use Elemental default grid sizes */
1390:       /* printf("new commgrid->grid = %p\n",commgrid->grid);  -- memory leak revealed by valgrind? */
1391:     }
1392:     commgrid->grid_refct = 1;
1393:     PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_Elemental_keyval, (void *)commgrid));

1395:     a->pivoting = 1;
1396:     PetscCall(PetscOptionsInt("-mat_elemental_pivoting", "Pivoting", "None", a->pivoting, &a->pivoting, NULL));

1398:     PetscOptionsEnd();
1399:   } else {
1400:     commgrid->grid_refct++;
1401:   }
1402:   PetscCall(PetscCommDestroy(&icomm));
1403:   a->grid        = commgrid->grid;
1404:   a->emat        = new El::DistMatrix<PetscElemScalar>(*a->grid);
1405:   a->roworiented = PETSC_TRUE;

1407:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_Elemental));
1408:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_elemental_mpidense_C", MatProductSetFromOptions_Elemental_MPIDense));
1409:   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATELEMENTAL));
1410:   PetscFunctionReturn(PETSC_SUCCESS);
1411: }
1412: #if defined(__clang__)
1413:   #pragma clang diagnostic pop
1414: #endif