Actual source code: matscalapack.c

  1: #include <petsc/private/petscscalapack.h>

  3: const char       ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n"
  4:                                        "       AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n"
  5:                                        "                 J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n"
  6:                                        "                 G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n"
  7:                                        "       TITLE = {Sca{LAPACK} Users' Guide},\n"
  8:                                        "       PUBLISHER = {SIAM},\n"
  9:                                        "       ADDRESS = {Philadelphia, PA},\n"
 10:                                        "       YEAR = 1997\n"
 11:                                        "}\n";
 12: static PetscBool ScaLAPACKCite       = PETSC_FALSE;

 14: #define DEFAULT_BLOCKSIZE 64

 16: /*
 17:    The variable Petsc_ScaLAPACK_keyval identifies an MPI attribute attached to a
 18:    communicator; in this case the value of the attribute is a Mat_ScaLAPACK_Grid
 19: */
 20: static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;

 22: static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
 23: {
 24:   PetscFunctionBegin;
 25:   PetscCall(PetscInfo(NULL, "Freeing Petsc_ScaLAPACK_keyval\n"));
 26:   PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval));
 27:   PetscFunctionReturn(PETSC_SUCCESS);
 28: }
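
For context, this is the standard MPI attribute-caching pattern, reduced to plain MPI as a
self-contained sketch (ExampleGrid and attach_grid are hypothetical stand-ins, not names
from this file):

  #include <mpi.h>
  #include <stdlib.h>

  typedef struct {
    int nprow, npcol; /* stand-in for Mat_ScaLAPACK_Grid */
  } ExampleGrid;

  static int example_keyval = MPI_KEYVAL_INVALID;

  static void attach_grid(MPI_Comm comm)
  {
    ExampleGrid *grid;
    void        *val;
    int          flag;

    /* create the keyval once; afterwards each communicator caches one grid */
    if (example_keyval == MPI_KEYVAL_INVALID) MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &example_keyval, NULL);
    MPI_Comm_get_attr(comm, example_keyval, &val, &flag);
    if (!flag) { /* first use on this communicator: create and attach the grid */
      grid        = (ExampleGrid *)malloc(sizeof(*grid));
      grid->nprow = grid->npcol = 1;
      MPI_Comm_set_attr(comm, example_keyval, grid);
    }
  }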

 30: static PetscErrorCode MatView_ScaLAPACK(Mat A, PetscViewer viewer)
 31: {
 32:   Mat_ScaLAPACK    *a = (Mat_ScaLAPACK *)A->data;
 33:   PetscBool         iascii;
 34:   PetscViewerFormat format;
 35:   Mat               Adense;

 37:   PetscFunctionBegin;
 38:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
 39:   if (iascii) {
 40:     PetscCall(PetscViewerGetFormat(viewer, &format));
 41:     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
 42:       PetscCall(PetscViewerASCIIPrintf(viewer, "block sizes: %d,%d\n", (int)a->mb, (int)a->nb));
 43:       PetscCall(PetscViewerASCIIPrintf(viewer, "grid height=%d, grid width=%d\n", (int)a->grid->nprow, (int)a->grid->npcol));
 44:       PetscCall(PetscViewerASCIIPrintf(viewer, "coordinates of process owning first row and column: (%d,%d)\n", (int)a->rsrc, (int)a->csrc));
 45:       PetscCall(PetscViewerASCIIPrintf(viewer, "dimension of largest local matrix: %d x %d\n", (int)a->locr, (int)a->locc));
 46:       PetscFunctionReturn(PETSC_SUCCESS);
 47:     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
 48:       PetscFunctionReturn(PETSC_SUCCESS);
 49:     }
 50:   }
 51:   /* convert to dense format and call MatView() */
 52:   PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense));
 53:   PetscCall(MatView(Adense, viewer));
 54:   PetscCall(MatDestroy(&Adense));
 55:   PetscFunctionReturn(PETSC_SUCCESS);
 56: }
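
A minimal usage sketch for the viewer (a fragment, assuming PETSc built with ScaLAPACK and
running inside a PetscCall-checked function): the INFO formats print the summary above,
any other format takes the dense-conversion path.

  Mat A;

  PetscCall(MatCreateScaLAPACK(PETSC_COMM_WORLD, 16, 16, 100, 100, 0, 0, &A));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD, PETSC_VIEWER_ASCII_INFO));
  PetscCall(MatView(A, PETSC_VIEWER_STDOUT_WORLD)); /* block sizes, grid shape, local sizes */
  PetscCall(PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD));
  PetscCall(MatDestroy(&A));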

 58: static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A, MatInfoType flag, MatInfo *info)
 59: {
 60:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
 61:   PetscLogDouble isend[2], irecv[2];

 63:   PetscFunctionBegin;
 64:   info->block_size = 1.0;

 66:   isend[0] = a->lld * a->locc;  /* locally allocated */
 67:   isend[1] = a->locr * a->locc; /* used submatrix */
 68:   if (flag == MAT_LOCAL) {
 69:     info->nz_allocated = isend[0];
 70:     info->nz_used      = isend[1];
 71:   } else if (flag == MAT_GLOBAL_MAX) {
 72:     PetscCallMPI(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
 73:     info->nz_allocated = irecv[0];
 74:     info->nz_used      = irecv[1];
 75:   } else if (flag == MAT_GLOBAL_SUM) {
 76:     PetscCallMPI(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
 77:     info->nz_allocated = irecv[0];
 78:     info->nz_used      = irecv[1];
 79:   }

 81:   info->nz_unneeded       = 0;
 82:   info->assemblies        = A->num_ass;
 83:   info->mallocs           = 0;
 84:   info->memory            = 0; /* REVIEW ME */
 85:   info->fill_ratio_given  = 0;
 86:   info->fill_ratio_needed = 0;
 87:   info->factor_mallocs    = 0;
 88:   PetscFunctionReturn(PETSC_SUCCESS);
 89: }

 91: static PetscErrorCode MatSetOption_ScaLAPACK(Mat A, MatOption op, PetscBool flg)
 92: {
 93:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;

 95:   PetscFunctionBegin;
 96:   switch (op) {
 97:   case MAT_NEW_NONZERO_LOCATIONS:
 98:   case MAT_NEW_NONZERO_LOCATION_ERR:
 99:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
100:   case MAT_SYMMETRIC:
101:   case MAT_SORTED_FULL:
102:   case MAT_HERMITIAN:
103:     break;
104:   case MAT_ROW_ORIENTED:
105:     a->roworiented = flg;
106:     break;
107:   default:
108:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported option %s", MatOptions[op]);
109:   }
110:   PetscFunctionReturn(PETSC_SUCCESS);
111: }
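
The only stateful option here is MAT_ROW_ORIENTED, which controls how MatSetValues()
interprets its value array; a one-line fragment:

  PetscCall(MatSetOption(A, MAT_ROW_ORIENTED, PETSC_FALSE)); /* value arrays now read column-major */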

113: static PetscErrorCode MatSetValues_ScaLAPACK(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode)
114: {
115:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
116:   PetscInt       i, j;
117:   PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;
118:   PetscBool      roworiented = a->roworiented;

120:   PetscFunctionBegin;
121:   PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode);
122:   for (i = 0; i < nr; i++) {
123:     if (rows[i] < 0) continue;
124:     PetscCall(PetscBLASIntCast(rows[i] + 1, &gridx));
125:     for (j = 0; j < nc; j++) {
126:       if (cols[j] < 0) continue;
127:       PetscCall(PetscBLASIntCast(cols[j] + 1, &gcidx));
128:       PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
129:       if (rsrc == a->grid->myrow && csrc == a->grid->mycol) {
130:         if (roworiented) {
131:           switch (imode) {
132:           case INSERT_VALUES:
133:             a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i * nc + j];
134:             break;
135:           default:
136:             a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i * nc + j];
137:             break;
138:           }
139:         } else {
140:           switch (imode) {
141:           case INSERT_VALUES:
142:             a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i + j * nr];
143:             break;
144:           default:
145:             a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i + j * nr];
146:             break;
147:           }
148:         }
149:       } else {
150:         PetscCheck(!A->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
151:         A->assembled = PETSC_FALSE;
152:         PetscCall(MatStashValuesRow_Private(&A->stash, rows[i], 1, cols + j, roworiented ? vals + i * nc + j : vals + i + j * nr, (PetscBool)(imode == ADD_VALUES)));
153:       }
154:     }
155:   }
156:   PetscFunctionReturn(PETSC_SUCCESS);
157: }
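
The owner test above relies on the 2d block-cyclic layout that infog2l implements. A
self-contained sketch of the standard mapping along one dimension (0-based indices here,
whereas ScaLAPACK itself is 1-based):

  #include <stdio.h>

  /* Block-cyclic distribution with block size nb over nproc processes starting at
     process isrc: global index g lives on process *owner at local index *loc. */
  static void block_cyclic_map(int g, int nb, int nproc, int isrc, int *owner, int *loc)
  {
    *owner = (g / nb + isrc) % nproc;
    *loc   = (g / (nb * nproc)) * nb + g % nb;
  }

  int main(void)
  {
    int owner, loc;
    block_cyclic_map(10, 4, 3, 0, &owner, &loc); /* global index 10, nb=4, 3 processes */
    printf("owner=%d local=%d\n", owner, loc);   /* prints owner=2 local=2 */
    return 0;
  }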

159: static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A, PetscBool transpose, PetscBool hermitian, PetscScalar beta, const PetscScalar *x, PetscScalar *y)
160: {
161:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
162:   PetscScalar    *x2d, *y2d, alpha = 1.0;
163:   const PetscInt *ranges;
164:   PetscBLASInt    xdesc[9], ydesc[9], x2desc[9], y2desc[9], mb, nb, lszx, lszy, zero = 0, one = 1, xlld, ylld, info;

166:   PetscFunctionBegin;
167:   if (transpose) {
168:     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
169:     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
170:     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
171:     PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &xlld));
172:     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
173:     PetscCheckScaLapackInfo("descinit", info);
174:     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
175:     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* y block size */
176:     ylld = 1;
177:     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &ylld, &info));
178:     PetscCheckScaLapackInfo("descinit", info);

180:     /* allocate 2d vectors */
181:     lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
182:     lszy = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
183:     PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
184:     PetscCall(PetscBLASIntCast(PetscMax(1, lszx), &xlld));

186:     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
187:     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
188:     PetscCheckScaLapackInfo("descinit", info);
189:     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &ylld, &info));
190:     PetscCheckScaLapackInfo("descinit", info);

192:     /* redistribute x as a column of a 2d matrix */
193:     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));

195:     /* redistribute y as a row of a 2d matrix */
196:     if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxrow));

198:     /* call PBLAS subroutine */
199:     if (hermitian) PetscCallBLAS("PBLASgemv", PBLASgemv_("C", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
200:     else PetscCallBLAS("PBLASgemv", PBLASgemv_("T", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));

202:     /* redistribute y from a row of a 2d matrix */
203:     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxrow));

205:   } else { /* non-transpose */

207:     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
208:     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
209:     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* x block size */
210:     xlld = 1;
211:     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &xlld, &info));
212:     PetscCheckScaLapackInfo("descinit", info);
213:     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
214:     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* y block size */
215:     PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &ylld));
216:     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &ylld, &info));
217:     PetscCheckScaLapackInfo("descinit", info);

219:     /* allocate 2d vectors */
220:     lszy = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
221:     lszx = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
222:     PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
223:     PetscCall(PetscBLASIntCast(PetscMax(1, lszy), &ylld));

225:     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
226:     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &xlld, &info));
227:     PetscCheckScaLapackInfo("descinit", info);
228:     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &ylld, &info));
229:     PetscCheckScaLapackInfo("descinit", info);

231:     /* redistribute x as a row of a 2d matrix */
232:     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxrow));

234:     /* redistribute y as a column of a 2d matrix */
235:     if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxcol));

237:     /* call PBLAS subroutine */
238:     PetscCallBLAS("PBLASgemv", PBLASgemv_("N", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));

240:     /* redistribute y from a column of a 2d matrix */
241:     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxcol));
242:   }
243:   PetscCall(PetscFree2(x2d, y2d));
244:   PetscFunctionReturn(PETSC_SUCCESS);
245: }
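
This kernel is reached through the standard matrix operations; a fragment (assuming an
assembled MATSCALAPACK matrix A, inside a PetscCall-checked function):

  Vec x, y;

  PetscCall(MatCreateVecs(A, &x, &y));        /* x matches columns, y matches rows */
  PetscCall(VecSet(x, 1.0));
  PetscCall(MatMult(A, x, y));                /* redistribute x, PBLASgemv, redistribute y */
  PetscCall(MatMultTransposeAdd(A, y, x, x)); /* x = x + A^T y */
  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&y));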

247: static PetscErrorCode MatMult_ScaLAPACK(Mat A, Vec x, Vec y)
248: {
249:   const PetscScalar *xarray;
250:   PetscScalar       *yarray;

252:   PetscFunctionBegin;
253:   PetscCall(VecGetArrayRead(x, &xarray));
254:   PetscCall(VecGetArray(y, &yarray));
255:   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, PETSC_FALSE, 0.0, xarray, yarray));
256:   PetscCall(VecRestoreArrayRead(x, &xarray));
257:   PetscCall(VecRestoreArray(y, &yarray));
258:   PetscFunctionReturn(PETSC_SUCCESS);
259: }

261: static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A, Vec x, Vec y)
262: {
263:   const PetscScalar *xarray;
264:   PetscScalar       *yarray;

266:   PetscFunctionBegin;
267:   PetscCall(VecGetArrayRead(x, &xarray));
268:   PetscCall(VecGetArray(y, &yarray));
269:   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_FALSE, 0.0, xarray, yarray));
270:   PetscCall(VecRestoreArrayRead(x, &xarray));
271:   PetscCall(VecRestoreArray(y, &yarray));
272:   PetscFunctionReturn(PETSC_SUCCESS);
273: }

275: static PetscErrorCode MatMultHermitianTranspose_ScaLAPACK(Mat A, Vec x, Vec y)
276: {
277:   const PetscScalar *xarray;
278:   PetscScalar       *yarray;

280:   PetscFunctionBegin;
281:   PetscCall(VecGetArrayRead(x, &xarray));
282:   PetscCall(VecGetArrayWrite(y, &yarray));
283:   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_TRUE, 0.0, xarray, yarray));
284:   PetscCall(VecRestoreArrayRead(x, &xarray));
285:   PetscCall(VecRestoreArrayWrite(y, &yarray));
286:   PetscFunctionReturn(PETSC_SUCCESS);
287: }

289: static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
290: {
291:   const PetscScalar *xarray;
292:   PetscScalar       *zarray;

294:   PetscFunctionBegin;
295:   if (y != z) PetscCall(VecCopy(y, z));
296:   PetscCall(VecGetArrayRead(x, &xarray));
297:   PetscCall(VecGetArray(z, &zarray));
298:   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, PETSC_FALSE, 1.0, xarray, zarray));
299:   PetscCall(VecRestoreArrayRead(x, &xarray));
300:   PetscCall(VecRestoreArray(z, &zarray));
301:   PetscFunctionReturn(PETSC_SUCCESS);
302: }

304: static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
305: {
306:   const PetscScalar *xarray;
307:   PetscScalar       *zarray;

309:   PetscFunctionBegin;
310:   if (y != z) PetscCall(VecCopy(y, z));
311:   PetscCall(VecGetArrayRead(x, &xarray));
312:   PetscCall(VecGetArray(z, &zarray));
313:   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_FALSE, 1.0, xarray, zarray));
314:   PetscCall(VecRestoreArrayRead(x, &xarray));
315:   PetscCall(VecRestoreArray(z, &zarray));
316:   PetscFunctionReturn(PETSC_SUCCESS);
317: }

319: static PetscErrorCode MatMultHermitianTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
320: {
321:   const PetscScalar *xarray;
322:   PetscScalar       *zarray;

324:   PetscFunctionBegin;
325:   if (y != z) PetscCall(VecCopy(y, z));
326:   PetscCall(VecGetArrayRead(x, &xarray));
327:   PetscCall(VecGetArray(z, &zarray));
328:   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_TRUE, 1.0, xarray, zarray));
329:   PetscCall(VecRestoreArrayRead(x, &xarray));
330:   PetscCall(VecRestoreArray(z, &zarray));
331:   PetscFunctionReturn(PETSC_SUCCESS);
332: }

334: PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
335: {
336:   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
337:   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
338:   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
339:   PetscScalar    sone = 1.0, zero = 0.0;
340:   PetscBLASInt   one = 1;

342:   PetscFunctionBegin;
343:   PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "N", &a->M, &b->N, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
344:   C->assembled = PETSC_TRUE;
345:   PetscFunctionReturn(PETSC_SUCCESS);
346: }

348: PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
349: {
350:   PetscFunctionBegin;
351:   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
352:   PetscCall(MatSetType(C, MATSCALAPACK));
353:   PetscCall(MatSetUp(C));
354:   C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
355:   PetscFunctionReturn(PETSC_SUCCESS);
356: }

358: static PetscErrorCode MatTransposeMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
359: {
360:   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
361:   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
362:   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
363:   PetscScalar    sone = 1.0, zero = 0.0;
364:   PetscBLASInt   one = 1;

366:   PetscFunctionBegin;
367:   PetscCallBLAS("PBLASgemm", PBLASgemm_("T", "N", &a->N, &b->N, &a->M, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
368:   C->assembled = PETSC_TRUE;
369:   PetscFunctionReturn(PETSC_SUCCESS);
370: }

372: static PetscErrorCode MatTransposeMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
373: {
374:   PetscFunctionBegin;
375:   PetscCall(MatSetSizes(C, A->cmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
376:   PetscCall(MatSetType(C, MATSCALAPACK));
377:   PetscCall(MatSetUp(C));
378:   C->ops->transposematmultnumeric = MatTransposeMatMultNumeric_ScaLAPACK;
379:   PetscFunctionReturn(PETSC_SUCCESS);
380: }

382: static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
383: {
384:   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
385:   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
386:   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
387:   PetscScalar    sone = 1.0, zero = 0.0;
388:   PetscBLASInt   one = 1;

390:   PetscFunctionBegin;
391:   PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "T", &a->M, &b->M, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
392:   C->assembled = PETSC_TRUE;
393:   PetscFunctionReturn(PETSC_SUCCESS);
394: }

396: static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
397: {
398:   PetscFunctionBegin;
399:   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE));
400:   PetscCall(MatSetType(C, MATSCALAPACK));
401:   PetscCall(MatSetUp(C));
402:   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_ScaLAPACK;
403:   PetscFunctionReturn(PETSC_SUCCESS);
404: }

406: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
407: {
408:   Mat_Product *product = C->product;

410:   PetscFunctionBegin;
411:   switch (product->type) {
412:   case MATPRODUCT_AB:
413:     C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
414:     C->ops->productsymbolic = MatProductSymbolic_AB;
415:     break;
416:   case MATPRODUCT_AtB:
417:     C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_ScaLAPACK;
418:     C->ops->productsymbolic          = MatProductSymbolic_AtB;
419:     break;
420:   case MATPRODUCT_ABt:
421:     C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
422:     C->ops->productsymbolic          = MatProductSymbolic_ABt;
423:     break;
424:   default:
425:     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices", MatProductTypes[product->type]);
426:   }
427:   PetscFunctionReturn(PETSC_SUCCESS);
428: }
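
These hooks are exercised through the usual product interfaces; a fragment (assuming
assembled MATSCALAPACK matrices A and B of compatible sizes):

  Mat C, D;

  PetscCall(MatMatMult(A, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &C));          /* MATPRODUCT_AB  */
  PetscCall(MatTransposeMatMult(A, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &D)); /* MATPRODUCT_AtB */
  PetscCall(MatDestroy(&C));
  PetscCall(MatDestroy(&D));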

430: static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A, Vec D)
431: {
432:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
433:   PetscScalar    *darray, *d2d, v;
434:   const PetscInt *ranges;
435:   PetscBLASInt    j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;

437:   PetscFunctionBegin;
438:   PetscCall(VecGetArray(D, &darray));

440:   if (A->rmap->N <= A->cmap->N) { /* row version */

442:     /* create ScaLAPACK descriptor for vector (1d block distribution) */
443:     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
444:     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
445:     PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &dlld));
446:     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
447:     PetscCheckScaLapackInfo("descinit", info);

449:     /* allocate 2d vector */
450:     lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
451:     PetscCall(PetscCalloc1(lszd, &d2d));
452:     PetscCall(PetscBLASIntCast(PetscMax(1, lszd), &dlld));

454:     /* create ScaLAPACK descriptor for vector (2d block distribution) */
455:     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
456:     PetscCheckScaLapackInfo("descinit", info);

458:     /* collect diagonal */
459:     for (j = 1; j <= a->M; j++) {
460:       PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("R", " ", &v, a->loc, &j, &j, a->desc));
461:       PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &j, &one, d2desc, &v));
462:     }

464:     /* redistribute d from a column of a 2d matrix */
465:     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxcol));
466:     PetscCall(PetscFree(d2d));

468:   } else { /* column version */

470:     /* create ScaLAPACK descriptor for vector (1d block distribution) */
471:     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
472:     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
473:     dlld = 1;
474:     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
475:     PetscCheckScaLapackInfo("descinit", info);

477:     /* allocate 2d vector */
478:     lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
479:     PetscCall(PetscCalloc1(lszd, &d2d));

481:     /* create ScaLAPACK descriptor for vector (2d block distribution) */
482:     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
483:     PetscCheckScaLapackInfo("descinit", info);

485:     /* collect diagonal */
486:     for (j = 1; j <= a->N; j++) {
487:       PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("C", " ", &v, a->loc, &j, &j, a->desc));
488:       PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &one, &j, d2desc, &v));
489:     }

491:     /* redistribute d from a row of a 2d matrix */
492:     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxrow));
493:     PetscCall(PetscFree(d2d));
494:   }

496:   PetscCall(VecRestoreArray(D, &darray));
497:   PetscCall(VecAssemblyBegin(D));
498:   PetscCall(VecAssemblyEnd(D));
499:   PetscFunctionReturn(PETSC_SUCCESS);
500: }

502: static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A, Vec L, Vec R)
503: {
504:   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
505:   const PetscScalar *d;
506:   const PetscInt    *ranges;
507:   PetscScalar       *d2d;
508:   PetscBLASInt       i, j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;

510:   PetscFunctionBegin;
511:   if (R) {
512:     PetscCall(VecGetArrayRead(R, &d));
513:     /* create ScaLAPACK descriptor for vector (1d block distribution) */
514:     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
515:     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
516:     dlld = 1;
517:     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
518:     PetscCheckScaLapackInfo("descinit", info);

520:     /* allocate 2d vector */
521:     lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
522:     PetscCall(PetscCalloc1(lszd, &d2d));

524:     /* create ScaLAPACK descriptor for vector (2d block distribution) */
525:     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
526:     PetscCheckScaLapackInfo("descinit", info);

528:     /* redistribute d to a row of a 2d matrix */
529:     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxrow));

531:     /* broadcast along process columns */
532:     if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld);
533:     else Cdgebr2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld, 0, a->grid->mycol);

535:     /* local scaling */
536:     for (j = 0; j < a->locc; j++)
537:       for (i = 0; i < a->locr; i++) a->loc[i + j * a->lld] *= d2d[j];

539:     PetscCall(PetscFree(d2d));
540:     PetscCall(VecRestoreArrayRead(R, &d));
541:   }
542:   if (L) {
543:     PetscCall(VecGetArrayRead(L, &d));
544:     /* create ScaLAPACK descriptor for vector (1d block distribution) */
545:     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
546:     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
547:     PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &dlld));
548:     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
549:     PetscCheckScaLapackInfo("descinit", info);

551:     /* allocate 2d vector */
552:     lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
553:     PetscCall(PetscCalloc1(lszd, &d2d));
554:     PetscCall(PetscBLASIntCast(PetscMax(1, lszd), &dlld));

556:     /* create ScaLAPACK descriptor for vector (2d block distribution) */
557:     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
558:     PetscCheckScaLapackInfo("descinit", info);

560:     /* redistribute d to a column of a 2d matrix */
561:     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxcol));

563:     /* broadcast along process rows */
564:     if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld);
565:     else Cdgebr2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld, a->grid->myrow, 0);

567:     /* local scaling */
568:     for (i = 0; i < a->locr; i++)
569:       for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] *= d2d[i];

571:     PetscCall(PetscFree(d2d));
572:     PetscCall(VecRestoreArrayRead(L, &d));
573:   }
574:   PetscFunctionReturn(PETSC_SUCCESS);
575: }
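
A fragment exercising the two diagonal routines above (assuming vectors d, l, r laid out
compatibly with A; either scaling argument may be NULL):

  PetscCall(MatGetDiagonal(A, d));         /* d = diag(A) */
  PetscCall(MatDiagonalScale(A, l, r));    /* A = diag(l) A diag(r) */
  PetscCall(MatDiagonalScale(A, NULL, r)); /* column scaling only */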

577: static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A, PetscBool *missing, PetscInt *d)
578: {
579:   PetscFunctionBegin;
580:   *missing = PETSC_FALSE;
581:   PetscFunctionReturn(PETSC_SUCCESS);
582: }

584: static PetscErrorCode MatScale_ScaLAPACK(Mat X, PetscScalar a)
585: {
586:   Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
587:   PetscBLASInt   n, one = 1;

589:   PetscFunctionBegin;
590:   n = x->lld * x->locc;
591:   PetscCallBLAS("BLASscal", BLASscal_(&n, &a, x->loc, &one));
592:   PetscFunctionReturn(PETSC_SUCCESS);
593: }

595: static PetscErrorCode MatShift_ScaLAPACK(Mat X, PetscScalar alpha)
596: {
597:   Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
598:   PetscBLASInt   i, n;
599:   PetscScalar    v;

601:   PetscFunctionBegin;
602:   n = PetscMin(x->M, x->N);
603:   for (i = 1; i <= n; i++) {
604:     PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("-", " ", &v, x->loc, &i, &i, x->desc));
605:     v += alpha;
606:     PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(x->loc, &i, &i, x->desc, &v));
607:   }
608:   PetscFunctionReturn(PETSC_SUCCESS);
609: }

611: static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
612: {
613:   Mat_ScaLAPACK *x    = (Mat_ScaLAPACK *)X->data;
614:   Mat_ScaLAPACK *y    = (Mat_ScaLAPACK *)Y->data;
615:   PetscBLASInt   one  = 1;
616:   PetscScalar    beta = 1.0;

618:   PetscFunctionBegin;
619:   MatScaLAPACKCheckDistribution(Y, 1, X, 3);
620:   PetscCallBLAS("SCALAPACKmatadd", SCALAPACKmatadd_(&x->M, &x->N, &alpha, x->loc, &one, &one, x->desc, &beta, y->loc, &one, &one, y->desc));
621:   PetscCall(PetscObjectStateIncrease((PetscObject)Y));
622:   PetscFunctionReturn(PETSC_SUCCESS);
623: }

625: static PetscErrorCode MatCopy_ScaLAPACK(Mat A, Mat B, MatStructure str)
626: {
627:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
628:   Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data;

630:   PetscFunctionBegin;
631:   PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
632:   PetscCall(PetscObjectStateIncrease((PetscObject)B));
633:   PetscFunctionReturn(PETSC_SUCCESS);
634: }

636: static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A, MatDuplicateOption op, Mat *B)
637: {
638:   Mat            Bs;
639:   MPI_Comm       comm;
640:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b;

642:   PetscFunctionBegin;
643:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
644:   PetscCall(MatCreate(comm, &Bs));
645:   PetscCall(MatSetSizes(Bs, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
646:   PetscCall(MatSetType(Bs, MATSCALAPACK));
647:   b       = (Mat_ScaLAPACK *)Bs->data;
648:   b->M    = a->M;
649:   b->N    = a->N;
650:   b->mb   = a->mb;
651:   b->nb   = a->nb;
652:   b->rsrc = a->rsrc;
653:   b->csrc = a->csrc;
654:   PetscCall(MatSetUp(Bs));
655:   *B = Bs;
656:   if (op == MAT_COPY_VALUES) PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
657:   Bs->assembled = PETSC_TRUE;
658:   PetscFunctionReturn(PETSC_SUCCESS);
659: }

661: static PetscErrorCode MatTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
662: {
663:   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data, *b;
664:   Mat            Bs   = *B;
665:   PetscBLASInt   one  = 1;
666:   PetscScalar    sone = 1.0, zero = 0.0;
667: #if defined(PETSC_USE_COMPLEX)
668:   PetscInt i, j;
669: #endif

671:   PetscFunctionBegin;
672:   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
673:   PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
674:   PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
675:   *B = Bs;
676:   b  = (Mat_ScaLAPACK *)Bs->data;
677:   PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
678: #if defined(PETSC_USE_COMPLEX)
679:   /* undo conjugation */
680:   for (i = 0; i < b->locr; i++)
681:     for (j = 0; j < b->locc; j++) b->loc[i + j * b->lld] = PetscConj(b->loc[i + j * b->lld]);
682: #endif
683:   Bs->assembled = PETSC_TRUE;
684:   PetscFunctionReturn(PETSC_SUCCESS);
685: }

687: static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
688: {
689:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
690:   PetscInt       i, j;

692:   PetscFunctionBegin;
693:   for (i = 0; i < a->locr; i++)
694:     for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] = PetscConj(a->loc[i + j * a->lld]);
695:   PetscFunctionReturn(PETSC_SUCCESS);
696: }

698: static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
699: {
700:   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data, *b;
701:   Mat            Bs   = *B;
702:   PetscBLASInt   one  = 1;
703:   PetscScalar    sone = 1.0, zero = 0.0;

705:   PetscFunctionBegin;
706:   PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
707:   PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
708:   *B = Bs;
709:   b  = (Mat_ScaLAPACK *)Bs->data;
710:   PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
711:   Bs->assembled = PETSC_TRUE;
712:   PetscFunctionReturn(PETSC_SUCCESS);
713: }

715: static PetscErrorCode MatSolve_ScaLAPACK(Mat A, Vec B, Vec X)
716: {
717:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
718:   PetscScalar    *x, *x2d;
719:   const PetscInt *ranges;
720:   PetscBLASInt    xdesc[9], x2desc[9], mb, lszx, zero = 0, one = 1, xlld, nrhs = 1, info;

722:   PetscFunctionBegin;
723:   PetscCall(VecCopy(B, X));
724:   PetscCall(VecGetArray(X, &x));

726:   /* create ScaLAPACK descriptor for a vector (1d block distribution) */
727:   PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
728:   PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
729:   PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &xlld));
730:   PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
731:   PetscCheckScaLapackInfo("descinit", info);

733:   /* allocate 2d vector */
734:   lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
735:   PetscCall(PetscMalloc1(lszx, &x2d));
736:   PetscCall(PetscBLASIntCast(PetscMax(1, lszx), &xlld));

738:   /* create ScaLAPACK descriptor for a vector (2d block distribution) */
739:   PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
740:   PetscCheckScaLapackInfo("descinit", info);

742:   /* redistribute x as a column of a 2d matrix */
743:   PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));

745:   /* call ScaLAPACK subroutine */
746:   switch (A->factortype) {
747:   case MAT_FACTOR_LU:
748:     PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &nrhs, a->loc, &one, &one, a->desc, a->pivots, x2d, &one, &one, x2desc, &info));
749:     PetscCheckScaLapackInfo("getrs", info);
750:     break;
751:   case MAT_FACTOR_CHOLESKY:
752:     PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &nrhs, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &info));
753:     PetscCheckScaLapackInfo("potrs", info);
754:     break;
755:   default:
756:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
757:   }

759:   /* redistribute x from a column of a 2d matrix */
760:   PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x2d, &one, &one, x2desc, x, &one, &one, xdesc, &a->grid->ictxcol));

762:   PetscCall(PetscFree(x2d));
763:   PetscCall(VecRestoreArray(X, &x));
764:   PetscFunctionReturn(PETSC_SUCCESS);
765: }

767: static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A, Vec B, Vec Y, Vec X)
768: {
769:   PetscFunctionBegin;
770:   PetscCall(MatSolve_ScaLAPACK(A, B, X));
771:   PetscCall(VecAXPY(X, 1, Y));
772:   PetscFunctionReturn(PETSC_SUCCESS);
773: }

775: static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A, Mat B, Mat X)
776: {
777:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *x;
778:   PetscBool      flg1, flg2;
779:   PetscBLASInt   one = 1, info;
780:   Mat            C;
781:   MatType        type;

783:   PetscFunctionBegin;
784:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSCALAPACK, &flg1));
785:   PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSCALAPACK, &flg2));
786:   if (flg1 && flg2) MatScaLAPACKCheckDistribution(B, 2, X, 3);
787:   if (flg2) {
788:     if (flg1) PetscCall(MatCopy(B, X, SAME_NONZERO_PATTERN));
789:     else PetscCall(MatConvert(B, MATSCALAPACK, MAT_REUSE_MATRIX, &X));
790:     C = X;
791:   } else {
792:     PetscCall(MatConvert(B, MATSCALAPACK, MAT_INITIAL_MATRIX, &C));
793:   }
794:   x = (Mat_ScaLAPACK *)C->data;

796:   switch (A->factortype) {
797:   case MAT_FACTOR_LU:
798:     PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &x->N, a->loc, &one, &one, a->desc, a->pivots, x->loc, &one, &one, x->desc, &info));
799:     PetscCheckScaLapackInfo("getrs", info);
800:     break;
801:   case MAT_FACTOR_CHOLESKY:
802:     PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &x->N, a->loc, &one, &one, a->desc, x->loc, &one, &one, x->desc, &info));
803:     PetscCheckScaLapackInfo("potrs", info);
804:     break;
805:   default:
806:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
807:   }
808:   if (!flg2) {
809:     PetscCall(MatGetType(X, &type));
810:     PetscCall(MatConvert(C, type, MAT_REUSE_MATRIX, &X));
811:     PetscCall(MatDestroy(&C));
812:   }
813:   PetscFunctionReturn(PETSC_SUCCESS);
814: }

816: static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A, IS row, IS col, const MatFactorInfo *factorinfo)
817: {
818:   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
819:   PetscBLASInt   one = 1, info;

821:   PetscFunctionBegin;
822:   if (!a->pivots) PetscCall(PetscMalloc1(a->locr + a->mb, &a->pivots));
823:   PetscCallBLAS("SCALAPACKgetrf", SCALAPACKgetrf_(&a->M, &a->N, a->loc, &one, &one, a->desc, a->pivots, &info));
824:   PetscCheckScaLapackInfo("getrf", info);
825:   A->factortype = MAT_FACTOR_LU;
826:   A->assembled  = PETSC_TRUE;

828:   PetscCall(PetscFree(A->solvertype));
829:   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
830:   PetscFunctionReturn(PETSC_SUCCESS);
831: }

833: static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
834: {
835:   PetscFunctionBegin;
836:   PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
837:   PetscCall(MatLUFactor_ScaLAPACK(F, 0, 0, info));
838:   PetscFunctionReturn(PETSC_SUCCESS);
839: }

841: static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
842: {
843:   PetscFunctionBegin;
844:   /* F is created and allocated by MatGetFactor_scalapack_scalapack(), skip this routine. */
845:   PetscFunctionReturn(PETSC_SUCCESS);
846: }
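
A fragment of the factor/solve sequence these routines back (assuming an assembled
MATSCALAPACK matrix A and compatible vectors b, x; passing NULL orderings is assumed to be
acceptable since this solver ignores them):

  Mat           F;
  MatFactorInfo finfo;

  PetscCall(MatFactorInfoInitialize(&finfo));
  PetscCall(MatGetFactor(A, MATSOLVERSCALAPACK, MAT_FACTOR_LU, &F));
  PetscCall(MatLUFactorSymbolic(F, A, NULL, NULL, &finfo)); /* no-op, see above */
  PetscCall(MatLUFactorNumeric(F, A, &finfo));              /* copies A into F, then getrf */
  PetscCall(MatSolve(F, b, x));                             /* getrs */
  PetscCall(MatDestroy(&F));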

848: static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A, IS perm, const MatFactorInfo *factorinfo)
849: {
850:   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
851:   PetscBLASInt   one = 1, info;

853:   PetscFunctionBegin;
854:   PetscCallBLAS("SCALAPACKpotrf", SCALAPACKpotrf_("L", &a->M, a->loc, &one, &one, a->desc, &info));
855:   PetscCheckScaLapackInfo("potrf", info);
856:   A->factortype = MAT_FACTOR_CHOLESKY;
857:   A->assembled  = PETSC_TRUE;

859:   PetscCall(PetscFree(A->solvertype));
860:   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
861:   PetscFunctionReturn(PETSC_SUCCESS);
862: }

864: static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
865: {
866:   PetscFunctionBegin;
867:   PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
868:   PetscCall(MatCholeskyFactor_ScaLAPACK(F, 0, info));
869:   PetscFunctionReturn(PETSC_SUCCESS);
870: }

872: static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS perm, const MatFactorInfo *info)
873: {
874:   PetscFunctionBegin;
875:   /* F is created and allocated by MatGetFactor_scalapack_scalapack(), skip this routine. */
876:   PetscFunctionReturn(PETSC_SUCCESS);
877: }

879: static PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A, MatSolverType *type)
880: {
881:   PetscFunctionBegin;
882:   *type = MATSOLVERSCALAPACK;
883:   PetscFunctionReturn(PETSC_SUCCESS);
884: }

886: static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A, MatFactorType ftype, Mat *F)
887: {
888:   Mat            B;
889:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;

891:   PetscFunctionBegin;
892:   /* Create the factorization matrix */
893:   PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->mb, a->nb, a->M, a->N, a->rsrc, a->csrc, &B));
894:   B->trivialsymbolic = PETSC_TRUE;
895:   B->factortype      = ftype;
896:   PetscCall(PetscFree(B->solvertype));
897:   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &B->solvertype));

899:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_scalapack_scalapack));
900:   *F = B;
901:   PetscFunctionReturn(PETSC_SUCCESS);
902: }

904: PETSC_INTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
905: {
906:   PetscFunctionBegin;
907:   PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_LU, MatGetFactor_scalapack_scalapack));
908:   PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_CHOLESKY, MatGetFactor_scalapack_scalapack));
909:   PetscFunctionReturn(PETSC_SUCCESS);
910: }
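
Once registered, the solver can be selected entirely from the options database; a typical
command line (standard PETSc options):

  ./app -mat_type scalapack -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type scalapack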

912: static PetscErrorCode MatNorm_ScaLAPACK(Mat A, NormType type, PetscReal *nrm)
913: {
914:   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
915:   PetscBLASInt   one = 1, lwork = 0;
916:   const char    *ntype;
917:   PetscScalar   *work = NULL, dummy;

919:   PetscFunctionBegin;
920:   switch (type) {
921:   case NORM_1:
922:     ntype = "1";
923:     lwork = PetscMax(a->locr, a->locc);
924:     break;
925:   case NORM_FROBENIUS:
926:     ntype = "F";
927:     work  = &dummy;
928:     break;
929:   case NORM_INFINITY:
930:     ntype = "I";
931:     lwork = PetscMax(a->locr, a->locc);
932:     break;
933:   default:
934:     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type");
935:   }
936:   if (lwork) PetscCall(PetscMalloc1(lwork, &work));
937:   *nrm = SCALAPACKlange_(ntype, &a->M, &a->N, a->loc, &one, &one, a->desc, work);
938:   if (lwork) PetscCall(PetscFree(work));
939:   PetscFunctionReturn(PETSC_SUCCESS);
940: }

942: static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
943: {
944:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;

946:   PetscFunctionBegin;
947:   PetscCall(PetscArrayzero(a->loc, a->lld * a->locc));
948:   PetscFunctionReturn(PETSC_SUCCESS);
949: }

951: static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A, IS *rows, IS *cols)
952: {
953:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
954:   PetscInt       i, n, nb, isrc, nproc, iproc, *idx;

956:   PetscFunctionBegin;
957:   if (rows) {
958:     n     = a->locr;
959:     nb    = a->mb;
960:     isrc  = a->rsrc;
961:     nproc = a->grid->nprow;
962:     iproc = a->grid->myrow;
963:     PetscCall(PetscMalloc1(n, &idx));
964:     for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
965:     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, rows));
966:   }
967:   if (cols) {
968:     n     = a->locc;
969:     nb    = a->nb;
970:     isrc  = a->csrc;
971:     nproc = a->grid->npcol;
972:     iproc = a->grid->mycol;
973:     PetscCall(PetscMalloc1(n, &idx));
974:     for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
975:     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, cols));
976:   }
977:   PetscFunctionReturn(PETSC_SUCCESS);
978: }
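
A fragment retrieving the block-cyclic ownership computed above, via the public entry
point composed onto the matrix:

  IS rows, cols;

  PetscCall(MatGetOwnershipIS(A, &rows, &cols));
  PetscCall(ISView(rows, PETSC_VIEWER_STDOUT_SELF)); /* global rows stored on this process */
  PetscCall(ISDestroy(&rows));
  PetscCall(ISDestroy(&cols));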

980: static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A, MatType newtype, MatReuse reuse, Mat *B)
981: {
982:   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
983:   Mat                Bmpi;
984:   MPI_Comm           comm;
985:   PetscInt           i, M = A->rmap->N, N = A->cmap->N, m, n, rstart, rend, nz, ldb;
986:   const PetscInt    *ranges, *branges, *cwork;
987:   const PetscScalar *vwork;
988:   PetscBLASInt       bdesc[9], bmb, zero = 0, one = 1, lld, info;
989:   PetscScalar       *barray;
990:   PetscBool          differ = PETSC_FALSE;
991:   PetscMPIInt        size;

993:   PetscFunctionBegin;
994:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
995:   PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));

997:   if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
998:     PetscCallMPI(MPI_Comm_size(comm, &size));
999:     PetscCall(PetscLayoutGetRanges((*B)->rmap, &branges));
1000:     for (i = 0; i < size; i++)
1001:       if (ranges[i + 1] != branges[i + 1]) {
1002:         differ = PETSC_TRUE;
1003:         break;
1004:       }
1005:   }

1007:   if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
1008:     PetscCall(MatCreate(comm, &Bmpi));
1009:     m = PETSC_DECIDE;
1010:     PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1011:     n = PETSC_DECIDE;
1012:     PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1013:     PetscCall(MatSetSizes(Bmpi, m, n, M, N));
1014:     PetscCall(MatSetType(Bmpi, MATDENSE));
1015:     PetscCall(MatSetUp(Bmpi));

1017:     /* create ScaLAPACK descriptor for B (1d block distribution) */
1018:     PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
1019:     PetscCall(MatDenseGetLDA(Bmpi, &ldb));
1020:     PetscCall(PetscBLASIntCast(PetscMax(ldb, 1), &lld)); /* local leading dimension */
1021:     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
1022:     PetscCheckScaLapackInfo("descinit", info);

1024:     /* redistribute matrix */
1025:     PetscCall(MatDenseGetArray(Bmpi, &barray));
1026:     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
1027:     PetscCall(MatDenseRestoreArray(Bmpi, &barray));
1028:     PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
1029:     PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));

1031:     /* transfer rows of auxiliary matrix to the final matrix B */
1032:     PetscCall(MatGetOwnershipRange(Bmpi, &rstart, &rend));
1033:     for (i = rstart; i < rend; i++) {
1034:       PetscCall(MatGetRow(Bmpi, i, &nz, &cwork, &vwork));
1035:       PetscCall(MatSetValues(*B, 1, &i, nz, cwork, vwork, INSERT_VALUES));
1036:       PetscCall(MatRestoreRow(Bmpi, i, &nz, &cwork, &vwork));
1037:     }
1038:     PetscCall(MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY));
1039:     PetscCall(MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY));
1040:     PetscCall(MatDestroy(&Bmpi));

1042:   } else { /* normal cases */

1044:     if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1045:     else {
1046:       PetscCall(MatCreate(comm, &Bmpi));
1047:       m = PETSC_DECIDE;
1048:       PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1049:       n = PETSC_DECIDE;
1050:       PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1051:       PetscCall(MatSetSizes(Bmpi, m, n, M, N));
1052:       PetscCall(MatSetType(Bmpi, MATDENSE));
1053:       PetscCall(MatSetUp(Bmpi));
1054:     }

1056:     /* create ScaLAPACK descriptor for B (1d block distribution) */
1057:     PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
1058:     PetscCall(MatDenseGetLDA(Bmpi, &ldb));
1059:     PetscCall(PetscBLASIntCast(PetscMax(ldb, 1), &lld)); /* local leading dimension */
1060:     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
1061:     PetscCheckScaLapackInfo("descinit", info);

1063:     /* redistribute matrix */
1064:     PetscCall(MatDenseGetArray(Bmpi, &barray));
1065:     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
1066:     PetscCall(MatDenseRestoreArray(Bmpi, &barray));

1068:     PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
1069:     PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
1070:     if (reuse == MAT_INPLACE_MATRIX) {
1071:       PetscCall(MatHeaderReplace(A, &Bmpi));
1072:     } else *B = Bmpi;
1073:   }
1074:   PetscFunctionReturn(PETSC_SUCCESS);
1075: }

1077: static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map, PetscBool *correct)
1078: {
1079:   const PetscInt *ranges;
1080:   PetscMPIInt     size;
1081:   PetscInt        i, n;

1083:   PetscFunctionBegin;
1084:   *correct = PETSC_TRUE;
1085:   PetscCallMPI(MPI_Comm_size(map->comm, &size));
1086:   if (size > 1) {
1087:     PetscCall(PetscLayoutGetRanges(map, &ranges));
1088:     n = ranges[1] - ranges[0];
1089:     for (i = 1; i < size; i++)
1090:       if (ranges[i + 1] - ranges[i] != n) break;
1091:     *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i + 1] - ranges[i] <= n));
1092:   }
1093:   PetscFunctionReturn(PETSC_SUCCESS);
1094: }

1096: PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *B)
1097: {
1098:   Mat_ScaLAPACK     *b;
1099:   Mat                Bmpi;
1100:   MPI_Comm           comm;
1101:   PetscInt           M = A->rmap->N, N = A->cmap->N, m, n;
1102:   const PetscInt    *ranges, *rows, *cols;
1103:   PetscBLASInt       adesc[9], amb, zero = 0, one = 1, lld, info;
1104:   const PetscScalar *aarray;
1105:   IS                 ir, ic;
1106:   PetscInt           lda;
1107:   PetscBool          flg;

1109:   PetscFunctionBegin;
1110:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));

1112:   if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1113:   else {
1114:     PetscCall(MatCreate(comm, &Bmpi));
1115:     m = PETSC_DECIDE;
1116:     PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1117:     n = PETSC_DECIDE;
1118:     PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1119:     PetscCall(MatSetSizes(Bmpi, m, n, M, N));
1120:     PetscCall(MatSetType(Bmpi, MATSCALAPACK));
1121:     PetscCall(MatSetUp(Bmpi));
1122:   }
1123:   b = (Mat_ScaLAPACK *)Bmpi->data;

1125:   PetscCall(MatDenseGetLDA(A, &lda));
1126:   PetscCall(MatDenseGetArrayRead(A, &aarray));
1127:   PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
1128:   if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
1129:   if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */
1130:     /* create ScaLAPACK descriptor for A (1d block distribution) */
1131:     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
1132:     PetscCall(PetscBLASIntCast(ranges[1], &amb));        /* row block size */
1133:     PetscCall(PetscBLASIntCast(PetscMax(lda, 1), &lld)); /* local leading dimension */
1134:     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(adesc, &b->M, &b->N, &amb, &b->N, &zero, &zero, &b->grid->ictxcol, &lld, &info));
1135:     PetscCheckScaLapackInfo("descinit", info);

1137:     /* redistribute matrix */
1138:     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&b->M, &b->N, aarray, &one, &one, adesc, b->loc, &one, &one, b->desc, &b->grid->ictxcol));
1139:     Bmpi->nooffprocentries = PETSC_TRUE;
1140:   } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */
1141:     PetscCheck(lda == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")", lda, A->rmap->n);
1142:     b->roworiented = PETSC_FALSE;
1143:     PetscCall(MatGetOwnershipIS(A, &ir, &ic));
1144:     PetscCall(ISGetIndices(ir, &rows));
1145:     PetscCall(ISGetIndices(ic, &cols));
1146:     PetscCall(MatSetValues(Bmpi, A->rmap->n, rows, A->cmap->N, cols, aarray, INSERT_VALUES));
1147:     PetscCall(ISRestoreIndices(ir, &rows));
1148:     PetscCall(ISRestoreIndices(ic, &cols));
1149:     PetscCall(ISDestroy(&ic));
1150:     PetscCall(ISDestroy(&ir));
1151:   }
1152:   PetscCall(MatDenseRestoreArrayRead(A, &aarray));
1153:   PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
1154:   PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
1155:   if (reuse == MAT_INPLACE_MATRIX) {
1156:     PetscCall(MatHeaderReplace(A, &Bmpi));
1157:   } else *B = Bmpi;
1158:   PetscFunctionReturn(PETSC_SUCCESS);
1159: }

1161: PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1162: {
1163:   Mat                mat_scal;
1164:   PetscInt           M = A->rmap->N, N = A->cmap->N, rstart = A->rmap->rstart, rend = A->rmap->rend, m, n, row, ncols;
1165:   const PetscInt    *cols;
1166:   const PetscScalar *vals;

1168:   PetscFunctionBegin;
1169:   if (reuse == MAT_REUSE_MATRIX) {
1170:     mat_scal = *newmat;
1171:     PetscCall(MatZeroEntries(mat_scal));
1172:   } else {
1173:     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
1174:     m = PETSC_DECIDE;
1175:     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
1176:     n = PETSC_DECIDE;
1177:     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
1178:     PetscCall(MatSetSizes(mat_scal, m, n, M, N));
1179:     PetscCall(MatSetType(mat_scal, MATSCALAPACK));
1180:     PetscCall(MatSetUp(mat_scal));
1181:   }
1182:   for (row = rstart; row < rend; row++) {
1183:     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
1184:     PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, INSERT_VALUES));
1185:     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1186:   }
1187:   PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
1188:   PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));

1190:   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
1191:   else *newmat = mat_scal;
1192:   PetscFunctionReturn(PETSC_SUCCESS);
1193: }
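
Conversions in both directions go through MatConvert(); a fragment (assuming an assembled
parallel AIJ matrix A):

  Mat As, Ad;

  PetscCall(MatConvert(A, MATSCALAPACK, MAT_INITIAL_MATRIX, &As)); /* AIJ -> ScaLAPACK */
  PetscCall(MatConvert(As, MATDENSE, MAT_INITIAL_MATRIX, &Ad));    /* ScaLAPACK -> dense */
  PetscCall(MatDestroy(&Ad));
  PetscCall(MatDestroy(&As));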

1195: PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1196: {
1197:   Mat                mat_scal;
1198:   PetscInt           M = A->rmap->N, N = A->cmap->N, m, n, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend;
1199:   const PetscInt    *cols;
1200:   const PetscScalar *vals;
1201:   PetscScalar        v;

1203:   PetscFunctionBegin;
1204:   if (reuse == MAT_REUSE_MATRIX) {
1205:     mat_scal = *newmat;
1206:     PetscCall(MatZeroEntries(mat_scal));
1207:   } else {
1208:     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
1209:     m = PETSC_DECIDE;
1210:     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
1211:     n = PETSC_DECIDE;
1212:     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
1213:     PetscCall(MatSetSizes(mat_scal, m, n, M, N));
1214:     PetscCall(MatSetType(mat_scal, MATSCALAPACK));
1215:     PetscCall(MatSetUp(mat_scal));
1216:   }
1217:   PetscCall(MatGetRowUpperTriangular(A));
1218:   for (row = rstart; row < rend; row++) {
1219:     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
1220:     PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, ADD_VALUES));
1221:     for (j = 0; j < ncols; j++) { /* lower triangular part */
1222:       if (cols[j] == row) continue;
1223:       v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
1224:       PetscCall(MatSetValues(mat_scal, 1, &cols[j], 1, &row, &v, ADD_VALUES));
1225:     }
1226:     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1227:   }
1228:   PetscCall(MatRestoreRowUpperTriangular(A));
1229:   PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
1230:   PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));

1232:   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
1233:   else *newmat = mat_scal;
1234:   PetscFunctionReturn(PETSC_SUCCESS);
1235: }

1237: static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
1238: {
1239:   Mat_ScaLAPACK *a  = (Mat_ScaLAPACK *)A->data;
1240:   PetscInt       sz = 0;

1242:   PetscFunctionBegin;
1243:   PetscCall(PetscLayoutSetUp(A->rmap));
1244:   PetscCall(PetscLayoutSetUp(A->cmap));
1245:   if (!a->lld) a->lld = a->locr;

1247:   PetscCall(PetscFree(a->loc));
1248:   PetscCall(PetscIntMultError(a->lld, a->locc, &sz));
1249:   PetscCall(PetscCalloc1(sz, &a->loc));

1251:   A->preallocated = PETSC_TRUE;
1252:   PetscFunctionReturn(PETSC_SUCCESS);
1253: }

1255: static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
1256: {
1257:   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK *)A->data;
1258:   Mat_ScaLAPACK_Grid *grid;
1259:   PetscBool           flg;
1260:   MPI_Comm            icomm;

1262:   PetscFunctionBegin;
1263:   PetscCall(MatStashDestroy_Private(&A->stash));
1264:   PetscCall(PetscFree(a->loc));
1265:   PetscCall(PetscFree(a->pivots));
1266:   PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
1267:   PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg));
1268:   if (--grid->grid_refct == 0) {
1269:     Cblacs_gridexit(grid->ictxt);
1270:     Cblacs_gridexit(grid->ictxrow);
1271:     Cblacs_gridexit(grid->ictxcol);
1272:     PetscCall(PetscFree(grid));
1273:     PetscCallMPI(MPI_Comm_delete_attr(icomm, Petsc_ScaLAPACK_keyval));
1274:   }
1275:   PetscCall(PetscCommDestroy(&icomm));
1276:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", NULL));
1277:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1278:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", NULL));
1279:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", NULL));
1280:   PetscCall(PetscFree(A->data));
1281:   PetscFunctionReturn(PETSC_SUCCESS);
1282: }

1284: static PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
1285: {
1286:   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
1287:   PetscBLASInt   info = 0;
1288:   PetscBool      flg;

1290:   PetscFunctionBegin;
1291:   PetscCall(PetscLayoutSetUp(A->rmap));
1292:   PetscCall(PetscLayoutSetUp(A->cmap));

1294:   /* check that the layout is as enforced by MatCreateScaLAPACK() */
1295:   PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
1296:   PetscCheck(flg, A->rmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1297:   PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
1298:   PetscCheck(flg, A->cmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");

1300:   /* compute local sizes */
1301:   PetscCall(PetscBLASIntCast(A->rmap->N, &a->M));
1302:   PetscCall(PetscBLASIntCast(A->cmap->N, &a->N));
1303:   a->locr = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
1304:   a->locc = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
1305:   a->lld  = PetscMax(1, a->locr);

1307:   /* allocate local array */
1308:   PetscCall(MatScaLAPACKSetPreallocation(A));

1310:   /* set up ScaLAPACK descriptor */
1311:   PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(a->desc, &a->M, &a->N, &a->mb, &a->nb, &a->rsrc, &a->csrc, &a->grid->ictxt, &a->lld, &info));
1312:   PetscCheckScaLapackInfo("descinit", info);
1313:   PetscFunctionReturn(PETSC_SUCCESS);
1314: }
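
/*
   Worked example of the numroc computation above (illustrative numbers only): with
   M = 1000, mb = 64, rsrc = 0 and nprow = 2 process rows, the 16 row blocks (15 of size
   64 plus a final one of 40) are dealt out cyclically, so process row 0 owns blocks
   0,2,...,14 (locr = 8*64 = 512) and process row 1 owns blocks 1,3,...,15
   (locr = 7*64 + 40 = 488).
*/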

1316: static PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A, MatAssemblyType type)
1317: {
1318:   PetscInt nstash, reallocs;

1320:   PetscFunctionBegin;
1321:   if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
1322:   PetscCall(MatStashScatterBegin_Private(A, &A->stash, NULL));
1323:   PetscCall(MatStashGetInfo_Private(&A->stash, &nstash, &reallocs));
1324:   PetscCall(PetscInfo(A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
1325:   PetscFunctionReturn(PETSC_SUCCESS);
1326: }

1328: static PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A, MatAssemblyType type)
1329: {
1330:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1331:   PetscMPIInt    n;
1332:   PetscInt       i, flg, *row, *col;
1333:   PetscScalar   *val;
1334:   PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;

1336:   PetscFunctionBegin;
1337:   if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
1338:   while (1) {
1339:     PetscCall(MatStashScatterGetMesg_Private(&A->stash, &n, &row, &col, &val, &flg));
1340:     if (!flg) break;
1341:     for (i = 0; i < n; i++) {
1342:       PetscCall(PetscBLASIntCast(row[i] + 1, &gridx));
1343:       PetscCall(PetscBLASIntCast(col[i] + 1, &gcidx));
1344:       PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
1345:       PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol, PetscObjectComm((PetscObject)A), PETSC_ERR_LIB, "Something went wrong, received value does not belong to this process");
1346:       switch (A->insertmode) {
1347:       case INSERT_VALUES:
1348:         a->loc[lridx - 1 + (lcidx - 1) * a->lld] = val[i];
1349:         break;
1350:       case ADD_VALUES:
1351:         a->loc[lridx - 1 + (lcidx - 1) * a->lld] += val[i];
1352:         break;
1353:       default:
1354:         SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)A->insertmode);
1355:       }
1356:     }
1357:   }
1358:   PetscCall(MatStashScatterEnd_Private(&A->stash));
1359:   PetscFunctionReturn(PETSC_SUCCESS);
1360: }
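
/*
   Illustrative note on the mapping used above: for a 0-based global row g with block size
   mb, source process row rsrc and nprow process rows, the 2D block-cyclic layout places g
   on process row (rsrc + g/mb) % nprow at 0-based local row (g/(mb*nprow))*mb + g%mb, and
   analogously for columns. SCALAPACKinfog2l_() returns the 1-based equivalents, which is
   why the stores above subtract 1 from lridx and lcidx.
*/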

1362: static PetscErrorCode MatLoad_ScaLAPACK(Mat newMat, PetscViewer viewer)
1363: {
1364:   Mat      Adense, As;
1365:   MPI_Comm comm;

1367:   PetscFunctionBegin;
1368:   PetscCall(PetscObjectGetComm((PetscObject)newMat, &comm));
1369:   PetscCall(MatCreate(comm, &Adense));
1370:   PetscCall(MatSetType(Adense, MATDENSE));
1371:   PetscCall(MatLoad(Adense, viewer));
1372:   PetscCall(MatConvert(Adense, MATSCALAPACK, MAT_INITIAL_MATRIX, &As));
1373:   PetscCall(MatDestroy(&Adense));
1374:   PetscCall(MatHeaderReplace(newMat, &As));
1375:   PetscFunctionReturn(PETSC_SUCCESS);
1376: }
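
/*
   Example usage (illustrative only; "matrix.dat" is a placeholder for a file in PETSc
   binary format):

     Mat         A;
     PetscViewer viewer;
     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, "matrix.dat", FILE_MODE_READ, &viewer));
     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetType(A, MATSCALAPACK));
     PetscCall(MatLoad(A, viewer));
     PetscCall(PetscViewerDestroy(&viewer));
*/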

1378: static struct _MatOps MatOps_Values = {MatSetValues_ScaLAPACK,
1379:                                        NULL,
1380:                                        NULL,
1381:                                        MatMult_ScaLAPACK,
1382:                                        /* 4*/ MatMultAdd_ScaLAPACK,
1383:                                        MatMultTranspose_ScaLAPACK,
1384:                                        MatMultTransposeAdd_ScaLAPACK,
1385:                                        MatSolve_ScaLAPACK,
1386:                                        MatSolveAdd_ScaLAPACK,
1387:                                        NULL,
1388:                                        /*10*/ NULL,
1389:                                        MatLUFactor_ScaLAPACK,
1390:                                        MatCholeskyFactor_ScaLAPACK,
1391:                                        NULL,
1392:                                        MatTranspose_ScaLAPACK,
1393:                                        /*15*/ MatGetInfo_ScaLAPACK,
1394:                                        NULL,
1395:                                        MatGetDiagonal_ScaLAPACK,
1396:                                        MatDiagonalScale_ScaLAPACK,
1397:                                        MatNorm_ScaLAPACK,
1398:                                        /*20*/ MatAssemblyBegin_ScaLAPACK,
1399:                                        MatAssemblyEnd_ScaLAPACK,
1400:                                        MatSetOption_ScaLAPACK,
1401:                                        MatZeroEntries_ScaLAPACK,
1402:                                        /*24*/ NULL,
1403:                                        MatLUFactorSymbolic_ScaLAPACK,
1404:                                        MatLUFactorNumeric_ScaLAPACK,
1405:                                        MatCholeskyFactorSymbolic_ScaLAPACK,
1406:                                        MatCholeskyFactorNumeric_ScaLAPACK,
1407:                                        /*29*/ MatSetUp_ScaLAPACK,
1408:                                        NULL,
1409:                                        NULL,
1410:                                        NULL,
1411:                                        NULL,
1412:                                        /*34*/ MatDuplicate_ScaLAPACK,
1413:                                        NULL,
1414:                                        NULL,
1415:                                        NULL,
1416:                                        NULL,
1417:                                        /*39*/ MatAXPY_ScaLAPACK,
1418:                                        NULL,
1419:                                        NULL,
1420:                                        NULL,
1421:                                        MatCopy_ScaLAPACK,
1422:                                        /*44*/ NULL,
1423:                                        MatScale_ScaLAPACK,
1424:                                        MatShift_ScaLAPACK,
1425:                                        NULL,
1426:                                        NULL,
1427:                                        /*49*/ NULL,
1428:                                        NULL,
1429:                                        NULL,
1430:                                        NULL,
1431:                                        NULL,
1432:                                        /*54*/ NULL,
1433:                                        NULL,
1434:                                        NULL,
1435:                                        NULL,
1436:                                        NULL,
1437:                                        /*59*/ NULL,
1438:                                        MatDestroy_ScaLAPACK,
1439:                                        MatView_ScaLAPACK,
1440:                                        NULL,
1441:                                        NULL,
1442:                                        /*64*/ NULL,
1443:                                        NULL,
1444:                                        NULL,
1445:                                        NULL,
1446:                                        NULL,
1447:                                        /*69*/ NULL,
1448:                                        NULL,
1449:                                        MatConvert_ScaLAPACK_Dense,
1450:                                        NULL,
1451:                                        NULL,
1452:                                        /*74*/ NULL,
1453:                                        NULL,
1454:                                        NULL,
1455:                                        NULL,
1456:                                        NULL,
1457:                                        /*79*/ NULL,
1458:                                        NULL,
1459:                                        NULL,
1460:                                        NULL,
1461:                                        MatLoad_ScaLAPACK,
1462:                                        /*84*/ NULL,
1463:                                        NULL,
1464:                                        NULL,
1465:                                        NULL,
1466:                                        NULL,
1467:                                        /*89*/ NULL,
1468:                                        NULL,
1469:                                        MatMatMultNumeric_ScaLAPACK,
1470:                                        NULL,
1471:                                        NULL,
1472:                                        /*94*/ NULL,
1473:                                        NULL,
1474:                                        NULL,
1475:                                        MatMatTransposeMultNumeric_ScaLAPACK,
1476:                                        NULL,
1477:                                        /*99*/ MatProductSetFromOptions_ScaLAPACK,
1478:                                        NULL,
1479:                                        NULL,
1480:                                        MatConjugate_ScaLAPACK,
1481:                                        NULL,
1482:                                        /*104*/ NULL,
1483:                                        NULL,
1484:                                        NULL,
1485:                                        NULL,
1486:                                        NULL,
1487:                                        /*109*/ MatMatSolve_ScaLAPACK,
1488:                                        NULL,
1489:                                        NULL,
1490:                                        NULL,
1491:                                        MatMissingDiagonal_ScaLAPACK,
1492:                                        /*114*/ NULL,
1493:                                        NULL,
1494:                                        NULL,
1495:                                        NULL,
1496:                                        NULL,
1497:                                        /*119*/ NULL,
1498:                                        MatHermitianTranspose_ScaLAPACK,
1499:                                        MatMultHermitianTranspose_ScaLAPACK,
1500:                                        MatMultHermitianTransposeAdd_ScaLAPACK,
1501:                                        NULL,
1502:                                        /*124*/ NULL,
1503:                                        NULL,
1504:                                        NULL,
1505:                                        NULL,
1506:                                        NULL,
1507:                                        /*129*/ NULL,
1508:                                        NULL,
1509:                                        NULL,
1510:                                        MatTransposeMatMultNumeric_ScaLAPACK,
1511:                                        NULL,
1512:                                        /*134*/ NULL,
1513:                                        NULL,
1514:                                        NULL,
1515:                                        NULL,
1516:                                        NULL,
1517:                                        NULL,
1518:                                        /*140*/ NULL,
1519:                                        NULL,
1520:                                        NULL,
1521:                                        NULL,
1522:                                        NULL,
1523:                                        /*145*/ NULL,
1524:                                        NULL,
1525:                                        NULL,
1526:                                        NULL,
1527:                                        NULL,
1528:                                        /*150*/ NULL,
1529:                                        NULL,
1530:                                        NULL,
1531:                                        NULL,
1532:                                        NULL,
1533:                                        /*155*/ NULL,
1534:                                        NULL};

1536: static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat, MatStash *stash, PetscInt *owners)
1537: {
1538:   PetscInt          *owner, *startv, *starti, bs2;
1539:   PetscInt           size = stash->size, nsends;
1540:   PetscInt          *sindices, **rindices, j, l;
1541:   PetscScalar      **rvalues, *svalues;
1542:   MPI_Comm           comm = stash->comm;
1543:   MPI_Request       *send_waits, *recv_waits, *recv_waits1, *recv_waits2;
1544:   PetscMPIInt        tag1 = stash->tag1, tag2 = stash->tag2, *sizes, *nlengths, nreceives, insends, ii;
1545:   PetscInt          *sp_idx, *sp_idy;
1546:   PetscScalar       *sp_val;
1547:   PetscMatStashSpace space, space_next;
1548:   PetscBLASInt       gridx, gcidx, lridx, lcidx, rsrc, csrc;
1549:   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)mat->data;

1551:   PetscFunctionBegin;
1552:   { /* make sure all processes use the same InsertMode, either INSERT_VALUES or ADD_VALUES */
1553:     InsertMode addv;
1554:     PetscCallMPI(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
1555:     PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
1556:     mat->insertmode = addv; /* in case this processor had no cache */
1557:   }

1559:   bs2 = stash->bs * stash->bs;

1561:   /*  first count number of contributors to each processor */
1562:   PetscCall(PetscCalloc1(size, &nlengths));
1563:   PetscCall(PetscMalloc1(stash->n + 1, &owner));

1565:   ii = j = 0;
1566:   space  = stash->space_head;
1567:   while (space) {
1568:     space_next = space->next;
1569:     for (l = 0; l < space->local_used; l++) {
1570:       PetscCall(PetscBLASIntCast(space->idx[l] + 1, &gridx));
1571:       PetscCall(PetscBLASIntCast(space->idy[l] + 1, &gcidx));
1572:       PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
1573:       j = Cblacs_pnum(a->grid->ictxt, rsrc, csrc);
1574:       nlengths[j]++;
1575:       owner[ii] = j;
1576:       ii++;
1577:     }
1578:     space = space_next;
1579:   }

1581:   /* Now check what procs get messages - and compute nsends. */
1582:   PetscCall(PetscCalloc1(size, &sizes));
1583:   nsends = 0;
1584:   for (PetscMPIInt i = 0; i < size; i++) {
1585:     if (nlengths[i]) {
1586:       sizes[i] = 1;
1587:       nsends++;
1588:     }
1589:   }

1591:   {
1592:     PetscMPIInt *onodes, *olengths;

1594:     /* Determine the number of messages to expect, their lengths, and the from-ids */
1595:     PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives));
1596:     PetscCall(PetscMPIIntCast(nsends, &insends));
1597:     PetscCall(PetscGatherMessageLengths(comm, insends, nreceives, nlengths, &onodes, &olengths));
1598:     /* row and column indices are sent together in a single message, so lengths are multiplied by 2 */
1599:     for (PetscMPIInt i = 0; i < nreceives; i++) olengths[i] *= 2;
1600:     PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1));
1601:     /* values come in chunks of size 'bs2' (undo the earlier factor of 2) */
1602:     for (PetscMPIInt i = 0; i < nreceives; i++) olengths[i] = (PetscMPIInt)(olengths[i] * bs2 / 2);
1603:     PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2));
1604:     PetscCall(PetscFree(onodes));
1605:     PetscCall(PetscFree(olengths));
1606:   }

1608:   /* do sends:
1609:       1) startv[i] gives the starting index in svalues for the entries
1610:          destined for the ith processor
1611:   */
1612:   PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices));
1613:   PetscCall(PetscMalloc1(2 * nsends, &send_waits));
1614:   PetscCall(PetscMalloc2(size, &startv, size, &starti));
1615:   /* use 2 sends per destination: one with the indices (all_i and all_j), one with the values (all_a) */
1616:   startv[0] = 0;
1617:   starti[0] = 0;
1618:   for (PetscMPIInt i = 1; i < size; i++) {
1619:     startv[i] = startv[i - 1] + nlengths[i - 1];
1620:     starti[i] = starti[i - 1] + 2 * nlengths[i - 1];
1621:   }

1623:   ii    = 0;
1624:   space = stash->space_head;
1625:   while (space) {
1626:     space_next = space->next;
1627:     sp_idx     = space->idx;
1628:     sp_idy     = space->idy;
1629:     sp_val     = space->val;
1630:     for (l = 0; l < space->local_used; l++) {
1631:       j = owner[ii];
1632:       if (bs2 == 1) {
1633:         svalues[startv[j]] = sp_val[l];
1634:       } else {
1635:         PetscInt     k;
1636:         PetscScalar *buf1, *buf2;
1637:         buf1 = svalues + bs2 * startv[j];
1638:         buf2 = space->val + bs2 * l;
1639:         for (k = 0; k < bs2; k++) buf1[k] = buf2[k];
1640:       }
1641:       sindices[starti[j]]               = sp_idx[l];
1642:       sindices[starti[j] + nlengths[j]] = sp_idy[l];
1643:       startv[j]++;
1644:       starti[j]++;
1645:       ii++;
1646:     }
1647:     space = space_next;
1648:   }
1649:   startv[0] = 0;
1650:   for (PetscMPIInt i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1];

1652:   for (PetscMPIInt i = 0, count = 0; i < size; i++) {
1653:     if (sizes[i]) {
1654:       PetscCallMPI(MPIU_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++));
1655:       PetscCallMPI(MPIU_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++));
1656:     }
1657:   }
1658: #if defined(PETSC_USE_INFO)
1659:   PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT "\n", nsends));
1660:   for (PetscMPIInt i = 0; i < size; i++) {
1661:     if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %d: size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt)))));
1662:   }
1663: #endif
1664:   PetscCall(PetscFree(nlengths));
1665:   PetscCall(PetscFree(owner));
1666:   PetscCall(PetscFree2(startv, starti));
1667:   PetscCall(PetscFree(sizes));

1669:   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
1670:   PetscCall(PetscMalloc1(2 * nreceives, &recv_waits));

1672:   for (PetscMPIInt i = 0; i < nreceives; i++) {
1673:     recv_waits[2 * i]     = recv_waits1[i];
1674:     recv_waits[2 * i + 1] = recv_waits2[i];
1675:   }
1676:   stash->recv_waits = recv_waits;

1678:   PetscCall(PetscFree(recv_waits1));
1679:   PetscCall(PetscFree(recv_waits2));

1681:   stash->svalues         = svalues;
1682:   stash->sindices        = sindices;
1683:   stash->rvalues         = rvalues;
1684:   stash->rindices        = rindices;
1685:   stash->send_waits      = send_waits;
1686:   stash->nsends          = (PetscMPIInt)nsends;
1687:   stash->nrecvs          = nreceives;
1688:   stash->reproduce_count = 0;
1689:   PetscFunctionReturn(PETSC_SUCCESS);
1690: }
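
/*
   Message-layout sketch for the scatter above (illustrative only): each destination rank
   receives two messages, the indices on tag1 and the values on tag2. For nlengths[j] = 3
   stashed entries destined for rank j, the index message is packed as

     [ i0 i1 i2 | j0 j1 j2 ]   (row indices first, then column indices, hence the factor of 2)

   and the value message as [ v0 v1 v2 ], with bs2 scalars per entry when bs > 1.
*/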

1692: static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A, PetscInt mb, PetscInt nb)
1693: {
1694:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;

1696:   PetscFunctionBegin;
1697:   PetscCheck(!A->preallocated, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot change block sizes after MatSetUp");
1698:   PetscCheck(mb >= 1 || mb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "mb %" PetscInt_FMT " must be at least 1", mb);
1699:   PetscCheck(nb >= 1 || nb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "nb %" PetscInt_FMT " must be at least 1", nb);
1700:   PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
1701:   PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
1702:   PetscFunctionReturn(PETSC_SUCCESS);
1703: }

1705: /*@
1706:   MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
1707:   the `MATSCALAPACK` matrix

1709:   Logically Collective

1711:   Input Parameters:
1712: + A  - a `MATSCALAPACK` matrix
1713: . mb - the row block size
1714: - nb - the column block size

1716:   Level: intermediate

1718:   Note:
1719:   This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices

1721: .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()`
1722: @*/
1723: PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A, PetscInt mb, PetscInt nb)
1724: {
1725:   PetscFunctionBegin;
1726:   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1727:   PetscValidLogicalCollectiveInt(A, mb, 2);
1728:   PetscValidLogicalCollectiveInt(A, nb, 3);
1729:   PetscTryMethod(A, "MatScaLAPACKSetBlockSizes_C", (Mat, PetscInt, PetscInt), (A, mb, nb));
1730:   PetscFunctionReturn(PETSC_SUCCESS);
1731: }
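
/*
   Example usage (illustrative only; assumes the global dimensions split equally across
   the ranks, as required by the layout check in MatSetUp_ScaLAPACK()). The block sizes
   must be set before MatSetUp():

     Mat A;
     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 1024, 1024));
     PetscCall(MatSetType(A, MATSCALAPACK));
     PetscCall(MatScaLAPACKSetBlockSizes(A, 32, 32));
     PetscCall(MatSetUp(A));
*/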

1733: static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A, PetscInt *mb, PetscInt *nb)
1734: {
1735:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;

1737:   PetscFunctionBegin;
1738:   if (mb) *mb = a->mb;
1739:   if (nb) *nb = a->nb;
1740:   PetscFunctionReturn(PETSC_SUCCESS);
1741: }

1743: /*@
1744:   MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
1745:   the `MATSCALAPACK` matrix

1747:   Not Collective

1749:   Input Parameter:
1750: . A - a `MATSCALAPACK` matrix

1752:   Output Parameters:
1753: + mb - the row block size
1754: - nb - the column block size

1756:   Level: intermediate

1758:   Note:
1759:   This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices

1761: .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()`
1762: @*/
1763: PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A, PetscInt *mb, PetscInt *nb)
1764: {
1765:   PetscFunctionBegin;
1766:   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1767:   PetscUseMethod(A, "MatScaLAPACKGetBlockSizes_C", (Mat, PetscInt *, PetscInt *), (A, mb, nb));
1768:   PetscFunctionReturn(PETSC_SUCCESS);
1769: }
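
/*
   Example usage (illustrative only), querying the distribution block sizes of a
   MATSCALAPACK matrix A:

     PetscInt mb, nb;
     PetscCall(MatScaLAPACKGetBlockSizes(A, &mb, &nb));
*/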

1771: PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
1772: PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *);

1774: /*MC
1775:    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package

1777:    Use `./configure --download-scalapack` to install PETSc to use ScaLAPACK

1779:    Options Database Keys:
1780: +  -mat_type scalapack - sets the matrix type to `MATSCALAPACK`
1781: .  -pc_factor_mat_solver_type scalapack - to use this direct solver with the option `-pc_type lu`
1782: .  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1783: -  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)

1785:    Level: intermediate

1787:   Note:
1788:    Unlike most matrix formats, this one does not store all the matrix entries for a contiguous
1789:    range of rows on each MPI rank. Use `MatGetOwnershipIS()` to determine which values are stored
1790:    on a given rank.

1792: .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()`, `MatCreateScaLAPACK()`
1793: M*/
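
/*
   Example command line (illustrative only; "./app" stands for any PETSc application that
   assembles and solves a linear system):

     mpiexec -n 4 ./app -mat_type scalapack -mat_scalapack_block_sizes 32,32 \
       -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type scalapack
*/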

1795: PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1796: {
1797:   Mat_ScaLAPACK      *a;
1798:   PetscBool           flg, flg1;
1799:   Mat_ScaLAPACK_Grid *grid;
1800:   MPI_Comm            icomm;
1801:   PetscBLASInt        nprow, npcol, myrow, mycol;
1802:   PetscInt            optv1, k = 2, array[2] = {0, 0};
1803:   PetscMPIInt         size;

1805:   PetscFunctionBegin;
1806:   A->ops[0]     = MatOps_Values;
1807:   A->insertmode = NOT_SET_VALUES;

1809:   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A), 1, &A->stash));
1810:   A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
1811:   A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1812:   A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
1813:   A->stash.ScatterDestroy = NULL;

1815:   PetscCall(PetscNew(&a));
1816:   A->data = (void *)a;

1818:   /* Grid needs to be shared between multiple Mats on the same communicator, implemented via attribute caching on the MPI_Comm */
1819:   if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
1820:     PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_ScaLAPACK_keyval, NULL));
1821:     PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free));
1822:     PetscCall(PetscCitationsRegister(ScaLAPACKCitation, &ScaLAPACKCite));
1823:   }
1824:   PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
1825:   PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg));
1826:   if (!flg) {
1827:     PetscCall(PetscNew(&grid));

1829:     PetscCallMPI(MPI_Comm_size(icomm, &size));
1830:     PetscCall(PetscBLASIntCast(PetscSqrtReal((PetscReal)size) + 0.001, &grid->nprow));

1832:     PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "ScaLAPACK Grid Options", "Mat");
1833:     PetscCall(PetscOptionsInt("-mat_scalapack_grid_height", "Grid Height", "None", grid->nprow, &optv1, &flg1));
1834:     if (flg1) {
1835:       PetscCheck(size % optv1 == 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Grid Height %" PetscInt_FMT " must evenly divide CommSize %d", optv1, size);
1836:       PetscCall(PetscBLASIntCast(optv1, &grid->nprow));
1837:     }
1838:     PetscOptionsEnd();

1840:     if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */
1841:     grid->npcol = size / grid->nprow;
1842:     PetscCall(PetscBLASIntCast(grid->nprow, &nprow));
1843:     PetscCall(PetscBLASIntCast(grid->npcol, &npcol));
1844:     grid->ictxt = Csys2blacs_handle(icomm);
1845:     Cblacs_gridinit(&grid->ictxt, "R", nprow, npcol);
1846:     Cblacs_gridinfo(grid->ictxt, &nprow, &npcol, &myrow, &mycol);
1847:     grid->grid_refct = 1;
1848:     grid->nprow      = nprow;
1849:     grid->npcol      = npcol;
1850:     grid->myrow      = myrow;
1851:     grid->mycol      = mycol;
1852:     /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1853:     grid->ictxrow = Csys2blacs_handle(icomm);
1854:     Cblacs_gridinit(&grid->ictxrow, "R", 1, size);
1855:     grid->ictxcol = Csys2blacs_handle(icomm);
1856:     Cblacs_gridinit(&grid->ictxcol, "R", size, 1);
1857:     PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_ScaLAPACK_keyval, (void *)grid));

1859:   } else grid->grid_refct++;
1860:   PetscCall(PetscCommDestroy(&icomm));
1861:   a->grid = grid;
1862:   a->mb   = DEFAULT_BLOCKSIZE;
1863:   a->nb   = DEFAULT_BLOCKSIZE;

1865:   PetscOptionsBegin(PetscObjectComm((PetscObject)A), NULL, "ScaLAPACK Options", "Mat");
1866:   PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes", "Size of the blocks to use (one or two comma-separated integers)", "MatCreateScaLAPACK", array, &k, &flg));
1867:   if (flg) {
1868:     a->mb = (PetscMPIInt)array[0];
1869:     a->nb = (k > 1) ? (PetscMPIInt)array[1] : a->mb;
1870:   }
1871:   PetscOptionsEnd();

1873:   a->roworiented = PETSC_TRUE;
1874:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_ScaLAPACK));
1875:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", MatScaLAPACKSetBlockSizes_ScaLAPACK));
1876:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", MatScaLAPACKGetBlockSizes_ScaLAPACK));
1877:   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSCALAPACK));
1878:   PetscFunctionReturn(PETSC_SUCCESS);
1879: }
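
/*
   Worked example of the grid selection above (illustrative only): with size = 6 ranks,
   nprow is initialized to floor(sqrt(6)) = 2, which divides 6, giving a 2 x 3 grid
   (npcol = 6/2 = 3). With size = 7, nprow = 2 does not divide 7, so the code falls back
   to a 1 x 7 grid; -mat_scalapack_grid_height 7 would force a 7 x 1 grid instead.
*/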

1881: /*@C
1882:   MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1883:   (2D block cyclic distribution) for a `MATSCALAPACK` matrix

1885:   Collective

1887:   Input Parameters:
1888: + comm - MPI communicator
1889: . mb   - row block size (or `PETSC_DECIDE` to have it chosen automatically)
1890: . nb   - column block size (or `PETSC_DECIDE` to have it chosen automatically)
1891: . M    - number of global rows
1892: . N    - number of global columns
1893: . rsrc - coordinate of process that owns the first row of the distributed matrix
1894: - csrc - coordinate of process that owns the first column of the distributed matrix

1896:   Output Parameter:
1897: . A - the matrix

1899:   Options Database Key:
1900: . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)

1902:   Level: intermediate

1904:   Notes:
1905:   If `PETSC_DECIDE` is used for the block sizes, then an appropriate value is chosen

1907:   It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
1908:   MatXXXXSetPreallocation() paradigm instead of calling this routine directly
1909:   (MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`).

1911:   Storage is completely managed by ScaLAPACK, so this requires PETSc to be
1912:   configured with ScaLAPACK. In particular, PETSc's local sizes lose
1913:   significance and are thus ignored. The block sizes refer to those used in
1914:   the 2D block-cyclic distribution and do not have the same meaning as in `MATBAIJ`.

1916: .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`
1917: @*/
1918: PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm, PetscInt mb, PetscInt nb, PetscInt M, PetscInt N, PetscInt rsrc, PetscInt csrc, Mat *A)
1919: {
1920:   Mat_ScaLAPACK *a;
1921:   PetscInt       m, n;

1923:   PetscFunctionBegin;
1924:   PetscCall(MatCreate(comm, A));
1925:   PetscCall(MatSetType(*A, MATSCALAPACK));
1926:   PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot use PETSC_DECIDE for matrix dimensions");
1927:   /* rows and columns are NOT distributed according to PetscSplitOwnership */
1928:   m = PETSC_DECIDE;
1929:   PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1930:   n = PETSC_DECIDE;
1931:   PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1932:   PetscCall(MatSetSizes(*A, m, n, M, N));
1933:   a = (Mat_ScaLAPACK *)(*A)->data;
1934:   PetscCall(PetscBLASIntCast(M, &a->M));
1935:   PetscCall(PetscBLASIntCast(N, &a->N));
1936:   PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
1937:   PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
1938:   PetscCall(PetscBLASIntCast(rsrc, &a->rsrc));
1939:   PetscCall(PetscBLASIntCast(csrc, &a->csrc));
1940:   PetscCall(MatSetUp(*A));
1941:   PetscFunctionReturn(PETSC_SUCCESS);
1942: }
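
/*
   Example usage (illustrative only): create a 1000 x 1000 MATSCALAPACK matrix with
   64 x 64 distribution blocks, with the first block row and column owned by process
   (0,0):

     Mat A;
     PetscCall(MatCreateScaLAPACK(PETSC_COMM_WORLD, 64, 64, 1000, 1000, 0, 0, &A));
     ... set entries with MatSetValues() and assemble ...
     PetscCall(MatDestroy(&A));
*/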