Actual source code: matscalapack.c

  1: #include <petsc/private/petscscalapack.h>

  3: const char       ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n"
  4:                                        "       AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n"
  5:                                        "                 J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n"
  6:                                        "                 G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n"
  7:                                        "       TITLE = {Sca{LAPACK} Users' Guide},\n"
  8:                                        "       PUBLISHER = {SIAM},\n"
  9:                                        "       ADDRESS = {Philadelphia, PA},\n"
 10:                                        "       YEAR = 1997\n"
 11:                                        "}\n";
 12: static PetscBool ScaLAPACKCite       = PETSC_FALSE;

 14: #define DEFAULT_BLOCKSIZE 64

 16: /*
 17:     The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
 18:   is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
 19: */
 20: static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;

 22: static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
 23: {
 24:   PetscInfo(NULL, "Freeing Petsc_ScaLAPACK_keyval\n");
 25:   MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval);
 26:   return 0;
 27: }

 29: static PetscErrorCode MatView_ScaLAPACK(Mat A, PetscViewer viewer)
 30: {
 31:   Mat_ScaLAPACK    *a = (Mat_ScaLAPACK *)A->data;
 32:   PetscBool         iascii;
 33:   PetscViewerFormat format;
 34:   Mat               Adense;

 36:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii);
 37:   if (iascii) {
 38:     PetscViewerGetFormat(viewer, &format);
 39:     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
 40:       PetscViewerASCIIPrintf(viewer, "block sizes: %d,%d\n", (int)a->mb, (int)a->nb);
 41:       PetscViewerASCIIPrintf(viewer, "grid height=%d, grid width=%d\n", (int)a->grid->nprow, (int)a->grid->npcol);
 42:       PetscViewerASCIIPrintf(viewer, "coordinates of process owning first row and column: (%d,%d)\n", (int)a->rsrc, (int)a->csrc);
 43:       PetscViewerASCIIPrintf(viewer, "dimension of largest local matrix: %d x %d\n", (int)a->locr, (int)a->locc);
 44:       return 0;
 45:     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
 46:       return 0;
 47:     }
 48:   }
 49:   /* convert to dense format and call MatView() */
 50:   MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense);
 51:   MatView(Adense, viewer);
 52:   MatDestroy(&Adense);
 53:   return 0;
 54: }

 56: static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A, MatInfoType flag, MatInfo *info)
 57: {
 58:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
 59:   PetscLogDouble isend[2], irecv[2];

 61:   info->block_size = 1.0;

 63:   isend[0] = a->lld * a->locc;  /* locally allocated */
 64:   isend[1] = a->locr * a->locc; /* used submatrix */
 65:   if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
 66:     info->nz_allocated = isend[0];
 67:     info->nz_used      = isend[1];
 68:   } else if (flag == MAT_GLOBAL_MAX) {
 69:     MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A));
 70:     info->nz_allocated = irecv[0];
 71:     info->nz_used      = irecv[1];
 72:   } else if (flag == MAT_GLOBAL_SUM) {
 73:     MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A));
 74:     info->nz_allocated = irecv[0];
 75:     info->nz_used      = irecv[1];
 76:   }

 78:   info->nz_unneeded       = 0;
 79:   info->assemblies        = A->num_ass;
 80:   info->mallocs           = 0;
 81:   info->memory            = 0; /* REVIEW ME */
 82:   info->fill_ratio_given  = 0;
 83:   info->fill_ratio_needed = 0;
 84:   info->factor_mallocs    = 0;
 85:   return 0;
 86: }

 88: PetscErrorCode MatSetOption_ScaLAPACK(Mat A, MatOption op, PetscBool flg)
 89: {
 90:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;

 92:   switch (op) {
 93:   case MAT_NEW_NONZERO_LOCATIONS:
 94:   case MAT_NEW_NONZERO_LOCATION_ERR:
 95:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
 96:   case MAT_SYMMETRIC:
 97:   case MAT_SORTED_FULL:
 98:   case MAT_HERMITIAN:
 99:     break;
100:   case MAT_ROW_ORIENTED:
101:     a->roworiented = flg;
102:     break;
103:   default:
104:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported option %s", MatOptions[op]);
105:   }
106:   return 0;
107: }

109: static PetscErrorCode MatSetValues_ScaLAPACK(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode)
110: {
111:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
112:   PetscInt       i, j;
113:   PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;
114:   PetscBool      roworiented = a->roworiented;

117:   for (i = 0; i < nr; i++) {
118:     if (rows[i] < 0) continue;
119:     PetscBLASIntCast(rows[i] + 1, &gridx);
120:     for (j = 0; j < nc; j++) {
121:       if (cols[j] < 0) continue;
122:       PetscBLASIntCast(cols[j] + 1, &gcidx);
123:       PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
124:       if (rsrc == a->grid->myrow && csrc == a->grid->mycol) {
125:         if (roworiented) {
126:           switch (imode) {
127:           case INSERT_VALUES:
128:             a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i * nc + j];
129:             break;
130:           default:
131:             a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i * nc + j];
132:             break;
133:           }
134:         } else {
135:           switch (imode) {
136:           case INSERT_VALUES:
137:             a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i + j * nr];
138:             break;
139:           default:
140:             a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i + j * nr];
141:             break;
142:           }
143:         }
144:       } else {
146:         A->assembled = PETSC_FALSE;
147:         MatStashValuesRow_Private(&A->stash, rows[i], 1, cols + j, roworiented ? vals + i * nc + j : vals + i + j * nr, (PetscBool)(imode == ADD_VALUES));
148:       }
149:     }
150:   }
151:   return 0;
152: }

154: static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A, PetscBool transpose, PetscScalar beta, const PetscScalar *x, PetscScalar *y)
155: {
156:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
157:   PetscScalar    *x2d, *y2d, alpha = 1.0;
158:   const PetscInt *ranges;
159:   PetscBLASInt    xdesc[9], ydesc[9], x2desc[9], y2desc[9], mb, nb, lszx, lszy, zero = 0, one = 1, xlld, ylld, info;

161:   if (transpose) {
162:     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
163:     PetscLayoutGetRanges(A->rmap, &ranges);
164:     PetscBLASIntCast(ranges[1], &mb); /* x block size */
165:     xlld = PetscMax(1, A->rmap->n);
166:     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
168:     PetscLayoutGetRanges(A->cmap, &ranges);
169:     PetscBLASIntCast(ranges[1], &nb); /* y block size */
170:     ylld = 1;
171:     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &ylld, &info));

174:     /* allocate 2d vectors */
175:     lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
176:     lszy = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
177:     PetscMalloc2(lszx, &x2d, lszy, &y2d);
178:     xlld = PetscMax(1, lszx);

180:     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
181:     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
183:     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &ylld, &info));

186:     /* redistribute x as a column of a 2d matrix */
187:     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, (PetscScalar *)x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));

189:     /* redistribute y as a row of a 2d matrix */
190:     if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxrow));

192:     /* call PBLAS subroutine */
193:     PetscCallBLAS("PBLASgemv", PBLASgemv_("T", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));

195:     /* redistribute y from a row of a 2d matrix */
196:     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxrow));

198:   } else { /* non-transpose */

200:     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
201:     PetscLayoutGetRanges(A->cmap, &ranges);
202:     PetscBLASIntCast(ranges[1], &nb); /* x block size */
203:     xlld = 1;
204:     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &xlld, &info));
206:     PetscLayoutGetRanges(A->rmap, &ranges);
207:     PetscBLASIntCast(ranges[1], &mb); /* y block size */
208:     ylld = PetscMax(1, A->rmap->n);
209:     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &ylld, &info));

212:     /* allocate 2d vectors */
213:     lszy = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
214:     lszx = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
215:     PetscMalloc2(lszx, &x2d, lszy, &y2d);
216:     ylld = PetscMax(1, lszy);

218:     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
219:     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &xlld, &info));
221:     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &ylld, &info));

224:     /* redistribute x as a row of a 2d matrix */
225:     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, (PetscScalar *)x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxrow));

227:     /* redistribute y as a column of a 2d matrix */
228:     if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxcol));

230:     /* call PBLAS subroutine */
231:     PetscCallBLAS("PBLASgemv", PBLASgemv_("N", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));

233:     /* redistribute y from a column of a 2d matrix */
234:     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxcol));
235:   }
236:   PetscFree2(x2d, y2d);
237:   return 0;
238: }

240: static PetscErrorCode MatMult_ScaLAPACK(Mat A, Vec x, Vec y)
241: {
242:   const PetscScalar *xarray;
243:   PetscScalar       *yarray;

245:   VecGetArrayRead(x, &xarray);
246:   VecGetArray(y, &yarray);
247:   MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, 0.0, xarray, yarray);
248:   VecRestoreArrayRead(x, &xarray);
249:   VecRestoreArray(y, &yarray);
250:   return 0;
251: }

253: static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A, Vec x, Vec y)
254: {
255:   const PetscScalar *xarray;
256:   PetscScalar       *yarray;

258:   VecGetArrayRead(x, &xarray);
259:   VecGetArray(y, &yarray);
260:   MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, 0.0, xarray, yarray);
261:   VecRestoreArrayRead(x, &xarray);
262:   VecRestoreArray(y, &yarray);
263:   return 0;
264: }

266: static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
267: {
268:   const PetscScalar *xarray;
269:   PetscScalar       *zarray;

271:   if (y != z) VecCopy(y, z);
272:   VecGetArrayRead(x, &xarray);
273:   VecGetArray(z, &zarray);
274:   MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, 1.0, xarray, zarray);
275:   VecRestoreArrayRead(x, &xarray);
276:   VecRestoreArray(z, &zarray);
277:   return 0;
278: }

280: static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
281: {
282:   const PetscScalar *xarray;
283:   PetscScalar       *zarray;

285:   if (y != z) VecCopy(y, z);
286:   VecGetArrayRead(x, &xarray);
287:   VecGetArray(z, &zarray);
288:   MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, 1.0, xarray, zarray);
289:   VecRestoreArrayRead(x, &xarray);
290:   VecRestoreArray(z, &zarray);
291:   return 0;
292: }

294: PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
295: {
296:   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
297:   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
298:   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
299:   PetscScalar    sone = 1.0, zero = 0.0;
300:   PetscBLASInt   one = 1;

302:   PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "N", &a->M, &b->N, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
303:   C->assembled = PETSC_TRUE;
304:   return 0;
305: }

307: PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
308: {
309:   MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE);
310:   MatSetType(C, MATSCALAPACK);
311:   MatSetUp(C);
312:   C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
313:   return 0;
314: }

316: static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
317: {
318:   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
319:   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
320:   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
321:   PetscScalar    sone = 1.0, zero = 0.0;
322:   PetscBLASInt   one = 1;

324:   PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "T", &a->M, &b->M, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
325:   C->assembled = PETSC_TRUE;
326:   return 0;
327: }

329: static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
330: {
331:   MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE);
332:   MatSetType(C, MATSCALAPACK);
333:   MatSetUp(C);
334:   return 0;
335: }

337: /* --------------------------------------- */
338: static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C)
339: {
340:   C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
341:   C->ops->productsymbolic = MatProductSymbolic_AB;
342:   return 0;
343: }

345: static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C)
346: {
347:   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
348:   C->ops->productsymbolic          = MatProductSymbolic_ABt;
349:   return 0;
350: }

352: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
353: {
354:   Mat_Product *product = C->product;

356:   switch (product->type) {
357:   case MATPRODUCT_AB:
358:     MatProductSetFromOptions_ScaLAPACK_AB(C);
359:     break;
360:   case MATPRODUCT_ABt:
361:     MatProductSetFromOptions_ScaLAPACK_ABt(C);
362:     break;
363:   default:
364:     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices", MatProductTypes[product->type]);
365:   }
366:   return 0;
367: }
368: /* --------------------------------------- */

370: static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A, Vec D)
371: {
372:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
373:   PetscScalar    *darray, *d2d, v;
374:   const PetscInt *ranges;
375:   PetscBLASInt    j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;

377:   VecGetArray(D, &darray);

379:   if (A->rmap->N <= A->cmap->N) { /* row version */

381:     /* create ScaLAPACK descriptor for vector (1d block distribution) */
382:     PetscLayoutGetRanges(A->rmap, &ranges);
383:     PetscBLASIntCast(ranges[1], &mb); /* D block size */
384:     dlld = PetscMax(1, A->rmap->n);
385:     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));

388:     /* allocate 2d vector */
389:     lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
390:     PetscCalloc1(lszd, &d2d);
391:     dlld = PetscMax(1, lszd);

393:     /* create ScaLAPACK descriptor for vector (2d block distribution) */
394:     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));

397:     /* collect diagonal */
398:     for (j = 1; j <= a->M; j++) {
399:       PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("R", " ", &v, a->loc, &j, &j, a->desc));
400:       PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &j, &one, d2desc, &v));
401:     }

403:     /* redistribute d from a column of a 2d matrix */
404:     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxcol));
405:     PetscFree(d2d);

407:   } else { /* column version */

409:     /* create ScaLAPACK descriptor for vector (1d block distribution) */
410:     PetscLayoutGetRanges(A->cmap, &ranges);
411:     PetscBLASIntCast(ranges[1], &nb); /* D block size */
412:     dlld = 1;
413:     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));

416:     /* allocate 2d vector */
417:     lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
418:     PetscCalloc1(lszd, &d2d);

420:     /* create ScaLAPACK descriptor for vector (2d block distribution) */
421:     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));

424:     /* collect diagonal */
425:     for (j = 1; j <= a->N; j++) {
426:       PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("C", " ", &v, a->loc, &j, &j, a->desc));
427:       PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &one, &j, d2desc, &v));
428:     }

430:     /* redistribute d from a row of a 2d matrix */
431:     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxrow));
432:     PetscFree(d2d);
433:   }

435:   VecRestoreArray(D, &darray);
436:   VecAssemblyBegin(D);
437:   VecAssemblyEnd(D);
438:   return 0;
439: }

441: static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A, Vec L, Vec R)
442: {
443:   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
444:   const PetscScalar *d;
445:   const PetscInt    *ranges;
446:   PetscScalar       *d2d;
447:   PetscBLASInt       i, j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;

449:   if (R) {
450:     VecGetArrayRead(R, (const PetscScalar **)&d);
451:     /* create ScaLAPACK descriptor for vector (1d block distribution) */
452:     PetscLayoutGetRanges(A->cmap, &ranges);
453:     PetscBLASIntCast(ranges[1], &nb); /* D block size */
454:     dlld = 1;
455:     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));

458:     /* allocate 2d vector */
459:     lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
460:     PetscCalloc1(lszd, &d2d);

462:     /* create ScaLAPACK descriptor for vector (2d block distribution) */
463:     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));

466:     /* redistribute d to a row of a 2d matrix */
467:     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, (PetscScalar *)d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxrow));

469:     /* broadcast along process columns */
470:     if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld);
471:     else Cdgebr2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld, 0, a->grid->mycol);

473:     /* local scaling */
474:     for (j = 0; j < a->locc; j++)
475:       for (i = 0; i < a->locr; i++) a->loc[i + j * a->lld] *= d2d[j];

477:     PetscFree(d2d);
478:     VecRestoreArrayRead(R, (const PetscScalar **)&d);
479:   }
480:   if (L) {
481:     VecGetArrayRead(L, (const PetscScalar **)&d);
482:     /* create ScaLAPACK descriptor for vector (1d block distribution) */
483:     PetscLayoutGetRanges(A->rmap, &ranges);
484:     PetscBLASIntCast(ranges[1], &mb); /* D block size */
485:     dlld = PetscMax(1, A->rmap->n);
486:     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));

489:     /* allocate 2d vector */
490:     lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
491:     PetscCalloc1(lszd, &d2d);
492:     dlld = PetscMax(1, lszd);

494:     /* create ScaLAPACK descriptor for vector (2d block distribution) */
495:     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));

498:     /* redistribute d to a column of a 2d matrix */
499:     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, (PetscScalar *)d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxcol));

501:     /* broadcast along process rows */
502:     if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld);
503:     else Cdgebr2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld, a->grid->myrow, 0);

505:     /* local scaling */
506:     for (i = 0; i < a->locr; i++)
507:       for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] *= d2d[i];

509:     PetscFree(d2d);
510:     VecRestoreArrayRead(L, (const PetscScalar **)&d);
511:   }
512:   return 0;
513: }

515: static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A, PetscBool *missing, PetscInt *d)
516: {
517:   *missing = PETSC_FALSE;
518:   return 0;
519: }

521: static PetscErrorCode MatScale_ScaLAPACK(Mat X, PetscScalar a)
522: {
523:   Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
524:   PetscBLASInt   n, one = 1;

526:   n = x->lld * x->locc;
527:   PetscCallBLAS("BLASscal", BLASscal_(&n, &a, x->loc, &one));
528:   return 0;
529: }

531: static PetscErrorCode MatShift_ScaLAPACK(Mat X, PetscScalar alpha)
532: {
533:   Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
534:   PetscBLASInt   i, n;
535:   PetscScalar    v;

537:   n = PetscMin(x->M, x->N);
538:   for (i = 1; i <= n; i++) {
539:     PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("-", " ", &v, x->loc, &i, &i, x->desc));
540:     v += alpha;
541:     PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(x->loc, &i, &i, x->desc, &v));
542:   }
543:   return 0;
544: }

546: static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
547: {
548:   Mat_ScaLAPACK *x    = (Mat_ScaLAPACK *)X->data;
549:   Mat_ScaLAPACK *y    = (Mat_ScaLAPACK *)Y->data;
550:   PetscBLASInt   one  = 1;
551:   PetscScalar    beta = 1.0;

553:   MatScaLAPACKCheckDistribution(Y, 1, X, 3);
554:   PetscCallBLAS("SCALAPACKmatadd", SCALAPACKmatadd_(&x->M, &x->N, &alpha, x->loc, &one, &one, x->desc, &beta, y->loc, &one, &one, y->desc));
555:   PetscObjectStateIncrease((PetscObject)Y);
556:   return 0;
557: }

559: static PetscErrorCode MatCopy_ScaLAPACK(Mat A, Mat B, MatStructure str)
560: {
561:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
562:   Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data;

564:   PetscArraycpy(b->loc, a->loc, a->lld * a->locc);
565:   PetscObjectStateIncrease((PetscObject)B);
566:   return 0;
567: }

569: static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A, MatDuplicateOption op, Mat *B)
570: {
571:   Mat            Bs;
572:   MPI_Comm       comm;
573:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b;

575:   PetscObjectGetComm((PetscObject)A, &comm);
576:   MatCreate(comm, &Bs);
577:   MatSetSizes(Bs, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE);
578:   MatSetType(Bs, MATSCALAPACK);
579:   b       = (Mat_ScaLAPACK *)Bs->data;
580:   b->M    = a->M;
581:   b->N    = a->N;
582:   b->mb   = a->mb;
583:   b->nb   = a->nb;
584:   b->rsrc = a->rsrc;
585:   b->csrc = a->csrc;
586:   MatSetUp(Bs);
587:   *B = Bs;
588:   if (op == MAT_COPY_VALUES) PetscArraycpy(b->loc, a->loc, a->lld * a->locc);
589:   Bs->assembled = PETSC_TRUE;
590:   return 0;
591: }

593: static PetscErrorCode MatTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
594: {
595:   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data, *b;
596:   Mat            Bs   = *B;
597:   PetscBLASInt   one  = 1;
598:   PetscScalar    sone = 1.0, zero = 0.0;
599: #if defined(PETSC_USE_COMPLEX)
600:   PetscInt i, j;
601: #endif

603:   if (reuse == MAT_REUSE_MATRIX) MatTransposeCheckNonzeroState_Private(A, *B);
604:   if (reuse == MAT_INITIAL_MATRIX) {
605:     MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs);
606:     *B = Bs;
607:   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
608:   b = (Mat_ScaLAPACK *)Bs->data;
609:   PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
610: #if defined(PETSC_USE_COMPLEX)
611:   /* undo conjugation */
612:   for (i = 0; i < b->locr; i++)
613:     for (j = 0; j < b->locc; j++) b->loc[i + j * b->lld] = PetscConj(b->loc[i + j * b->lld]);
614: #endif
615:   Bs->assembled = PETSC_TRUE;
616:   return 0;
617: }

619: static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
620: {
621:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
622:   PetscInt       i, j;

624:   for (i = 0; i < a->locr; i++)
625:     for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] = PetscConj(a->loc[i + j * a->lld]);
626:   return 0;
627: }

629: static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
630: {
631:   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data, *b;
632:   Mat            Bs   = *B;
633:   PetscBLASInt   one  = 1;
634:   PetscScalar    sone = 1.0, zero = 0.0;

636:   if (reuse == MAT_INITIAL_MATRIX) {
637:     MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs);
638:     *B = Bs;
639:   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
640:   b = (Mat_ScaLAPACK *)Bs->data;
641:   PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
642:   Bs->assembled = PETSC_TRUE;
643:   return 0;
644: }

646: static PetscErrorCode MatSolve_ScaLAPACK(Mat A, Vec B, Vec X)
647: {
648:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
649:   PetscScalar    *x, *x2d;
650:   const PetscInt *ranges;
651:   PetscBLASInt    xdesc[9], x2desc[9], mb, lszx, zero = 0, one = 1, xlld, nrhs = 1, info;

653:   VecCopy(B, X);
654:   VecGetArray(X, &x);

656:   /* create ScaLAPACK descriptor for a vector (1d block distribution) */
657:   PetscLayoutGetRanges(A->rmap, &ranges);
658:   PetscBLASIntCast(ranges[1], &mb); /* x block size */
659:   xlld = PetscMax(1, A->rmap->n);
660:   PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));

663:   /* allocate 2d vector */
664:   lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
665:   PetscMalloc1(lszx, &x2d);
666:   xlld = PetscMax(1, lszx);

668:   /* create ScaLAPACK descriptor for a vector (2d block distribution) */
669:   PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));

672:   /* redistribute x as a column of a 2d matrix */
673:   PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));

675:   /* call ScaLAPACK subroutine */
676:   switch (A->factortype) {
677:   case MAT_FACTOR_LU:
678:     PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &nrhs, a->loc, &one, &one, a->desc, a->pivots, x2d, &one, &one, x2desc, &info));
680:     break;
681:   case MAT_FACTOR_CHOLESKY:
682:     PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &nrhs, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &info));
684:     break;
685:   default:
686:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
687:   }

689:   /* redistribute x from a column of a 2d matrix */
690:   PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x2d, &one, &one, x2desc, x, &one, &one, xdesc, &a->grid->ictxcol));

692:   PetscFree(x2d);
693:   VecRestoreArray(X, &x);
694:   return 0;
695: }

697: static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A, Vec B, Vec Y, Vec X)
698: {
699:   MatSolve_ScaLAPACK(A, B, X);
700:   VecAXPY(X, 1, Y);
701:   return 0;
702: }

704: static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A, Mat B, Mat X)
705: {
706:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b, *x;
707:   PetscBool      flg1, flg2;
708:   PetscBLASInt   one = 1, info;

710:   PetscObjectTypeCompare((PetscObject)B, MATSCALAPACK, &flg1);
711:   PetscObjectTypeCompare((PetscObject)X, MATSCALAPACK, &flg2);
713:   MatScaLAPACKCheckDistribution(B, 1, X, 2);
714:   b = (Mat_ScaLAPACK *)B->data;
715:   x = (Mat_ScaLAPACK *)X->data;
716:   PetscArraycpy(x->loc, b->loc, b->lld * b->locc);

718:   switch (A->factortype) {
719:   case MAT_FACTOR_LU:
720:     PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &x->N, a->loc, &one, &one, a->desc, a->pivots, x->loc, &one, &one, x->desc, &info));
722:     break;
723:   case MAT_FACTOR_CHOLESKY:
724:     PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &x->N, a->loc, &one, &one, a->desc, x->loc, &one, &one, x->desc, &info));
726:     break;
727:   default:
728:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
729:   }
730:   return 0;
731: }

733: static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A, IS row, IS col, const MatFactorInfo *factorinfo)
734: {
735:   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
736:   PetscBLASInt   one = 1, info;

738:   if (!a->pivots) { PetscMalloc1(a->locr + a->mb, &a->pivots); }
739:   PetscCallBLAS("SCALAPACKgetrf", SCALAPACKgetrf_(&a->M, &a->N, a->loc, &one, &one, a->desc, a->pivots, &info));
741:   A->factortype = MAT_FACTOR_LU;
742:   A->assembled  = PETSC_TRUE;

744:   PetscFree(A->solvertype);
745:   PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype);
746:   return 0;
747: }

749: static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
750: {
751:   MatCopy(A, F, SAME_NONZERO_PATTERN);
752:   MatLUFactor_ScaLAPACK(F, 0, 0, info);
753:   return 0;
754: }

756: static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
757: {
758:   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
759:   return 0;
760: }

762: static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A, IS perm, const MatFactorInfo *factorinfo)
763: {
764:   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
765:   PetscBLASInt   one = 1, info;

767:   PetscCallBLAS("SCALAPACKpotrf", SCALAPACKpotrf_("L", &a->M, a->loc, &one, &one, a->desc, &info));
769:   A->factortype = MAT_FACTOR_CHOLESKY;
770:   A->assembled  = PETSC_TRUE;

772:   PetscFree(A->solvertype);
773:   PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype);
774:   return 0;
775: }

777: static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
778: {
779:   MatCopy(A, F, SAME_NONZERO_PATTERN);
780:   MatCholeskyFactor_ScaLAPACK(F, 0, info);
781:   return 0;
782: }

784: static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS perm, const MatFactorInfo *info)
785: {
786:   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
787:   return 0;
788: }

790: PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A, MatSolverType *type)
791: {
792:   *type = MATSOLVERSCALAPACK;
793:   return 0;
794: }

796: static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A, MatFactorType ftype, Mat *F)
797: {
798:   Mat            B;
799:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;

801:   /* Create the factorization matrix */
802:   MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->mb, a->nb, a->M, a->N, a->rsrc, a->csrc, &B);
803:   B->trivialsymbolic = PETSC_TRUE;
804:   B->factortype      = ftype;
805:   PetscFree(B->solvertype);
806:   PetscStrallocpy(MATSOLVERSCALAPACK, &B->solvertype);

808:   PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_scalapack_scalapack);
809:   *F = B;
810:   return 0;
811: }

813: PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
814: {
815:   MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_LU, MatGetFactor_scalapack_scalapack);
816:   MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_CHOLESKY, MatGetFactor_scalapack_scalapack);
817:   return 0;
818: }

820: static PetscErrorCode MatNorm_ScaLAPACK(Mat A, NormType type, PetscReal *nrm)
821: {
822:   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
823:   PetscBLASInt   one = 1, lwork = 0;
824:   const char    *ntype;
825:   PetscScalar   *work = NULL, dummy;

827:   switch (type) {
828:   case NORM_1:
829:     ntype = "1";
830:     lwork = PetscMax(a->locr, a->locc);
831:     break;
832:   case NORM_FROBENIUS:
833:     ntype = "F";
834:     work  = &dummy;
835:     break;
836:   case NORM_INFINITY:
837:     ntype = "I";
838:     lwork = PetscMax(a->locr, a->locc);
839:     break;
840:   default:
841:     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type");
842:   }
843:   if (lwork) PetscMalloc1(lwork, &work);
844:   *nrm = SCALAPACKlange_(ntype, &a->M, &a->N, a->loc, &one, &one, a->desc, work);
845:   if (lwork) PetscFree(work);
846:   return 0;
847: }

849: static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
850: {
851:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;

853:   PetscArrayzero(a->loc, a->lld * a->locc);
854:   return 0;
855: }

857: static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A, IS *rows, IS *cols)
858: {
859:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
860:   PetscInt       i, n, nb, isrc, nproc, iproc, *idx;

862:   if (rows) {
863:     n     = a->locr;
864:     nb    = a->mb;
865:     isrc  = a->rsrc;
866:     nproc = a->grid->nprow;
867:     iproc = a->grid->myrow;
868:     PetscMalloc1(n, &idx);
869:     for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
870:     ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, rows);
871:   }
872:   if (cols) {
873:     n     = a->locc;
874:     nb    = a->nb;
875:     isrc  = a->csrc;
876:     nproc = a->grid->npcol;
877:     iproc = a->grid->mycol;
878:     PetscMalloc1(n, &idx);
879:     for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
880:     ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, cols);
881:   }
882:   return 0;
883: }

885: static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A, MatType newtype, MatReuse reuse, Mat *B)
886: {
887:   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
888:   Mat                Bmpi;
889:   MPI_Comm           comm;
890:   PetscInt           i, M = A->rmap->N, N = A->cmap->N, m, n, rstart, rend, nz;
891:   const PetscInt    *ranges, *branges, *cwork;
892:   const PetscScalar *vwork;
893:   PetscBLASInt       bdesc[9], bmb, zero = 0, one = 1, lld, info;
894:   PetscScalar       *barray;
895:   PetscBool          differ = PETSC_FALSE;
896:   PetscMPIInt        size;

898:   PetscObjectGetComm((PetscObject)A, &comm);
899:   PetscLayoutGetRanges(A->rmap, &ranges);

901:   if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
902:     MPI_Comm_size(comm, &size);
903:     PetscLayoutGetRanges((*B)->rmap, &branges);
904:     for (i = 0; i < size; i++)
905:       if (ranges[i + 1] != branges[i + 1]) {
906:         differ = PETSC_TRUE;
907:         break;
908:       }
909:   }

911:   if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
912:     MatCreate(comm, &Bmpi);
913:     m = PETSC_DECIDE;
914:     PetscSplitOwnershipEqual(comm, &m, &M);
915:     n = PETSC_DECIDE;
916:     PetscSplitOwnershipEqual(comm, &n, &N);
917:     MatSetSizes(Bmpi, m, n, M, N);
918:     MatSetType(Bmpi, MATDENSE);
919:     MatSetUp(Bmpi);

921:     /* create ScaLAPACK descriptor for B (1d block distribution) */
922:     PetscBLASIntCast(ranges[1], &bmb); /* row block size */
923:     lld = PetscMax(A->rmap->n, 1);                /* local leading dimension */
924:     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));

927:     /* redistribute matrix */
928:     MatDenseGetArray(Bmpi, &barray);
929:     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
930:     MatDenseRestoreArray(Bmpi, &barray);
931:     MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY);
932:     MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY);

934:     /* transfer rows of auxiliary matrix to the final matrix B */
935:     MatGetOwnershipRange(Bmpi, &rstart, &rend);
936:     for (i = rstart; i < rend; i++) {
937:       MatGetRow(Bmpi, i, &nz, &cwork, &vwork);
938:       MatSetValues(*B, 1, &i, nz, cwork, vwork, INSERT_VALUES);
939:       MatRestoreRow(Bmpi, i, &nz, &cwork, &vwork);
940:     }
941:     MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY);
942:     MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY);
943:     MatDestroy(&Bmpi);

945:   } else { /* normal cases */

947:     if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
948:     else {
949:       MatCreate(comm, &Bmpi);
950:       m = PETSC_DECIDE;
951:       PetscSplitOwnershipEqual(comm, &m, &M);
952:       n = PETSC_DECIDE;
953:       PetscSplitOwnershipEqual(comm, &n, &N);
954:       MatSetSizes(Bmpi, m, n, M, N);
955:       MatSetType(Bmpi, MATDENSE);
956:       MatSetUp(Bmpi);
957:     }

959:     /* create ScaLAPACK descriptor for B (1d block distribution) */
960:     PetscBLASIntCast(ranges[1], &bmb); /* row block size */
961:     lld = PetscMax(A->rmap->n, 1);                /* local leading dimension */
962:     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));

965:     /* redistribute matrix */
966:     MatDenseGetArray(Bmpi, &barray);
967:     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
968:     MatDenseRestoreArray(Bmpi, &barray);

970:     MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY);
971:     MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY);
972:     if (reuse == MAT_INPLACE_MATRIX) {
973:       MatHeaderReplace(A, &Bmpi);
974:     } else *B = Bmpi;
975:   }
976:   return 0;
977: }

979: static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map, PetscBool *correct)
980: {
981:   const PetscInt *ranges;
982:   PetscMPIInt     size;
983:   PetscInt        i, n;

985:   *correct = PETSC_TRUE;
986:   MPI_Comm_size(map->comm, &size);
987:   if (size > 1) {
988:     PetscLayoutGetRanges(map, &ranges);
989:     n = ranges[1] - ranges[0];
990:     for (i = 1; i < size; i++)
991:       if (ranges[i + 1] - ranges[i] != n) break;
992:     *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i + 1] - ranges[i] <= n));
993:   }
994:   return 0;
995: }

997: PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *B)
998: {
999:   Mat_ScaLAPACK  *b;
1000:   Mat             Bmpi;
1001:   MPI_Comm        comm;
1002:   PetscInt        M = A->rmap->N, N = A->cmap->N, m, n;
1003:   const PetscInt *ranges, *rows, *cols;
1004:   PetscBLASInt    adesc[9], amb, zero = 0, one = 1, lld, info;
1005:   PetscScalar    *aarray;
1006:   IS              ir, ic;
1007:   PetscInt        lda;
1008:   PetscBool       flg;

1010:   PetscObjectGetComm((PetscObject)A, &comm);

1012:   if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1013:   else {
1014:     MatCreate(comm, &Bmpi);
1015:     m = PETSC_DECIDE;
1016:     PetscSplitOwnershipEqual(comm, &m, &M);
1017:     n = PETSC_DECIDE;
1018:     PetscSplitOwnershipEqual(comm, &n, &N);
1019:     MatSetSizes(Bmpi, m, n, M, N);
1020:     MatSetType(Bmpi, MATSCALAPACK);
1021:     MatSetUp(Bmpi);
1022:   }
1023:   b = (Mat_ScaLAPACK *)Bmpi->data;

1025:   MatDenseGetLDA(A, &lda);
1026:   MatDenseGetArray(A, &aarray);
1027:   MatScaLAPACKCheckLayout(A->rmap, &flg);
1028:   if (flg) MatScaLAPACKCheckLayout(A->cmap, &flg);
1029:   if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */
1030:     /* create ScaLAPACK descriptor for A (1d block distribution) */
1031:     PetscLayoutGetRanges(A->rmap, &ranges);
1032:     PetscBLASIntCast(ranges[1], &amb); /* row block size */
1033:     lld = PetscMax(lda, 1);                       /* local leading dimension */
1034:     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(adesc, &b->M, &b->N, &amb, &b->N, &zero, &zero, &b->grid->ictxcol, &lld, &info));

1037:     /* redistribute matrix */
1038:     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&b->M, &b->N, aarray, &one, &one, adesc, b->loc, &one, &one, b->desc, &b->grid->ictxcol));
1039:     Bmpi->nooffprocentries = PETSC_TRUE;
1040:   } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */
1042:     b->roworiented = PETSC_FALSE;
1043:     MatGetOwnershipIS(A, &ir, &ic);
1044:     ISGetIndices(ir, &rows);
1045:     ISGetIndices(ic, &cols);
1046:     MatSetValues(Bmpi, A->rmap->n, rows, A->cmap->N, cols, aarray, INSERT_VALUES);
1047:     ISRestoreIndices(ir, &rows);
1048:     ISRestoreIndices(ic, &cols);
1049:     ISDestroy(&ic);
1050:     ISDestroy(&ir);
1051:   }
1052:   MatDenseRestoreArray(A, &aarray);
1053:   MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY);
1054:   MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY);
1055:   if (reuse == MAT_INPLACE_MATRIX) {
1056:     MatHeaderReplace(A, &Bmpi);
1057:   } else *B = Bmpi;
1058:   return 0;
1059: }

1061: PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1062: {
1063:   Mat                mat_scal;
1064:   PetscInt           M = A->rmap->N, N = A->cmap->N, rstart = A->rmap->rstart, rend = A->rmap->rend, m, n, row, ncols;
1065:   const PetscInt    *cols;
1066:   const PetscScalar *vals;

1068:   if (reuse == MAT_REUSE_MATRIX) {
1069:     mat_scal = *newmat;
1070:     MatZeroEntries(mat_scal);
1071:   } else {
1072:     MatCreate(PetscObjectComm((PetscObject)A), &mat_scal);
1073:     m = PETSC_DECIDE;
1074:     PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M);
1075:     n = PETSC_DECIDE;
1076:     PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N);
1077:     MatSetSizes(mat_scal, m, n, M, N);
1078:     MatSetType(mat_scal, MATSCALAPACK);
1079:     MatSetUp(mat_scal);
1080:   }
1081:   for (row = rstart; row < rend; row++) {
1082:     MatGetRow(A, row, &ncols, &cols, &vals);
1083:     MatSetValues(mat_scal, 1, &row, ncols, cols, vals, INSERT_VALUES);
1084:     MatRestoreRow(A, row, &ncols, &cols, &vals);
1085:   }
1086:   MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY);
1087:   MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY);

1089:   if (reuse == MAT_INPLACE_MATRIX) MatHeaderReplace(A, &mat_scal);
1090:   else *newmat = mat_scal;
1091:   return 0;
1092: }

1094: PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1095: {
1096:   Mat                mat_scal;
1097:   PetscInt           M = A->rmap->N, N = A->cmap->N, m, n, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend;
1098:   const PetscInt    *cols;
1099:   const PetscScalar *vals;
1100:   PetscScalar        v;

1102:   if (reuse == MAT_REUSE_MATRIX) {
1103:     mat_scal = *newmat;
1104:     MatZeroEntries(mat_scal);
1105:   } else {
1106:     MatCreate(PetscObjectComm((PetscObject)A), &mat_scal);
1107:     m = PETSC_DECIDE;
1108:     PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M);
1109:     n = PETSC_DECIDE;
1110:     PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N);
1111:     MatSetSizes(mat_scal, m, n, M, N);
1112:     MatSetType(mat_scal, MATSCALAPACK);
1113:     MatSetUp(mat_scal);
1114:   }
1115:   MatGetRowUpperTriangular(A);
1116:   for (row = rstart; row < rend; row++) {
1117:     MatGetRow(A, row, &ncols, &cols, &vals);
1118:     MatSetValues(mat_scal, 1, &row, ncols, cols, vals, ADD_VALUES);
1119:     for (j = 0; j < ncols; j++) { /* lower triangular part */
1120:       if (cols[j] == row) continue;
1121:       v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
1122:       MatSetValues(mat_scal, 1, &cols[j], 1, &row, &v, ADD_VALUES);
1123:     }
1124:     MatRestoreRow(A, row, &ncols, &cols, &vals);
1125:   }
1126:   MatRestoreRowUpperTriangular(A);
1127:   MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY);
1128:   MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY);

1130:   if (reuse == MAT_INPLACE_MATRIX) MatHeaderReplace(A, &mat_scal);
1131:   else *newmat = mat_scal;
1132:   return 0;
1133: }

1135: static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
1136: {
1137:   Mat_ScaLAPACK *a  = (Mat_ScaLAPACK *)A->data;
1138:   PetscInt       sz = 0;

1140:   PetscLayoutSetUp(A->rmap);
1141:   PetscLayoutSetUp(A->cmap);
1142:   if (!a->lld) a->lld = a->locr;

1144:   PetscFree(a->loc);
1145:   PetscIntMultError(a->lld, a->locc, &sz);
1146:   PetscCalloc1(sz, &a->loc);

1148:   A->preallocated = PETSC_TRUE;
1149:   return 0;
1150: }

1152: static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
1153: {
1154:   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK *)A->data;
1155:   Mat_ScaLAPACK_Grid *grid;
1156:   PetscBool           flg;
1157:   MPI_Comm            icomm;

1159:   MatStashDestroy_Private(&A->stash);
1160:   PetscFree(a->loc);
1161:   PetscFree(a->pivots);
1162:   PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL);
1163:   MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg);
1164:   if (--grid->grid_refct == 0) {
1165:     Cblacs_gridexit(grid->ictxt);
1166:     Cblacs_gridexit(grid->ictxrow);
1167:     Cblacs_gridexit(grid->ictxcol);
1168:     PetscFree(grid);
1169:     MPI_Comm_delete_attr(icomm, Petsc_ScaLAPACK_keyval);
1170:   }
1171:   PetscCommDestroy(&icomm);
1172:   PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", NULL);
1173:   PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL);
1174:   PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", NULL);
1175:   PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", NULL);
1176:   PetscFree(A->data);
1177:   return 0;
1178: }

1180: PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
1181: {
1182:   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
1183:   PetscBLASInt   info = 0;
1184:   PetscBool      flg;

1186:   PetscLayoutSetUp(A->rmap);
1187:   PetscLayoutSetUp(A->cmap);

1189:   /* check that the layout is as enforced by MatCreateScaLAPACK() */
1190:   MatScaLAPACKCheckLayout(A->rmap, &flg);
1192:   MatScaLAPACKCheckLayout(A->cmap, &flg);

1195:   /* compute local sizes */
1196:   PetscBLASIntCast(A->rmap->N, &a->M);
1197:   PetscBLASIntCast(A->cmap->N, &a->N);
1198:   a->locr = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
1199:   a->locc = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
1200:   a->lld  = PetscMax(1, a->locr);

1202:   /* allocate local array */
1203:   MatScaLAPACKSetPreallocation(A);

1205:   /* set up ScaLAPACK descriptor */
1206:   PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(a->desc, &a->M, &a->N, &a->mb, &a->nb, &a->rsrc, &a->csrc, &a->grid->ictxt, &a->lld, &info));
1208:   return 0;
1209: }

1211: PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A, MatAssemblyType type)
1212: {
1213:   PetscInt nstash, reallocs;

1215:   if (A->nooffprocentries) return 0;
1216:   MatStashScatterBegin_Private(A, &A->stash, NULL);
1217:   MatStashGetInfo_Private(&A->stash, &nstash, &reallocs);
1218:   PetscInfo(A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs);
1219:   return 0;
1220: }

1222: PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A, MatAssemblyType type)
1223: {
1224:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1225:   PetscMPIInt    n;
1226:   PetscInt       i, flg, *row, *col;
1227:   PetscScalar   *val;
1228:   PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;

1230:   if (A->nooffprocentries) return 0;
1231:   while (1) {
1232:     MatStashScatterGetMesg_Private(&A->stash, &n, &row, &col, &val, &flg);
1233:     if (!flg) break;
1234:     for (i = 0; i < n; i++) {
1235:       PetscBLASIntCast(row[i] + 1, &gridx);
1236:       PetscBLASIntCast(col[i] + 1, &gcidx);
1237:       PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
1239:       switch (A->insertmode) {
1240:       case INSERT_VALUES:
1241:         a->loc[lridx - 1 + (lcidx - 1) * a->lld] = val[i];
1242:         break;
1243:       case ADD_VALUES:
1244:         a->loc[lridx - 1 + (lcidx - 1) * a->lld] += val[i];
1245:         break;
1246:       default:
1247:         SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)A->insertmode);
1248:       }
1249:     }
1250:   }
1251:   MatStashScatterEnd_Private(&A->stash);
1252:   return 0;
1253: }

1255: PetscErrorCode MatLoad_ScaLAPACK(Mat newMat, PetscViewer viewer)
1256: {
1257:   Mat      Adense, As;
1258:   MPI_Comm comm;

1260:   PetscObjectGetComm((PetscObject)newMat, &comm);
1261:   MatCreate(comm, &Adense);
1262:   MatSetType(Adense, MATDENSE);
1263:   MatLoad(Adense, viewer);
1264:   MatConvert(Adense, MATSCALAPACK, MAT_INITIAL_MATRIX, &As);
1265:   MatDestroy(&Adense);
1266:   MatHeaderReplace(newMat, &As);
1267:   return 0;
1268: }

1270: /* -------------------------------------------------------------------*/
1271: static struct _MatOps MatOps_Values = {MatSetValues_ScaLAPACK,
1272:                                        0,
1273:                                        0,
1274:                                        MatMult_ScaLAPACK,
1275:                                        /* 4*/ MatMultAdd_ScaLAPACK,
1276:                                        MatMultTranspose_ScaLAPACK,
1277:                                        MatMultTransposeAdd_ScaLAPACK,
1278:                                        MatSolve_ScaLAPACK,
1279:                                        MatSolveAdd_ScaLAPACK,
1280:                                        0,
1281:                                        /*10*/ 0,
1282:                                        MatLUFactor_ScaLAPACK,
1283:                                        MatCholeskyFactor_ScaLAPACK,
1284:                                        0,
1285:                                        MatTranspose_ScaLAPACK,
1286:                                        /*15*/ MatGetInfo_ScaLAPACK,
1287:                                        0,
1288:                                        MatGetDiagonal_ScaLAPACK,
1289:                                        MatDiagonalScale_ScaLAPACK,
1290:                                        MatNorm_ScaLAPACK,
1291:                                        /*20*/ MatAssemblyBegin_ScaLAPACK,
1292:                                        MatAssemblyEnd_ScaLAPACK,
1293:                                        MatSetOption_ScaLAPACK,
1294:                                        MatZeroEntries_ScaLAPACK,
1295:                                        /*24*/ 0,
1296:                                        MatLUFactorSymbolic_ScaLAPACK,
1297:                                        MatLUFactorNumeric_ScaLAPACK,
1298:                                        MatCholeskyFactorSymbolic_ScaLAPACK,
1299:                                        MatCholeskyFactorNumeric_ScaLAPACK,
1300:                                        /*29*/ MatSetUp_ScaLAPACK,
1301:                                        0,
1302:                                        0,
1303:                                        0,
1304:                                        0,
1305:                                        /*34*/ MatDuplicate_ScaLAPACK,
1306:                                        0,
1307:                                        0,
1308:                                        0,
1309:                                        0,
1310:                                        /*39*/ MatAXPY_ScaLAPACK,
1311:                                        0,
1312:                                        0,
1313:                                        0,
1314:                                        MatCopy_ScaLAPACK,
1315:                                        /*44*/ 0,
1316:                                        MatScale_ScaLAPACK,
1317:                                        MatShift_ScaLAPACK,
1318:                                        0,
1319:                                        0,
1320:                                        /*49*/ 0,
1321:                                        0,
1322:                                        0,
1323:                                        0,
1324:                                        0,
1325:                                        /*54*/ 0,
1326:                                        0,
1327:                                        0,
1328:                                        0,
1329:                                        0,
1330:                                        /*59*/ 0,
1331:                                        MatDestroy_ScaLAPACK,
1332:                                        MatView_ScaLAPACK,
1333:                                        0,
1334:                                        0,
1335:                                        /*64*/ 0,
1336:                                        0,
1337:                                        0,
1338:                                        0,
1339:                                        0,
1340:                                        /*69*/ 0,
1341:                                        0,
1342:                                        MatConvert_ScaLAPACK_Dense,
1343:                                        0,
1344:                                        0,
1345:                                        /*74*/ 0,
1346:                                        0,
1347:                                        0,
1348:                                        0,
1349:                                        0,
1350:                                        /*79*/ 0,
1351:                                        0,
1352:                                        0,
1353:                                        0,
1354:                                        MatLoad_ScaLAPACK,
1355:                                        /*84*/ 0,
1356:                                        0,
1357:                                        0,
1358:                                        0,
1359:                                        0,
1360:                                        /*89*/ 0,
1361:                                        0,
1362:                                        MatMatMultNumeric_ScaLAPACK,
1363:                                        0,
1364:                                        0,
1365:                                        /*94*/ 0,
1366:                                        0,
1367:                                        0,
1368:                                        MatMatTransposeMultNumeric_ScaLAPACK,
1369:                                        0,
1370:                                        /*99*/ MatProductSetFromOptions_ScaLAPACK,
1371:                                        0,
1372:                                        0,
1373:                                        MatConjugate_ScaLAPACK,
1374:                                        0,
1375:                                        /*104*/ 0,
1376:                                        0,
1377:                                        0,
1378:                                        0,
1379:                                        0,
1380:                                        /*109*/ MatMatSolve_ScaLAPACK,
1381:                                        0,
1382:                                        0,
1383:                                        0,
1384:                                        MatMissingDiagonal_ScaLAPACK,
1385:                                        /*114*/ 0,
1386:                                        0,
1387:                                        0,
1388:                                        0,
1389:                                        0,
1390:                                        /*119*/ 0,
1391:                                        MatHermitianTranspose_ScaLAPACK,
1392:                                        0,
1393:                                        0,
1394:                                        0,
1395:                                        /*124*/ 0,
1396:                                        0,
1397:                                        0,
1398:                                        0,
1399:                                        0,
1400:                                        /*129*/ 0,
1401:                                        0,
1402:                                        0,
1403:                                        0,
1404:                                        0,
1405:                                        /*134*/ 0,
1406:                                        0,
1407:                                        0,
1408:                                        0,
1409:                                        0,
1410:                                        0,
1411:                                        /*140*/ 0,
1412:                                        0,
1413:                                        0,
1414:                                        0,
1415:                                        0,
1416:                                        /*145*/ 0,
1417:                                        0,
1418:                                        0,
1419:                                        0,
1420:                                        0,
1421:                                        /*150*/ 0};

1423: static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat, MatStash *stash, PetscInt *owners)
1424: {
1425:   PetscInt          *owner, *startv, *starti, tag1 = stash->tag1, tag2 = stash->tag2, bs2;
1426:   PetscInt           size = stash->size, nsends;
1427:   PetscInt           count, *sindices, **rindices, i, j, l;
1428:   PetscScalar      **rvalues, *svalues;
1429:   MPI_Comm           comm = stash->comm;
1430:   MPI_Request       *send_waits, *recv_waits, *recv_waits1, *recv_waits2;
1431:   PetscMPIInt       *sizes, *nlengths, nreceives;
1432:   PetscInt          *sp_idx, *sp_idy;
1433:   PetscScalar       *sp_val;
1434:   PetscMatStashSpace space, space_next;
1435:   PetscBLASInt       gridx, gcidx, lridx, lcidx, rsrc, csrc;
1436:   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)mat->data;

1438:   { /* make sure all processors are either in INSERTMODE or ADDMODE */
1439:     InsertMode addv;
1440:     MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat));
1442:     mat->insertmode = addv; /* in case this processor had no cache */
1443:   }

1445:   bs2 = stash->bs * stash->bs;

1447:   /*  first count number of contributors to each processor */
1448:   PetscCalloc1(size, &nlengths);
1449:   PetscMalloc1(stash->n + 1, &owner);

1451:   i = j = 0;
1452:   space = stash->space_head;
1453:   while (space) {
1454:     space_next = space->next;
1455:     for (l = 0; l < space->local_used; l++) {
1456:       PetscBLASIntCast(space->idx[l] + 1, &gridx);
1457:       PetscBLASIntCast(space->idy[l] + 1, &gcidx);
1458:       PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
1459:       j = Cblacs_pnum(a->grid->ictxt, rsrc, csrc);
1460:       nlengths[j]++;
1461:       owner[i] = j;
1462:       i++;
1463:     }
1464:     space = space_next;
1465:   }

1467:   /* Now check what procs get messages - and compute nsends. */
1468:   PetscCalloc1(size, &sizes);
1469:   for (i = 0, nsends = 0; i < size; i++) {
1470:     if (nlengths[i]) {
1471:       sizes[i] = 1;
1472:       nsends++;
1473:     }
1474:   }

1476:   {
1477:     PetscMPIInt *onodes, *olengths;
1478:     /* Determine the number of messages to expect, their lengths, from from-ids */
1479:     PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives);
1480:     PetscGatherMessageLengths(comm, nsends, nreceives, nlengths, &onodes, &olengths);
1481:     /* since clubbing row,col - lengths are multiplied by 2 */
1482:     for (i = 0; i < nreceives; i++) olengths[i] *= 2;
1483:     PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1);
1484:     /* values are size 'bs2' lengths (and remove earlier factor 2 */
1485:     for (i = 0; i < nreceives; i++) olengths[i] = olengths[i] * bs2 / 2;
1486:     PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2);
1487:     PetscFree(onodes);
1488:     PetscFree(olengths);
1489:   }

1491:   /* do sends:
1492:       1) starts[i] gives the starting index in svalues for stuff going to
1493:          the ith processor
1494:   */
1495:   PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices);
1496:   PetscMalloc1(2 * nsends, &send_waits);
1497:   PetscMalloc2(size, &startv, size, &starti);
1498:   /* use 2 sends the first with all_a, the next with all_i and all_j */
1499:   startv[0] = 0;
1500:   starti[0] = 0;
1501:   for (i = 1; i < size; i++) {
1502:     startv[i] = startv[i - 1] + nlengths[i - 1];
1503:     starti[i] = starti[i - 1] + 2 * nlengths[i - 1];
1504:   }

1506:   i     = 0;
1507:   space = stash->space_head;
1508:   while (space) {
1509:     space_next = space->next;
1510:     sp_idx     = space->idx;
1511:     sp_idy     = space->idy;
1512:     sp_val     = space->val;
1513:     for (l = 0; l < space->local_used; l++) {
1514:       j = owner[i];
1515:       if (bs2 == 1) {
1516:         svalues[startv[j]] = sp_val[l];
1517:       } else {
1518:         PetscInt     k;
1519:         PetscScalar *buf1, *buf2;
1520:         buf1 = svalues + bs2 * startv[j];
1521:         buf2 = space->val + bs2 * l;
1522:         for (k = 0; k < bs2; k++) buf1[k] = buf2[k];
1523:       }
1524:       sindices[starti[j]]               = sp_idx[l];
1525:       sindices[starti[j] + nlengths[j]] = sp_idy[l];
1526:       startv[j]++;
1527:       starti[j]++;
1528:       i++;
1529:     }
1530:     space = space_next;
1531:   }
1532:   startv[0] = 0;
1533:   for (i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1];

1535:   for (i = 0, count = 0; i < size; i++) {
1536:     if (sizes[i]) {
1537:       MPI_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++);
1538:       MPI_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++);
1539:     }
1540:   }
1541: #if defined(PETSC_USE_INFO)
1542:   PetscInfo(NULL, "No of messages: %" PetscInt_FMT "\n", nsends);
1543:   for (i = 0; i < size; i++) {
1544:     if (sizes[i]) PetscInfo(NULL, "Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt))));
1545:   }
1546: #endif
1547:   PetscFree(nlengths);
1548:   PetscFree(owner);
1549:   PetscFree2(startv, starti);
1550:   PetscFree(sizes);

1552:   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
1553:   PetscMalloc1(2 * nreceives, &recv_waits);

1555:   for (i = 0; i < nreceives; i++) {
1556:     recv_waits[2 * i]     = recv_waits1[i];
1557:     recv_waits[2 * i + 1] = recv_waits2[i];
1558:   }
1559:   stash->recv_waits = recv_waits;

1561:   PetscFree(recv_waits1);
1562:   PetscFree(recv_waits2);

1564:   stash->svalues         = svalues;
1565:   stash->sindices        = sindices;
1566:   stash->rvalues         = rvalues;
1567:   stash->rindices        = rindices;
1568:   stash->send_waits      = send_waits;
1569:   stash->nsends          = nsends;
1570:   stash->nrecvs          = nreceives;
1571:   stash->reproduce_count = 0;
1572:   return 0;
1573: }

1575: static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A, PetscInt mb, PetscInt nb)
1576: {
1577:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;

1582:   PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb);
1583:   PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb);
1584:   return 0;
1585: }

1587: /*@
1588:    MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
1589:    the `MATSCALAPACK` matrix

1591:    Logically Collective

1593:    Input Parameters:
1594: +  A  - a `MATSCALAPACK` matrix
1595: .  mb - the row block size
1596: -  nb - the column block size

1598:    Level: intermediate

1600: .seealso: `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()`
1601: @*/
1602: PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A, PetscInt mb, PetscInt nb)
1603: {
1607:   PetscTryMethod(A, "MatScaLAPACKSetBlockSizes_C", (Mat, PetscInt, PetscInt), (A, mb, nb));
1608:   return 0;
1609: }

1611: static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A, PetscInt *mb, PetscInt *nb)
1612: {
1613:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;

1615:   if (mb) *mb = a->mb;
1616:   if (nb) *nb = a->nb;
1617:   return 0;
1618: }

1620: /*@
1621:    MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
1622:    the `MATSCALAPACK` matrix

1624:    Not collective

1626:    Input Parameter:
1627: .  A  - a `MATSCALAPACK` matrix

1629:    Output Parameters:
1630: +  mb - the row block size
1631: -  nb - the column block size

1633:    Level: intermediate

1635: .seealso: `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()`
1636: @*/
1637: PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A, PetscInt *mb, PetscInt *nb)
1638: {
1640:   PetscUseMethod(A, "MatScaLAPACKGetBlockSizes_C", (Mat, PetscInt *, PetscInt *), (A, mb, nb));
1641:   return 0;
1642: }

1644: PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
1645: PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *);

1647: /*MC
1648:    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package

1650:    Use ./configure --download-scalapack to install PETSc to use ScaLAPACK

1652:    Options Database Keys:
1653: +  -mat_type scalapack - sets the matrix type to `MATSCALAPACK` during a call to `MatSetFromOptions()`
1654: .  -pc_factor_mat_solver_type scalapack - to use this direct solver with the option -pc_type lu
1655: .  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1656: -  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)

1658:   Note:
1659:    Note unlike most matrix formats, this format does not store all the matrix entries for a contiguous
1660:    range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on
1661:    the given rank.

1663:    Level: beginner

1665: .seealso: `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()`
1666: M*/

1668: PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1669: {
1670:   Mat_ScaLAPACK      *a;
1671:   PetscBool           flg, flg1;
1672:   Mat_ScaLAPACK_Grid *grid;
1673:   MPI_Comm            icomm;
1674:   PetscBLASInt        nprow, npcol, myrow, mycol;
1675:   PetscInt            optv1, k = 2, array[2] = {0, 0};
1676:   PetscMPIInt         size;

1678:   PetscMemcpy(A->ops, &MatOps_Values, sizeof(struct _MatOps));
1679:   A->insertmode = NOT_SET_VALUES;

1681:   MatStashCreate_Private(PetscObjectComm((PetscObject)A), 1, &A->stash);
1682:   A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
1683:   A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1684:   A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
1685:   A->stash.ScatterDestroy = NULL;

1687:   PetscNew(&a);
1688:   A->data = (void *)a;

1690:   /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1691:   if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
1692:     MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_ScaLAPACK_keyval, (void *)0);
1693:     PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free);
1694:     PetscCitationsRegister(ScaLAPACKCitation, &ScaLAPACKCite);
1695:   }
1696:   PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL);
1697:   MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg);
1698:   if (!flg) {
1699:     PetscNew(&grid);

1701:     MPI_Comm_size(icomm, &size);
1702:     grid->nprow = (PetscInt)(PetscSqrtReal((PetscReal)size) + 0.001);

1704:     PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "ScaLAPACK Grid Options", "Mat");
1705:     PetscOptionsInt("-mat_scalapack_grid_height", "Grid Height", "None", grid->nprow, &optv1, &flg1);
1706:     if (flg1) {
1708:       grid->nprow = optv1;
1709:     }
1710:     PetscOptionsEnd();

1712:     if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */
1713:     grid->npcol = size / grid->nprow;
1714:     PetscBLASIntCast(grid->nprow, &nprow);
1715:     PetscBLASIntCast(grid->npcol, &npcol);
1716:     grid->ictxt = Csys2blacs_handle(icomm);
1717:     Cblacs_gridinit(&grid->ictxt, "R", nprow, npcol);
1718:     Cblacs_gridinfo(grid->ictxt, &nprow, &npcol, &myrow, &mycol);
1719:     grid->grid_refct = 1;
1720:     grid->nprow      = nprow;
1721:     grid->npcol      = npcol;
1722:     grid->myrow      = myrow;
1723:     grid->mycol      = mycol;
1724:     /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1725:     grid->ictxrow = Csys2blacs_handle(icomm);
1726:     Cblacs_gridinit(&grid->ictxrow, "R", 1, size);
1727:     grid->ictxcol = Csys2blacs_handle(icomm);
1728:     Cblacs_gridinit(&grid->ictxcol, "R", size, 1);
1729:     MPI_Comm_set_attr(icomm, Petsc_ScaLAPACK_keyval, (void *)grid);

1731:   } else grid->grid_refct++;
1732:   PetscCommDestroy(&icomm);
1733:   a->grid = grid;
1734:   a->mb   = DEFAULT_BLOCKSIZE;
1735:   a->nb   = DEFAULT_BLOCKSIZE;

1737:   PetscOptionsBegin(PetscObjectComm((PetscObject)A), NULL, "ScaLAPACK Options", "Mat");
1738:   PetscOptionsIntArray("-mat_scalapack_block_sizes", "Size of the blocks to use (one or two comma-separated integers)", "MatCreateScaLAPACK", array, &k, &flg);
1739:   if (flg) {
1740:     a->mb = array[0];
1741:     a->nb = (k > 1) ? array[1] : a->mb;
1742:   }
1743:   PetscOptionsEnd();

1745:   a->roworiented = PETSC_TRUE;
1746:   PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_ScaLAPACK);
1747:   PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", MatScaLAPACKSetBlockSizes_ScaLAPACK);
1748:   PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", MatScaLAPACKGetBlockSizes_ScaLAPACK);
1749:   PetscObjectChangeTypeName((PetscObject)A, MATSCALAPACK);
1750:   return 0;
1751: }

1753: /*@C
1754:    MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1755:    (2D block cyclic distribution) for a `MATSCALAPACK` matrix

1757:    Collective

1759:    Input Parameters:
1760: +  comm - MPI communicator
1761: .  mb   - row block size (or `PETSC_DECIDE` to have it set)
1762: .  nb   - column block size (or `PETSC_DECIDE` to have it set)
1763: .  M    - number of global rows
1764: .  N    - number of global columns
1765: .  rsrc - coordinate of process that owns the first row of the distributed matrix
1766: -  csrc - coordinate of process that owns the first column of the distributed matrix

1768:    Output Parameter:
1769: .  A - the matrix

1771:    Options Database Key:
1772: .  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)

1774:    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
1775:    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1776:    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]

1778:    Note:
1779:    If `PETSC_DECIDE` is used for the block sizes, then an appropriate value
1780:    is chosen.

1782:    Storage Information:
1783:    Storate is completely managed by ScaLAPACK, so this requires PETSc to be
1784:    configured with ScaLAPACK. In particular, PETSc's local sizes lose
1785:    significance and are thus ignored. The block sizes refer to the values
1786:    used for the distributed matrix, not the same meaning as in `MATBAIJ`.

1788:    Level: intermediate

1790: .seealso: `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`
1791: @*/
1792: PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm, PetscInt mb, PetscInt nb, PetscInt M, PetscInt N, PetscInt rsrc, PetscInt csrc, Mat *A)
1793: {
1794:   Mat_ScaLAPACK *a;
1795:   PetscInt       m, n;

1797:   MatCreate(comm, A);
1798:   MatSetType(*A, MATSCALAPACK);
1800:   /* rows and columns are NOT distributed according to PetscSplitOwnership */
1801:   m = PETSC_DECIDE;
1802:   PetscSplitOwnershipEqual(comm, &m, &M);
1803:   n = PETSC_DECIDE;
1804:   PetscSplitOwnershipEqual(comm, &n, &N);
1805:   MatSetSizes(*A, m, n, M, N);
1806:   a = (Mat_ScaLAPACK *)(*A)->data;
1807:   PetscBLASIntCast(M, &a->M);
1808:   PetscBLASIntCast(N, &a->N);
1809:   PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb);
1810:   PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb);
1811:   PetscBLASIntCast(rsrc, &a->rsrc);
1812:   PetscBLASIntCast(csrc, &a->csrc);
1813:   MatSetUp(*A);
1814:   return 0;
1815: }