Actual source code: matscalapack.c
1: #include <petsc/private/petscscalapack.h>
3: const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n"
4: " AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n"
5: " J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n"
6: " G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n"
7: " TITLE = {Sca{LAPACK} Users' Guide},\n"
8: " PUBLISHER = {SIAM},\n"
9: " ADDRESS = {Philadelphia, PA},\n"
10: " YEAR = 1997\n"
11: "}\n";
12: static PetscBool ScaLAPACKCite = PETSC_FALSE;
14: #define DEFAULT_BLOCKSIZE 64
16: /*
17: The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
18: is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
19: */
20: static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
22: static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
23: {
24: PetscFunctionBegin;
25: PetscCall(PetscInfo(NULL, "Freeing Petsc_ScaLAPACK_keyval\n"));
26: PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval));
27: PetscFunctionReturn(PETSC_SUCCESS);
28: }
30: static PetscErrorCode MatView_ScaLAPACK(Mat A, PetscViewer viewer)
31: {
32: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
33: PetscBool iascii;
34: PetscViewerFormat format;
35: Mat Adense;
37: PetscFunctionBegin;
38: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
39: if (iascii) {
40: PetscCall(PetscViewerGetFormat(viewer, &format));
41: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
42: PetscCall(PetscViewerASCIIPrintf(viewer, "block sizes: %d,%d\n", (int)a->mb, (int)a->nb));
43: PetscCall(PetscViewerASCIIPrintf(viewer, "grid height=%d, grid width=%d\n", (int)a->grid->nprow, (int)a->grid->npcol));
44: PetscCall(PetscViewerASCIIPrintf(viewer, "coordinates of process owning first row and column: (%d,%d)\n", (int)a->rsrc, (int)a->csrc));
45: PetscCall(PetscViewerASCIIPrintf(viewer, "dimension of largest local matrix: %d x %d\n", (int)a->locr, (int)a->locc));
46: PetscFunctionReturn(PETSC_SUCCESS);
47: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
48: PetscFunctionReturn(PETSC_SUCCESS);
49: }
50: }
51: /* convert to dense format and call MatView() */
52: PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense));
53: PetscCall(MatView(Adense, viewer));
54: PetscCall(MatDestroy(&Adense));
55: PetscFunctionReturn(PETSC_SUCCESS);
56: }
58: static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A, MatInfoType flag, MatInfo *info)
59: {
60: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
61: PetscLogDouble isend[2], irecv[2];
63: PetscFunctionBegin;
64: info->block_size = 1.0;
66: isend[0] = a->lld * a->locc; /* locally allocated */
67: isend[1] = a->locr * a->locc; /* used submatrix */
68: if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
69: info->nz_allocated = isend[0];
70: info->nz_used = isend[1];
71: } else if (flag == MAT_GLOBAL_MAX) {
72: PetscCallMPI(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
73: info->nz_allocated = irecv[0];
74: info->nz_used = irecv[1];
75: } else if (flag == MAT_GLOBAL_SUM) {
76: PetscCallMPI(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
77: info->nz_allocated = irecv[0];
78: info->nz_used = irecv[1];
79: }
81: info->nz_unneeded = 0;
82: info->assemblies = A->num_ass;
83: info->mallocs = 0;
84: info->memory = 0; /* REVIEW ME */
85: info->fill_ratio_given = 0;
86: info->fill_ratio_needed = 0;
87: info->factor_mallocs = 0;
88: PetscFunctionReturn(PETSC_SUCCESS);
89: }
91: static PetscErrorCode MatSetOption_ScaLAPACK(Mat A, MatOption op, PetscBool flg)
92: {
93: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
95: PetscFunctionBegin;
96: switch (op) {
97: case MAT_NEW_NONZERO_LOCATIONS:
98: case MAT_NEW_NONZERO_LOCATION_ERR:
99: case MAT_NEW_NONZERO_ALLOCATION_ERR:
100: case MAT_SYMMETRIC:
101: case MAT_SORTED_FULL:
102: case MAT_HERMITIAN:
103: break;
104: case MAT_ROW_ORIENTED:
105: a->roworiented = flg;
106: break;
107: default:
108: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported option %s", MatOptions[op]);
109: }
110: PetscFunctionReturn(PETSC_SUCCESS);
111: }
113: static PetscErrorCode MatSetValues_ScaLAPACK(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode)
114: {
115: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
116: PetscInt i, j;
117: PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc;
118: PetscBool roworiented = a->roworiented;
120: PetscFunctionBegin;
121: PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode);
122: for (i = 0; i < nr; i++) {
123: if (rows[i] < 0) continue;
124: PetscCall(PetscBLASIntCast(rows[i] + 1, &gridx));
125: for (j = 0; j < nc; j++) {
126: if (cols[j] < 0) continue;
127: PetscCall(PetscBLASIntCast(cols[j] + 1, &gcidx));
128: PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
129: if (rsrc == a->grid->myrow && csrc == a->grid->mycol) {
130: if (roworiented) {
131: switch (imode) {
132: case INSERT_VALUES:
133: a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i * nc + j];
134: break;
135: default:
136: a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i * nc + j];
137: break;
138: }
139: } else {
140: switch (imode) {
141: case INSERT_VALUES:
142: a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i + j * nr];
143: break;
144: default:
145: a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i + j * nr];
146: break;
147: }
148: }
149: } else {
150: PetscCheck(!A->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
151: A->assembled = PETSC_FALSE;
152: PetscCall(MatStashValuesRow_Private(&A->stash, rows[i], 1, cols + j, roworiented ? vals + i * nc + j : vals + i + j * nr, (PetscBool)(imode == ADD_VALUES)));
153: }
154: }
155: }
156: PetscFunctionReturn(PETSC_SUCCESS);
157: }
159: static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A, PetscBool transpose, PetscBool hermitian, PetscScalar beta, const PetscScalar *x, PetscScalar *y)
160: {
161: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
162: PetscScalar *x2d, *y2d, alpha = 1.0;
163: const PetscInt *ranges;
164: PetscBLASInt xdesc[9], ydesc[9], x2desc[9], y2desc[9], mb, nb, lszx, lszy, zero = 0, one = 1, xlld, ylld, info;
166: PetscFunctionBegin;
167: if (transpose) {
168: /* create ScaLAPACK descriptors for vectors (1d block distribution) */
169: PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
170: PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
171: PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &xlld));
172: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
173: PetscCheckScaLapackInfo("descinit", info);
174: PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
175: PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* y block size */
176: ylld = 1;
177: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &ylld, &info));
178: PetscCheckScaLapackInfo("descinit", info);
180: /* allocate 2d vectors */
181: lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
182: lszy = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
183: PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
184: PetscCall(PetscBLASIntCast(PetscMax(1, lszx), &xlld));
186: /* create ScaLAPACK descriptors for vectors (2d block distribution) */
187: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
188: PetscCheckScaLapackInfo("descinit", info);
189: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &ylld, &info));
190: PetscCheckScaLapackInfo("descinit", info);
192: /* redistribute x as a column of a 2d matrix */
193: PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));
195: /* redistribute y as a row of a 2d matrix */
196: if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxrow));
198: /* call PBLAS subroutine */
199: if (hermitian) PetscCallBLAS("PBLASgemv", PBLASgemv_("C", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
200: else PetscCallBLAS("PBLASgemv", PBLASgemv_("T", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
202: /* redistribute y from a row of a 2d matrix */
203: PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxrow));
205: } else { /* non-transpose */
207: /* create ScaLAPACK descriptors for vectors (1d block distribution) */
208: PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
209: PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* x block size */
210: xlld = 1;
211: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &xlld, &info));
212: PetscCheckScaLapackInfo("descinit", info);
213: PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
214: PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* y block size */
215: PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &ylld));
216: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &ylld, &info));
217: PetscCheckScaLapackInfo("descinit", info);
219: /* allocate 2d vectors */
220: lszy = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
221: lszx = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
222: PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
223: PetscCall(PetscBLASIntCast(PetscMax(1, lszy), &ylld));
225: /* create ScaLAPACK descriptors for vectors (2d block distribution) */
226: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &xlld, &info));
227: PetscCheckScaLapackInfo("descinit", info);
228: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &ylld, &info));
229: PetscCheckScaLapackInfo("descinit", info);
231: /* redistribute x as a row of a 2d matrix */
232: PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxrow));
234: /* redistribute y as a column of a 2d matrix */
235: if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxcol));
237: /* call PBLAS subroutine */
238: PetscCallBLAS("PBLASgemv", PBLASgemv_("N", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
240: /* redistribute y from a column of a 2d matrix */
241: PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxcol));
242: }
243: PetscCall(PetscFree2(x2d, y2d));
244: PetscFunctionReturn(PETSC_SUCCESS);
245: }
247: static PetscErrorCode MatMult_ScaLAPACK(Mat A, Vec x, Vec y)
248: {
249: const PetscScalar *xarray;
250: PetscScalar *yarray;
252: PetscFunctionBegin;
253: PetscCall(VecGetArrayRead(x, &xarray));
254: PetscCall(VecGetArray(y, &yarray));
255: PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, PETSC_FALSE, 0.0, xarray, yarray));
256: PetscCall(VecRestoreArrayRead(x, &xarray));
257: PetscCall(VecRestoreArray(y, &yarray));
258: PetscFunctionReturn(PETSC_SUCCESS);
259: }
261: static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A, Vec x, Vec y)
262: {
263: const PetscScalar *xarray;
264: PetscScalar *yarray;
266: PetscFunctionBegin;
267: PetscCall(VecGetArrayRead(x, &xarray));
268: PetscCall(VecGetArray(y, &yarray));
269: PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_FALSE, 0.0, xarray, yarray));
270: PetscCall(VecRestoreArrayRead(x, &xarray));
271: PetscCall(VecRestoreArray(y, &yarray));
272: PetscFunctionReturn(PETSC_SUCCESS);
273: }
275: static PetscErrorCode MatMultHermitianTranspose_ScaLAPACK(Mat A, Vec x, Vec y)
276: {
277: const PetscScalar *xarray;
278: PetscScalar *yarray;
280: PetscFunctionBegin;
281: PetscCall(VecGetArrayRead(x, &xarray));
282: PetscCall(VecGetArrayWrite(y, &yarray));
283: PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_TRUE, 0.0, xarray, yarray));
284: PetscCall(VecRestoreArrayRead(x, &xarray));
285: PetscCall(VecRestoreArrayWrite(y, &yarray));
286: PetscFunctionReturn(PETSC_SUCCESS);
287: }
289: static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
290: {
291: const PetscScalar *xarray;
292: PetscScalar *zarray;
294: PetscFunctionBegin;
295: if (y != z) PetscCall(VecCopy(y, z));
296: PetscCall(VecGetArrayRead(x, &xarray));
297: PetscCall(VecGetArray(z, &zarray));
298: PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, PETSC_FALSE, 1.0, xarray, zarray));
299: PetscCall(VecRestoreArrayRead(x, &xarray));
300: PetscCall(VecRestoreArray(z, &zarray));
301: PetscFunctionReturn(PETSC_SUCCESS);
302: }
304: static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
305: {
306: const PetscScalar *xarray;
307: PetscScalar *zarray;
309: PetscFunctionBegin;
310: if (y != z) PetscCall(VecCopy(y, z));
311: PetscCall(VecGetArrayRead(x, &xarray));
312: PetscCall(VecGetArray(z, &zarray));
313: PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_FALSE, 1.0, xarray, zarray));
314: PetscCall(VecRestoreArrayRead(x, &xarray));
315: PetscCall(VecRestoreArray(z, &zarray));
316: PetscFunctionReturn(PETSC_SUCCESS);
317: }
319: static PetscErrorCode MatMultHermitianTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
320: {
321: const PetscScalar *xarray;
322: PetscScalar *zarray;
324: PetscFunctionBegin;
325: if (y != z) PetscCall(VecCopy(y, z));
326: PetscCall(VecGetArrayRead(x, &xarray));
327: PetscCall(VecGetArray(z, &zarray));
328: PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_TRUE, 1.0, xarray, zarray));
329: PetscCall(VecRestoreArrayRead(x, &xarray));
330: PetscCall(VecRestoreArray(z, &zarray));
331: PetscFunctionReturn(PETSC_SUCCESS);
332: }
334: PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
335: {
336: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
337: Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data;
338: Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data;
339: PetscScalar sone = 1.0, zero = 0.0;
340: PetscBLASInt one = 1;
342: PetscFunctionBegin;
343: PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "N", &a->M, &b->N, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
344: C->assembled = PETSC_TRUE;
345: PetscFunctionReturn(PETSC_SUCCESS);
346: }
348: PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
349: {
350: PetscFunctionBegin;
351: PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
352: PetscCall(MatSetType(C, MATSCALAPACK));
353: PetscCall(MatSetUp(C));
354: C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
355: PetscFunctionReturn(PETSC_SUCCESS);
356: }
358: static PetscErrorCode MatTransposeMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
359: {
360: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
361: Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data;
362: Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data;
363: PetscScalar sone = 1.0, zero = 0.0;
364: PetscBLASInt one = 1;
366: PetscFunctionBegin;
367: PetscCallBLAS("PBLASgemm", PBLASgemm_("T", "N", &a->N, &b->N, &a->M, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
368: C->assembled = PETSC_TRUE;
369: PetscFunctionReturn(PETSC_SUCCESS);
370: }
372: static PetscErrorCode MatTransposeMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
373: {
374: PetscFunctionBegin;
375: PetscCall(MatSetSizes(C, A->cmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
376: PetscCall(MatSetType(C, MATSCALAPACK));
377: PetscCall(MatSetUp(C));
378: C->ops->transposematmultnumeric = MatTransposeMatMultNumeric_ScaLAPACK;
379: PetscFunctionReturn(PETSC_SUCCESS);
380: }
382: static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
383: {
384: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
385: Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data;
386: Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data;
387: PetscScalar sone = 1.0, zero = 0.0;
388: PetscBLASInt one = 1;
390: PetscFunctionBegin;
391: PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "T", &a->M, &b->M, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
392: C->assembled = PETSC_TRUE;
393: PetscFunctionReturn(PETSC_SUCCESS);
394: }
396: static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
397: {
398: PetscFunctionBegin;
399: PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE));
400: PetscCall(MatSetType(C, MATSCALAPACK));
401: PetscCall(MatSetUp(C));
402: C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_ScaLAPACK;
403: PetscFunctionReturn(PETSC_SUCCESS);
404: }
406: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
407: {
408: Mat_Product *product = C->product;
410: PetscFunctionBegin;
411: switch (product->type) {
412: case MATPRODUCT_AB:
413: C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
414: C->ops->productsymbolic = MatProductSymbolic_AB;
415: break;
416: case MATPRODUCT_AtB:
417: C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_ScaLAPACK;
418: C->ops->productsymbolic = MatProductSymbolic_AtB;
419: break;
420: case MATPRODUCT_ABt:
421: C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
422: C->ops->productsymbolic = MatProductSymbolic_ABt;
423: break;
424: default:
425: SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices", MatProductTypes[product->type]);
426: }
427: PetscFunctionReturn(PETSC_SUCCESS);
428: }
430: static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A, Vec D)
431: {
432: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
433: PetscScalar *darray, *d2d, v;
434: const PetscInt *ranges;
435: PetscBLASInt j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;
437: PetscFunctionBegin;
438: PetscCall(VecGetArray(D, &darray));
440: if (A->rmap->N <= A->cmap->N) { /* row version */
442: /* create ScaLAPACK descriptor for vector (1d block distribution) */
443: PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
444: PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
445: PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &dlld));
446: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
447: PetscCheckScaLapackInfo("descinit", info);
449: /* allocate 2d vector */
450: lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
451: PetscCall(PetscCalloc1(lszd, &d2d));
452: PetscCall(PetscBLASIntCast(PetscMax(1, lszd), &dlld));
454: /* create ScaLAPACK descriptor for vector (2d block distribution) */
455: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
456: PetscCheckScaLapackInfo("descinit", info);
458: /* collect diagonal */
459: for (j = 1; j <= a->M; j++) {
460: PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("R", " ", &v, a->loc, &j, &j, a->desc));
461: PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &j, &one, d2desc, &v));
462: }
464: /* redistribute d from a column of a 2d matrix */
465: PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxcol));
466: PetscCall(PetscFree(d2d));
468: } else { /* column version */
470: /* create ScaLAPACK descriptor for vector (1d block distribution) */
471: PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
472: PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
473: dlld = 1;
474: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
475: PetscCheckScaLapackInfo("descinit", info);
477: /* allocate 2d vector */
478: lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
479: PetscCall(PetscCalloc1(lszd, &d2d));
481: /* create ScaLAPACK descriptor for vector (2d block distribution) */
482: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
483: PetscCheckScaLapackInfo("descinit", info);
485: /* collect diagonal */
486: for (j = 1; j <= a->N; j++) {
487: PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("C", " ", &v, a->loc, &j, &j, a->desc));
488: PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &one, &j, d2desc, &v));
489: }
491: /* redistribute d from a row of a 2d matrix */
492: PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxrow));
493: PetscCall(PetscFree(d2d));
494: }
496: PetscCall(VecRestoreArray(D, &darray));
497: PetscCall(VecAssemblyBegin(D));
498: PetscCall(VecAssemblyEnd(D));
499: PetscFunctionReturn(PETSC_SUCCESS);
500: }
502: static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A, Vec L, Vec R)
503: {
504: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
505: const PetscScalar *d;
506: const PetscInt *ranges;
507: PetscScalar *d2d;
508: PetscBLASInt i, j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;
510: PetscFunctionBegin;
511: if (R) {
512: PetscCall(VecGetArrayRead(R, &d));
513: /* create ScaLAPACK descriptor for vector (1d block distribution) */
514: PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
515: PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
516: dlld = 1;
517: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
518: PetscCheckScaLapackInfo("descinit", info);
520: /* allocate 2d vector */
521: lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
522: PetscCall(PetscCalloc1(lszd, &d2d));
524: /* create ScaLAPACK descriptor for vector (2d block distribution) */
525: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
526: PetscCheckScaLapackInfo("descinit", info);
528: /* redistribute d to a row of a 2d matrix */
529: PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxrow));
531: /* broadcast along process columns */
532: if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld);
533: else Cdgebr2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld, 0, a->grid->mycol);
535: /* local scaling */
536: for (j = 0; j < a->locc; j++)
537: for (i = 0; i < a->locr; i++) a->loc[i + j * a->lld] *= d2d[j];
539: PetscCall(PetscFree(d2d));
540: PetscCall(VecRestoreArrayRead(R, &d));
541: }
542: if (L) {
543: PetscCall(VecGetArrayRead(L, &d));
544: /* create ScaLAPACK descriptor for vector (1d block distribution) */
545: PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
546: PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
547: PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &dlld));
548: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
549: PetscCheckScaLapackInfo("descinit", info);
551: /* allocate 2d vector */
552: lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
553: PetscCall(PetscCalloc1(lszd, &d2d));
554: PetscCall(PetscBLASIntCast(PetscMax(1, lszd), &dlld));
556: /* create ScaLAPACK descriptor for vector (2d block distribution) */
557: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
558: PetscCheckScaLapackInfo("descinit", info);
560: /* redistribute d to a column of a 2d matrix */
561: PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxcol));
563: /* broadcast along process rows */
564: if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld);
565: else Cdgebr2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld, a->grid->myrow, 0);
567: /* local scaling */
568: for (i = 0; i < a->locr; i++)
569: for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] *= d2d[i];
571: PetscCall(PetscFree(d2d));
572: PetscCall(VecRestoreArrayRead(L, &d));
573: }
574: PetscFunctionReturn(PETSC_SUCCESS);
575: }
577: static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A, PetscBool *missing, PetscInt *d)
578: {
579: PetscFunctionBegin;
580: *missing = PETSC_FALSE;
581: PetscFunctionReturn(PETSC_SUCCESS);
582: }
584: static PetscErrorCode MatScale_ScaLAPACK(Mat X, PetscScalar a)
585: {
586: Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
587: PetscBLASInt n, one = 1;
589: PetscFunctionBegin;
590: n = x->lld * x->locc;
591: PetscCallBLAS("BLASscal", BLASscal_(&n, &a, x->loc, &one));
592: PetscFunctionReturn(PETSC_SUCCESS);
593: }
595: static PetscErrorCode MatShift_ScaLAPACK(Mat X, PetscScalar alpha)
596: {
597: Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
598: PetscBLASInt i, n;
599: PetscScalar v;
601: PetscFunctionBegin;
602: n = PetscMin(x->M, x->N);
603: for (i = 1; i <= n; i++) {
604: PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("-", " ", &v, x->loc, &i, &i, x->desc));
605: v += alpha;
606: PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(x->loc, &i, &i, x->desc, &v));
607: }
608: PetscFunctionReturn(PETSC_SUCCESS);
609: }
611: static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
612: {
613: Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
614: Mat_ScaLAPACK *y = (Mat_ScaLAPACK *)Y->data;
615: PetscBLASInt one = 1;
616: PetscScalar beta = 1.0;
618: PetscFunctionBegin;
619: MatScaLAPACKCheckDistribution(Y, 1, X, 3);
620: PetscCallBLAS("SCALAPACKmatadd", SCALAPACKmatadd_(&x->M, &x->N, &alpha, x->loc, &one, &one, x->desc, &beta, y->loc, &one, &one, y->desc));
621: PetscCall(PetscObjectStateIncrease((PetscObject)Y));
622: PetscFunctionReturn(PETSC_SUCCESS);
623: }
625: static PetscErrorCode MatCopy_ScaLAPACK(Mat A, Mat B, MatStructure str)
626: {
627: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
628: Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data;
630: PetscFunctionBegin;
631: PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
632: PetscCall(PetscObjectStateIncrease((PetscObject)B));
633: PetscFunctionReturn(PETSC_SUCCESS);
634: }
636: static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A, MatDuplicateOption op, Mat *B)
637: {
638: Mat Bs;
639: MPI_Comm comm;
640: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b;
642: PetscFunctionBegin;
643: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
644: PetscCall(MatCreate(comm, &Bs));
645: PetscCall(MatSetSizes(Bs, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
646: PetscCall(MatSetType(Bs, MATSCALAPACK));
647: b = (Mat_ScaLAPACK *)Bs->data;
648: b->M = a->M;
649: b->N = a->N;
650: b->mb = a->mb;
651: b->nb = a->nb;
652: b->rsrc = a->rsrc;
653: b->csrc = a->csrc;
654: PetscCall(MatSetUp(Bs));
655: *B = Bs;
656: if (op == MAT_COPY_VALUES) PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
657: Bs->assembled = PETSC_TRUE;
658: PetscFunctionReturn(PETSC_SUCCESS);
659: }
661: static PetscErrorCode MatTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
662: {
663: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b;
664: Mat Bs = *B;
665: PetscBLASInt one = 1;
666: PetscScalar sone = 1.0, zero = 0.0;
667: #if defined(PETSC_USE_COMPLEX)
668: PetscInt i, j;
669: #endif
671: PetscFunctionBegin;
672: if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
673: PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
674: PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
675: *B = Bs;
676: b = (Mat_ScaLAPACK *)Bs->data;
677: PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
678: #if defined(PETSC_USE_COMPLEX)
679: /* undo conjugation */
680: for (i = 0; i < b->locr; i++)
681: for (j = 0; j < b->locc; j++) b->loc[i + j * b->lld] = PetscConj(b->loc[i + j * b->lld]);
682: #endif
683: Bs->assembled = PETSC_TRUE;
684: PetscFunctionReturn(PETSC_SUCCESS);
685: }
687: static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
688: {
689: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
690: PetscInt i, j;
692: PetscFunctionBegin;
693: for (i = 0; i < a->locr; i++)
694: for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] = PetscConj(a->loc[i + j * a->lld]);
695: PetscFunctionReturn(PETSC_SUCCESS);
696: }
698: static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
699: {
700: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b;
701: Mat Bs = *B;
702: PetscBLASInt one = 1;
703: PetscScalar sone = 1.0, zero = 0.0;
705: PetscFunctionBegin;
706: PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
707: PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
708: *B = Bs;
709: b = (Mat_ScaLAPACK *)Bs->data;
710: PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
711: Bs->assembled = PETSC_TRUE;
712: PetscFunctionReturn(PETSC_SUCCESS);
713: }
715: static PetscErrorCode MatSolve_ScaLAPACK(Mat A, Vec B, Vec X)
716: {
717: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
718: PetscScalar *x, *x2d;
719: const PetscInt *ranges;
720: PetscBLASInt xdesc[9], x2desc[9], mb, lszx, zero = 0, one = 1, xlld, nrhs = 1, info;
722: PetscFunctionBegin;
723: PetscCall(VecCopy(B, X));
724: PetscCall(VecGetArray(X, &x));
726: /* create ScaLAPACK descriptor for a vector (1d block distribution) */
727: PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
728: PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
729: PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &xlld));
730: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
731: PetscCheckScaLapackInfo("descinit", info);
733: /* allocate 2d vector */
734: lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
735: PetscCall(PetscMalloc1(lszx, &x2d));
736: PetscCall(PetscBLASIntCast(PetscMax(1, lszx), &xlld));
738: /* create ScaLAPACK descriptor for a vector (2d block distribution) */
739: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
740: PetscCheckScaLapackInfo("descinit", info);
742: /* redistribute x as a column of a 2d matrix */
743: PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));
745: /* call ScaLAPACK subroutine */
746: switch (A->factortype) {
747: case MAT_FACTOR_LU:
748: PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &nrhs, a->loc, &one, &one, a->desc, a->pivots, x2d, &one, &one, x2desc, &info));
749: PetscCheckScaLapackInfo("getrs", info);
750: break;
751: case MAT_FACTOR_CHOLESKY:
752: PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &nrhs, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &info));
753: PetscCheckScaLapackInfo("potrs", info);
754: break;
755: default:
756: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
757: }
759: /* redistribute x from a column of a 2d matrix */
760: PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x2d, &one, &one, x2desc, x, &one, &one, xdesc, &a->grid->ictxcol));
762: PetscCall(PetscFree(x2d));
763: PetscCall(VecRestoreArray(X, &x));
764: PetscFunctionReturn(PETSC_SUCCESS);
765: }
767: static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A, Vec B, Vec Y, Vec X)
768: {
769: PetscFunctionBegin;
770: PetscCall(MatSolve_ScaLAPACK(A, B, X));
771: PetscCall(VecAXPY(X, 1, Y));
772: PetscFunctionReturn(PETSC_SUCCESS);
773: }
775: static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A, Mat B, Mat X)
776: {
777: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *x;
778: PetscBool flg1, flg2;
779: PetscBLASInt one = 1, info;
780: Mat C;
781: MatType type;
783: PetscFunctionBegin;
784: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSCALAPACK, &flg1));
785: PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSCALAPACK, &flg2));
786: if (flg1 && flg2) MatScaLAPACKCheckDistribution(B, 2, X, 3);
787: if (flg2) {
788: if (flg1) PetscCall(MatCopy(B, X, SAME_NONZERO_PATTERN));
789: else PetscCall(MatConvert(B, MATSCALAPACK, MAT_REUSE_MATRIX, &X));
790: C = X;
791: } else {
792: PetscCall(MatConvert(B, MATSCALAPACK, MAT_INITIAL_MATRIX, &C));
793: }
794: x = (Mat_ScaLAPACK *)C->data;
796: switch (A->factortype) {
797: case MAT_FACTOR_LU:
798: PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &x->N, a->loc, &one, &one, a->desc, a->pivots, x->loc, &one, &one, x->desc, &info));
799: PetscCheckScaLapackInfo("getrs", info);
800: break;
801: case MAT_FACTOR_CHOLESKY:
802: PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &x->N, a->loc, &one, &one, a->desc, x->loc, &one, &one, x->desc, &info));
803: PetscCheckScaLapackInfo("potrs", info);
804: break;
805: default:
806: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
807: }
808: if (!flg2) {
809: PetscCall(MatGetType(X, &type));
810: PetscCall(MatConvert(C, type, MAT_REUSE_MATRIX, &X));
811: PetscCall(MatDestroy(&C));
812: }
813: PetscFunctionReturn(PETSC_SUCCESS);
814: }
816: static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A, IS row, IS col, const MatFactorInfo *factorinfo)
817: {
818: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
819: PetscBLASInt one = 1, info;
821: PetscFunctionBegin;
822: if (!a->pivots) PetscCall(PetscMalloc1(a->locr + a->mb, &a->pivots));
823: PetscCallBLAS("SCALAPACKgetrf", SCALAPACKgetrf_(&a->M, &a->N, a->loc, &one, &one, a->desc, a->pivots, &info));
824: PetscCheckScaLapackInfo("getrf", info);
825: A->factortype = MAT_FACTOR_LU;
826: A->assembled = PETSC_TRUE;
828: PetscCall(PetscFree(A->solvertype));
829: PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
830: PetscFunctionReturn(PETSC_SUCCESS);
831: }
833: static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
834: {
835: PetscFunctionBegin;
836: PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
837: PetscCall(MatLUFactor_ScaLAPACK(F, 0, 0, info));
838: PetscFunctionReturn(PETSC_SUCCESS);
839: }
841: static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
842: {
843: PetscFunctionBegin;
844: /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
845: PetscFunctionReturn(PETSC_SUCCESS);
846: }
848: static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A, IS perm, const MatFactorInfo *factorinfo)
849: {
850: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
851: PetscBLASInt one = 1, info;
853: PetscFunctionBegin;
854: PetscCallBLAS("SCALAPACKpotrf", SCALAPACKpotrf_("L", &a->M, a->loc, &one, &one, a->desc, &info));
855: PetscCheckScaLapackInfo("potrf", info);
856: A->factortype = MAT_FACTOR_CHOLESKY;
857: A->assembled = PETSC_TRUE;
859: PetscCall(PetscFree(A->solvertype));
860: PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
861: PetscFunctionReturn(PETSC_SUCCESS);
862: }
864: static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
865: {
866: PetscFunctionBegin;
867: PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
868: PetscCall(MatCholeskyFactor_ScaLAPACK(F, 0, info));
869: PetscFunctionReturn(PETSC_SUCCESS);
870: }
872: static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS perm, const MatFactorInfo *info)
873: {
874: PetscFunctionBegin;
875: /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
876: PetscFunctionReturn(PETSC_SUCCESS);
877: }
879: static PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A, MatSolverType *type)
880: {
881: PetscFunctionBegin;
882: *type = MATSOLVERSCALAPACK;
883: PetscFunctionReturn(PETSC_SUCCESS);
884: }
886: static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A, MatFactorType ftype, Mat *F)
887: {
888: Mat B;
889: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
891: PetscFunctionBegin;
892: /* Create the factorization matrix */
893: PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->mb, a->nb, a->M, a->N, a->rsrc, a->csrc, &B));
894: B->trivialsymbolic = PETSC_TRUE;
895: B->factortype = ftype;
896: PetscCall(PetscFree(B->solvertype));
897: PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &B->solvertype));
899: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_scalapack_scalapack));
900: *F = B;
901: PetscFunctionReturn(PETSC_SUCCESS);
902: }
904: PETSC_INTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
905: {
906: PetscFunctionBegin;
907: PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_LU, MatGetFactor_scalapack_scalapack));
908: PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_CHOLESKY, MatGetFactor_scalapack_scalapack));
909: PetscFunctionReturn(PETSC_SUCCESS);
910: }
912: static PetscErrorCode MatNorm_ScaLAPACK(Mat A, NormType type, PetscReal *nrm)
913: {
914: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
915: PetscBLASInt one = 1, lwork = 0;
916: const char *ntype;
917: PetscScalar *work = NULL, dummy;
919: PetscFunctionBegin;
920: switch (type) {
921: case NORM_1:
922: ntype = "1";
923: lwork = PetscMax(a->locr, a->locc);
924: break;
925: case NORM_FROBENIUS:
926: ntype = "F";
927: work = &dummy;
928: break;
929: case NORM_INFINITY:
930: ntype = "I";
931: lwork = PetscMax(a->locr, a->locc);
932: break;
933: default:
934: SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type");
935: }
936: if (lwork) PetscCall(PetscMalloc1(lwork, &work));
937: *nrm = SCALAPACKlange_(ntype, &a->M, &a->N, a->loc, &one, &one, a->desc, work);
938: if (lwork) PetscCall(PetscFree(work));
939: PetscFunctionReturn(PETSC_SUCCESS);
940: }
942: static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
943: {
944: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
946: PetscFunctionBegin;
947: PetscCall(PetscArrayzero(a->loc, a->lld * a->locc));
948: PetscFunctionReturn(PETSC_SUCCESS);
949: }
951: static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A, IS *rows, IS *cols)
952: {
953: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
954: PetscInt i, n, nb, isrc, nproc, iproc, *idx;
956: PetscFunctionBegin;
957: if (rows) {
958: n = a->locr;
959: nb = a->mb;
960: isrc = a->rsrc;
961: nproc = a->grid->nprow;
962: iproc = a->grid->myrow;
963: PetscCall(PetscMalloc1(n, &idx));
964: for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
965: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, rows));
966: }
967: if (cols) {
968: n = a->locc;
969: nb = a->nb;
970: isrc = a->csrc;
971: nproc = a->grid->npcol;
972: iproc = a->grid->mycol;
973: PetscCall(PetscMalloc1(n, &idx));
974: for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
975: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, cols));
976: }
977: PetscFunctionReturn(PETSC_SUCCESS);
978: }
980: static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A, MatType newtype, MatReuse reuse, Mat *B)
981: {
982: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
983: Mat Bmpi;
984: MPI_Comm comm;
985: PetscInt i, M = A->rmap->N, N = A->cmap->N, m, n, rstart, rend, nz, ldb;
986: const PetscInt *ranges, *branges, *cwork;
987: const PetscScalar *vwork;
988: PetscBLASInt bdesc[9], bmb, zero = 0, one = 1, lld, info;
989: PetscScalar *barray;
990: PetscBool differ = PETSC_FALSE;
991: PetscMPIInt size;
993: PetscFunctionBegin;
994: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
995: PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
997: if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
998: PetscCallMPI(MPI_Comm_size(comm, &size));
999: PetscCall(PetscLayoutGetRanges((*B)->rmap, &branges));
1000: for (i = 0; i < size; i++)
1001: if (ranges[i + 1] != branges[i + 1]) {
1002: differ = PETSC_TRUE;
1003: break;
1004: }
1005: }
1007: if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
1008: PetscCall(MatCreate(comm, &Bmpi));
1009: m = PETSC_DECIDE;
1010: PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1011: n = PETSC_DECIDE;
1012: PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1013: PetscCall(MatSetSizes(Bmpi, m, n, M, N));
1014: PetscCall(MatSetType(Bmpi, MATDENSE));
1015: PetscCall(MatSetUp(Bmpi));
1017: /* create ScaLAPACK descriptor for B (1d block distribution) */
1018: PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
1019: PetscCall(MatDenseGetLDA(Bmpi, &ldb));
1020: PetscCall(PetscBLASIntCast(PetscMax(ldb, 1), &lld)); /* local leading dimension */
1021: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
1022: PetscCheckScaLapackInfo("descinit", info);
1024: /* redistribute matrix */
1025: PetscCall(MatDenseGetArray(Bmpi, &barray));
1026: PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
1027: PetscCall(MatDenseRestoreArray(Bmpi, &barray));
1028: PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
1029: PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
1031: /* transfer rows of auxiliary matrix to the final matrix B */
1032: PetscCall(MatGetOwnershipRange(Bmpi, &rstart, &rend));
1033: for (i = rstart; i < rend; i++) {
1034: PetscCall(MatGetRow(Bmpi, i, &nz, &cwork, &vwork));
1035: PetscCall(MatSetValues(*B, 1, &i, nz, cwork, vwork, INSERT_VALUES));
1036: PetscCall(MatRestoreRow(Bmpi, i, &nz, &cwork, &vwork));
1037: }
1038: PetscCall(MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY));
1039: PetscCall(MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY));
1040: PetscCall(MatDestroy(&Bmpi));
1042: } else { /* normal cases */
1044: if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1045: else {
1046: PetscCall(MatCreate(comm, &Bmpi));
1047: m = PETSC_DECIDE;
1048: PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1049: n = PETSC_DECIDE;
1050: PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1051: PetscCall(MatSetSizes(Bmpi, m, n, M, N));
1052: PetscCall(MatSetType(Bmpi, MATDENSE));
1053: PetscCall(MatSetUp(Bmpi));
1054: }
1056: /* create ScaLAPACK descriptor for B (1d block distribution) */
1057: PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
1058: PetscCall(MatDenseGetLDA(Bmpi, &ldb));
1059: PetscCall(PetscBLASIntCast(PetscMax(ldb, 1), &lld)); /* local leading dimension */
1060: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
1061: PetscCheckScaLapackInfo("descinit", info);
1063: /* redistribute matrix */
1064: PetscCall(MatDenseGetArray(Bmpi, &barray));
1065: PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
1066: PetscCall(MatDenseRestoreArray(Bmpi, &barray));
1068: PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
1069: PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
1070: if (reuse == MAT_INPLACE_MATRIX) {
1071: PetscCall(MatHeaderReplace(A, &Bmpi));
1072: } else *B = Bmpi;
1073: }
1074: PetscFunctionReturn(PETSC_SUCCESS);
1075: }
1077: static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map, PetscBool *correct)
1078: {
1079: const PetscInt *ranges;
1080: PetscMPIInt size;
1081: PetscInt i, n;
1083: PetscFunctionBegin;
1084: *correct = PETSC_TRUE;
1085: PetscCallMPI(MPI_Comm_size(map->comm, &size));
1086: if (size > 1) {
1087: PetscCall(PetscLayoutGetRanges(map, &ranges));
1088: n = ranges[1] - ranges[0];
1089: for (i = 1; i < size; i++)
1090: if (ranges[i + 1] - ranges[i] != n) break;
1091: *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i + 1] - ranges[i] <= n));
1092: }
1093: PetscFunctionReturn(PETSC_SUCCESS);
1094: }
1096: PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *B)
1097: {
1098: Mat_ScaLAPACK *b;
1099: Mat Bmpi;
1100: MPI_Comm comm;
1101: PetscInt M = A->rmap->N, N = A->cmap->N, m, n;
1102: const PetscInt *ranges, *rows, *cols;
1103: PetscBLASInt adesc[9], amb, zero = 0, one = 1, lld, info;
1104: const PetscScalar *aarray;
1105: IS ir, ic;
1106: PetscInt lda;
1107: PetscBool flg;
1109: PetscFunctionBegin;
1110: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1112: if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1113: else {
1114: PetscCall(MatCreate(comm, &Bmpi));
1115: m = PETSC_DECIDE;
1116: PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1117: n = PETSC_DECIDE;
1118: PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1119: PetscCall(MatSetSizes(Bmpi, m, n, M, N));
1120: PetscCall(MatSetType(Bmpi, MATSCALAPACK));
1121: PetscCall(MatSetUp(Bmpi));
1122: }
1123: b = (Mat_ScaLAPACK *)Bmpi->data;
1125: PetscCall(MatDenseGetLDA(A, &lda));
1126: PetscCall(MatDenseGetArrayRead(A, &aarray));
1127: PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
1128: if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
1129: if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */
1130: /* create ScaLAPACK descriptor for A (1d block distribution) */
1131: PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
1132: PetscCall(PetscBLASIntCast(ranges[1], &amb)); /* row block size */
1133: PetscCall(PetscBLASIntCast(PetscMax(lda, 1), &lld)); /* local leading dimension */
1134: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(adesc, &b->M, &b->N, &amb, &b->N, &zero, &zero, &b->grid->ictxcol, &lld, &info));
1135: PetscCheckScaLapackInfo("descinit", info);
1137: /* redistribute matrix */
1138: PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&b->M, &b->N, aarray, &one, &one, adesc, b->loc, &one, &one, b->desc, &b->grid->ictxcol));
1139: Bmpi->nooffprocentries = PETSC_TRUE;
1140: } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */
1141: PetscCheck(lda == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")", lda, A->rmap->n);
1142: b->roworiented = PETSC_FALSE;
1143: PetscCall(MatGetOwnershipIS(A, &ir, &ic));
1144: PetscCall(ISGetIndices(ir, &rows));
1145: PetscCall(ISGetIndices(ic, &cols));
1146: PetscCall(MatSetValues(Bmpi, A->rmap->n, rows, A->cmap->N, cols, aarray, INSERT_VALUES));
1147: PetscCall(ISRestoreIndices(ir, &rows));
1148: PetscCall(ISRestoreIndices(ic, &cols));
1149: PetscCall(ISDestroy(&ic));
1150: PetscCall(ISDestroy(&ir));
1151: }
1152: PetscCall(MatDenseRestoreArrayRead(A, &aarray));
1153: PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
1154: PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
1155: if (reuse == MAT_INPLACE_MATRIX) {
1156: PetscCall(MatHeaderReplace(A, &Bmpi));
1157: } else *B = Bmpi;
1158: PetscFunctionReturn(PETSC_SUCCESS);
1159: }
1161: PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1162: {
1163: Mat mat_scal;
1164: PetscInt M = A->rmap->N, N = A->cmap->N, rstart = A->rmap->rstart, rend = A->rmap->rend, m, n, row, ncols;
1165: const PetscInt *cols;
1166: const PetscScalar *vals;
1168: PetscFunctionBegin;
1169: if (reuse == MAT_REUSE_MATRIX) {
1170: mat_scal = *newmat;
1171: PetscCall(MatZeroEntries(mat_scal));
1172: } else {
1173: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
1174: m = PETSC_DECIDE;
1175: PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
1176: n = PETSC_DECIDE;
1177: PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
1178: PetscCall(MatSetSizes(mat_scal, m, n, M, N));
1179: PetscCall(MatSetType(mat_scal, MATSCALAPACK));
1180: PetscCall(MatSetUp(mat_scal));
1181: }
1182: for (row = rstart; row < rend; row++) {
1183: PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
1184: PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, INSERT_VALUES));
1185: PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1186: }
1187: PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
1188: PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));
1190: if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
1191: else *newmat = mat_scal;
1192: PetscFunctionReturn(PETSC_SUCCESS);
1193: }
1195: PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1196: {
1197: Mat mat_scal;
1198: PetscInt M = A->rmap->N, N = A->cmap->N, m, n, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend;
1199: const PetscInt *cols;
1200: const PetscScalar *vals;
1201: PetscScalar v;
1203: PetscFunctionBegin;
1204: if (reuse == MAT_REUSE_MATRIX) {
1205: mat_scal = *newmat;
1206: PetscCall(MatZeroEntries(mat_scal));
1207: } else {
1208: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
1209: m = PETSC_DECIDE;
1210: PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
1211: n = PETSC_DECIDE;
1212: PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
1213: PetscCall(MatSetSizes(mat_scal, m, n, M, N));
1214: PetscCall(MatSetType(mat_scal, MATSCALAPACK));
1215: PetscCall(MatSetUp(mat_scal));
1216: }
1217: PetscCall(MatGetRowUpperTriangular(A));
1218: for (row = rstart; row < rend; row++) {
1219: PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
1220: PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, ADD_VALUES));
1221: for (j = 0; j < ncols; j++) { /* lower triangular part */
1222: if (cols[j] == row) continue;
1223: v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
1224: PetscCall(MatSetValues(mat_scal, 1, &cols[j], 1, &row, &v, ADD_VALUES));
1225: }
1226: PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1227: }
1228: PetscCall(MatRestoreRowUpperTriangular(A));
1229: PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
1230: PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));
1232: if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
1233: else *newmat = mat_scal;
1234: PetscFunctionReturn(PETSC_SUCCESS);
1235: }
1237: static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
1238: {
1239: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1240: PetscInt sz = 0;
1242: PetscFunctionBegin;
1243: PetscCall(PetscLayoutSetUp(A->rmap));
1244: PetscCall(PetscLayoutSetUp(A->cmap));
1245: if (!a->lld) a->lld = a->locr;
1247: PetscCall(PetscFree(a->loc));
1248: PetscCall(PetscIntMultError(a->lld, a->locc, &sz));
1249: PetscCall(PetscCalloc1(sz, &a->loc));
1251: A->preallocated = PETSC_TRUE;
1252: PetscFunctionReturn(PETSC_SUCCESS);
1253: }
1255: static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
1256: {
1257: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1258: Mat_ScaLAPACK_Grid *grid;
1259: PetscBool flg;
1260: MPI_Comm icomm;
1262: PetscFunctionBegin;
1263: PetscCall(MatStashDestroy_Private(&A->stash));
1264: PetscCall(PetscFree(a->loc));
1265: PetscCall(PetscFree(a->pivots));
1266: PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
1267: PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg));
1268: if (--grid->grid_refct == 0) {
1269: Cblacs_gridexit(grid->ictxt);
1270: Cblacs_gridexit(grid->ictxrow);
1271: Cblacs_gridexit(grid->ictxcol);
1272: PetscCall(PetscFree(grid));
1273: PetscCallMPI(MPI_Comm_delete_attr(icomm, Petsc_ScaLAPACK_keyval));
1274: }
1275: PetscCall(PetscCommDestroy(&icomm));
1276: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", NULL));
1277: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1278: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", NULL));
1279: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", NULL));
1280: PetscCall(PetscFree(A->data));
1281: PetscFunctionReturn(PETSC_SUCCESS);
1282: }
1284: static PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
1285: {
1286: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1287: PetscBLASInt info = 0;
1288: PetscBool flg;
1290: PetscFunctionBegin;
1291: PetscCall(PetscLayoutSetUp(A->rmap));
1292: PetscCall(PetscLayoutSetUp(A->cmap));
1294: /* check that the layout is as enforced by MatCreateScaLAPACK() */
1295: PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
1296: PetscCheck(flg, A->rmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1297: PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
1298: PetscCheck(flg, A->cmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1300: /* compute local sizes */
1301: PetscCall(PetscBLASIntCast(A->rmap->N, &a->M));
1302: PetscCall(PetscBLASIntCast(A->cmap->N, &a->N));
1303: a->locr = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
1304: a->locc = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
1305: a->lld = PetscMax(1, a->locr);
1307: /* allocate local array */
1308: PetscCall(MatScaLAPACKSetPreallocation(A));
1310: /* set up ScaLAPACK descriptor */
1311: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(a->desc, &a->M, &a->N, &a->mb, &a->nb, &a->rsrc, &a->csrc, &a->grid->ictxt, &a->lld, &info));
1312: PetscCheckScaLapackInfo("descinit", info);
1313: PetscFunctionReturn(PETSC_SUCCESS);
1314: }
1316: static PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A, MatAssemblyType type)
1317: {
1318: PetscInt nstash, reallocs;
1320: PetscFunctionBegin;
1321: if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
1322: PetscCall(MatStashScatterBegin_Private(A, &A->stash, NULL));
1323: PetscCall(MatStashGetInfo_Private(&A->stash, &nstash, &reallocs));
1324: PetscCall(PetscInfo(A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
1325: PetscFunctionReturn(PETSC_SUCCESS);
1326: }
1328: static PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A, MatAssemblyType type)
1329: {
1330: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1331: PetscMPIInt n;
1332: PetscInt i, flg, *row, *col;
1333: PetscScalar *val;
1334: PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc;
1336: PetscFunctionBegin;
1337: if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
1338: while (1) {
1339: PetscCall(MatStashScatterGetMesg_Private(&A->stash, &n, &row, &col, &val, &flg));
1340: if (!flg) break;
1341: for (i = 0; i < n; i++) {
1342: PetscCall(PetscBLASIntCast(row[i] + 1, &gridx));
1343: PetscCall(PetscBLASIntCast(col[i] + 1, &gcidx));
1344: PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
1345: PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol, PetscObjectComm((PetscObject)A), PETSC_ERR_LIB, "Something went wrong, received value does not belong to this process");
1346: switch (A->insertmode) {
1347: case INSERT_VALUES:
1348: a->loc[lridx - 1 + (lcidx - 1) * a->lld] = val[i];
1349: break;
1350: case ADD_VALUES:
1351: a->loc[lridx - 1 + (lcidx - 1) * a->lld] += val[i];
1352: break;
1353: default:
1354: SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)A->insertmode);
1355: }
1356: }
1357: }
1358: PetscCall(MatStashScatterEnd_Private(&A->stash));
1359: PetscFunctionReturn(PETSC_SUCCESS);
1360: }
1362: static PetscErrorCode MatLoad_ScaLAPACK(Mat newMat, PetscViewer viewer)
1363: {
1364: Mat Adense, As;
1365: MPI_Comm comm;
1367: PetscFunctionBegin;
1368: PetscCall(PetscObjectGetComm((PetscObject)newMat, &comm));
1369: PetscCall(MatCreate(comm, &Adense));
1370: PetscCall(MatSetType(Adense, MATDENSE));
1371: PetscCall(MatLoad(Adense, viewer));
1372: PetscCall(MatConvert(Adense, MATSCALAPACK, MAT_INITIAL_MATRIX, &As));
1373: PetscCall(MatDestroy(&Adense));
1374: PetscCall(MatHeaderReplace(newMat, &As));
1375: PetscFunctionReturn(PETSC_SUCCESS);
1376: }
1378: static struct _MatOps MatOps_Values = {MatSetValues_ScaLAPACK,
1379: NULL,
1380: NULL,
1381: MatMult_ScaLAPACK,
1382: /* 4*/ MatMultAdd_ScaLAPACK,
1383: MatMultTranspose_ScaLAPACK,
1384: MatMultTransposeAdd_ScaLAPACK,
1385: MatSolve_ScaLAPACK,
1386: MatSolveAdd_ScaLAPACK,
1387: NULL,
1388: /*10*/ NULL,
1389: MatLUFactor_ScaLAPACK,
1390: MatCholeskyFactor_ScaLAPACK,
1391: NULL,
1392: MatTranspose_ScaLAPACK,
1393: /*15*/ MatGetInfo_ScaLAPACK,
1394: NULL,
1395: MatGetDiagonal_ScaLAPACK,
1396: MatDiagonalScale_ScaLAPACK,
1397: MatNorm_ScaLAPACK,
1398: /*20*/ MatAssemblyBegin_ScaLAPACK,
1399: MatAssemblyEnd_ScaLAPACK,
1400: MatSetOption_ScaLAPACK,
1401: MatZeroEntries_ScaLAPACK,
1402: /*24*/ NULL,
1403: MatLUFactorSymbolic_ScaLAPACK,
1404: MatLUFactorNumeric_ScaLAPACK,
1405: MatCholeskyFactorSymbolic_ScaLAPACK,
1406: MatCholeskyFactorNumeric_ScaLAPACK,
1407: /*29*/ MatSetUp_ScaLAPACK,
1408: NULL,
1409: NULL,
1410: NULL,
1411: NULL,
1412: /*34*/ MatDuplicate_ScaLAPACK,
1413: NULL,
1414: NULL,
1415: NULL,
1416: NULL,
1417: /*39*/ MatAXPY_ScaLAPACK,
1418: NULL,
1419: NULL,
1420: NULL,
1421: MatCopy_ScaLAPACK,
1422: /*44*/ NULL,
1423: MatScale_ScaLAPACK,
1424: MatShift_ScaLAPACK,
1425: NULL,
1426: NULL,
1427: /*49*/ NULL,
1428: NULL,
1429: NULL,
1430: NULL,
1431: NULL,
1432: /*54*/ NULL,
1433: NULL,
1434: NULL,
1435: NULL,
1436: NULL,
1437: /*59*/ NULL,
1438: MatDestroy_ScaLAPACK,
1439: MatView_ScaLAPACK,
1440: NULL,
1441: NULL,
1442: /*64*/ NULL,
1443: NULL,
1444: NULL,
1445: NULL,
1446: NULL,
1447: /*69*/ NULL,
1448: NULL,
1449: MatConvert_ScaLAPACK_Dense,
1450: NULL,
1451: NULL,
1452: /*74*/ NULL,
1453: NULL,
1454: NULL,
1455: NULL,
1456: NULL,
1457: /*79*/ NULL,
1458: NULL,
1459: NULL,
1460: NULL,
1461: MatLoad_ScaLAPACK,
1462: /*84*/ NULL,
1463: NULL,
1464: NULL,
1465: NULL,
1466: NULL,
1467: /*89*/ NULL,
1468: NULL,
1469: MatMatMultNumeric_ScaLAPACK,
1470: NULL,
1471: NULL,
1472: /*94*/ NULL,
1473: NULL,
1474: NULL,
1475: MatMatTransposeMultNumeric_ScaLAPACK,
1476: NULL,
1477: /*99*/ MatProductSetFromOptions_ScaLAPACK,
1478: NULL,
1479: NULL,
1480: MatConjugate_ScaLAPACK,
1481: NULL,
1482: /*104*/ NULL,
1483: NULL,
1484: NULL,
1485: NULL,
1486: NULL,
1487: /*109*/ MatMatSolve_ScaLAPACK,
1488: NULL,
1489: NULL,
1490: NULL,
1491: MatMissingDiagonal_ScaLAPACK,
1492: /*114*/ NULL,
1493: NULL,
1494: NULL,
1495: NULL,
1496: NULL,
1497: /*119*/ NULL,
1498: MatHermitianTranspose_ScaLAPACK,
1499: MatMultHermitianTranspose_ScaLAPACK,
1500: MatMultHermitianTransposeAdd_ScaLAPACK,
1501: NULL,
1502: /*124*/ NULL,
1503: NULL,
1504: NULL,
1505: NULL,
1506: NULL,
1507: /*129*/ NULL,
1508: NULL,
1509: NULL,
1510: MatTransposeMatMultNumeric_ScaLAPACK,
1511: NULL,
1512: /*134*/ NULL,
1513: NULL,
1514: NULL,
1515: NULL,
1516: NULL,
1517: NULL,
1518: /*140*/ NULL,
1519: NULL,
1520: NULL,
1521: NULL,
1522: NULL,
1523: /*145*/ NULL,
1524: NULL,
1525: NULL,
1526: NULL,
1527: NULL,
1528: /*150*/ NULL,
1529: NULL,
1530: NULL,
1531: NULL,
1532: NULL,
1533: NULL};
1535: static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat, MatStash *stash, PetscInt *owners)
1536: {
1537: PetscInt *owner, *startv, *starti, bs2;
1538: PetscInt size = stash->size, nsends;
1539: PetscInt *sindices, **rindices, j, l;
1540: PetscScalar **rvalues, *svalues;
1541: MPI_Comm comm = stash->comm;
1542: MPI_Request *send_waits, *recv_waits, *recv_waits1, *recv_waits2;
1543: PetscMPIInt tag1 = stash->tag1, tag2 = stash->tag2, *sizes, *nlengths, nreceives, insends, ii;
1544: PetscInt *sp_idx, *sp_idy;
1545: PetscScalar *sp_val;
1546: PetscMatStashSpace space, space_next;
1547: PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc;
1548: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)mat->data;
1550: PetscFunctionBegin;
1551: { /* make sure all processors are either in INSERTMODE or ADDMODE */
1552: InsertMode addv;
1553: PetscCallMPI(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
1554: PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
1555: mat->insertmode = addv; /* in case this processor had no cache */
1556: }
1558: bs2 = stash->bs * stash->bs;
1560: /* first count number of contributors to each processor */
1561: PetscCall(PetscCalloc1(size, &nlengths));
1562: PetscCall(PetscMalloc1(stash->n + 1, &owner));
1564: ii = j = 0;
1565: space = stash->space_head;
1566: while (space) {
1567: space_next = space->next;
1568: for (l = 0; l < space->local_used; l++) {
1569: PetscCall(PetscBLASIntCast(space->idx[l] + 1, &gridx));
1570: PetscCall(PetscBLASIntCast(space->idy[l] + 1, &gcidx));
1571: PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
1572: j = Cblacs_pnum(a->grid->ictxt, rsrc, csrc);
1573: nlengths[j]++;
1574: owner[ii] = j;
1575: ii++;
1576: }
1577: space = space_next;
1578: }
1580: /* Now check what procs get messages - and compute nsends. */
1581: PetscCall(PetscCalloc1(size, &sizes));
1582: nsends = 0;
1583: for (PetscMPIInt i = 0; i < size; i++) {
1584: if (nlengths[i]) {
1585: sizes[i] = 1;
1586: nsends++;
1587: }
1588: }
1590: {
1591: PetscMPIInt *onodes, *olengths;
1593: /* Determine the number of messages to expect, their lengths, from from-ids */
1594: PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives));
1595: PetscCall(PetscMPIIntCast(nsends, &insends));
1596: PetscCall(PetscGatherMessageLengths(comm, insends, nreceives, nlengths, &onodes, &olengths));
1597: /* since clubbing row,col - lengths are multiplied by 2 */
1598: for (PetscMPIInt i = 0; i < nreceives; i++) olengths[i] *= 2;
1599: PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1));
1600: /* values are size 'bs2' lengths (and remove earlier factor 2 */
1601: for (PetscMPIInt i = 0; i < nreceives; i++) olengths[i] = (PetscMPIInt)(olengths[i] * bs2 / 2);
1602: PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2));
1603: PetscCall(PetscFree(onodes));
1604: PetscCall(PetscFree(olengths));
1605: }
1607: /* do sends:
1608: 1) starts[i] gives the starting index in svalues for stuff going to
1609: the ith processor
1610: */
1611: PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices));
1612: PetscCall(PetscMalloc1(2 * nsends, &send_waits));
1613: PetscCall(PetscMalloc2(size, &startv, size, &starti));
1614: /* use 2 sends the first with all_a, the next with all_i and all_j */
1615: startv[0] = 0;
1616: starti[0] = 0;
1617: for (PetscMPIInt i = 1; i < size; i++) {
1618: startv[i] = startv[i - 1] + nlengths[i - 1];
1619: starti[i] = starti[i - 1] + 2 * nlengths[i - 1];
1620: }
1622: ii = 0;
1623: space = stash->space_head;
1624: while (space) {
1625: space_next = space->next;
1626: sp_idx = space->idx;
1627: sp_idy = space->idy;
1628: sp_val = space->val;
1629: for (l = 0; l < space->local_used; l++) {
1630: j = owner[ii];
1631: if (bs2 == 1) {
1632: svalues[startv[j]] = sp_val[l];
1633: } else {
1634: PetscInt k;
1635: PetscScalar *buf1, *buf2;
1636: buf1 = svalues + bs2 * startv[j];
1637: buf2 = space->val + bs2 * l;
1638: for (k = 0; k < bs2; k++) buf1[k] = buf2[k];
1639: }
1640: sindices[starti[j]] = sp_idx[l];
1641: sindices[starti[j] + nlengths[j]] = sp_idy[l];
1642: startv[j]++;
1643: starti[j]++;
1644: ii++;
1645: }
1646: space = space_next;
1647: }
1648: startv[0] = 0;
1649: for (PetscMPIInt i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1];
1651: for (PetscMPIInt i = 0, count = 0; i < size; i++) {
1652: if (sizes[i]) {
1653: PetscCallMPI(MPIU_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++));
1654: PetscCallMPI(MPIU_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++));
1655: }
1656: }
1657: #if defined(PETSC_USE_INFO)
1658: PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT "\n", nsends));
1659: for (PetscMPIInt i = 0; i < size; i++) {
1660: if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %d: size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt)))));
1661: }
1662: #endif
1663: PetscCall(PetscFree(nlengths));
1664: PetscCall(PetscFree(owner));
1665: PetscCall(PetscFree2(startv, starti));
1666: PetscCall(PetscFree(sizes));
1668: /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
1669: PetscCall(PetscMalloc1(2 * nreceives, &recv_waits));
1671: for (PetscMPIInt i = 0; i < nreceives; i++) {
1672: recv_waits[2 * i] = recv_waits1[i];
1673: recv_waits[2 * i + 1] = recv_waits2[i];
1674: }
1675: stash->recv_waits = recv_waits;
1677: PetscCall(PetscFree(recv_waits1));
1678: PetscCall(PetscFree(recv_waits2));
1680: stash->svalues = svalues;
1681: stash->sindices = sindices;
1682: stash->rvalues = rvalues;
1683: stash->rindices = rindices;
1684: stash->send_waits = send_waits;
1685: stash->nsends = (PetscMPIInt)nsends;
1686: stash->nrecvs = nreceives;
1687: stash->reproduce_count = 0;
1688: PetscFunctionReturn(PETSC_SUCCESS);
1689: }
1691: static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A, PetscInt mb, PetscInt nb)
1692: {
1693: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1695: PetscFunctionBegin;
1696: PetscCheck(!A->preallocated, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot change block sizes after MatSetUp");
1697: PetscCheck(mb >= 1 || mb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "mb %" PetscInt_FMT " must be at least 1", mb);
1698: PetscCheck(nb >= 1 || nb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "nb %" PetscInt_FMT " must be at least 1", nb);
1699: PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
1700: PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
1701: PetscFunctionReturn(PETSC_SUCCESS);
1702: }
1704: /*@
1705: MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
1706: the `MATSCALAPACK` matrix
1708: Logically Collective
1710: Input Parameters:
1711: + A - a `MATSCALAPACK` matrix
1712: . mb - the row block size
1713: - nb - the column block size
1715: Level: intermediate
1717: Note:
1718: This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices
1720: .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()`
1721: @*/
1722: PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A, PetscInt mb, PetscInt nb)
1723: {
1724: PetscFunctionBegin;
1728: PetscTryMethod(A, "MatScaLAPACKSetBlockSizes_C", (Mat, PetscInt, PetscInt), (A, mb, nb));
1729: PetscFunctionReturn(PETSC_SUCCESS);
1730: }
1732: static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A, PetscInt *mb, PetscInt *nb)
1733: {
1734: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1736: PetscFunctionBegin;
1737: if (mb) *mb = a->mb;
1738: if (nb) *nb = a->nb;
1739: PetscFunctionReturn(PETSC_SUCCESS);
1740: }
1742: /*@
1743: MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
1744: the `MATSCALAPACK` matrix
1746: Not Collective
1748: Input Parameter:
1749: . A - a `MATSCALAPACK` matrix
1751: Output Parameters:
1752: + mb - the row block size
1753: - nb - the column block size
1755: Level: intermediate
1757: Note:
1758: This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices
1760: .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()`
1761: @*/
1762: PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A, PetscInt *mb, PetscInt *nb)
1763: {
1764: PetscFunctionBegin;
1766: PetscUseMethod(A, "MatScaLAPACKGetBlockSizes_C", (Mat, PetscInt *, PetscInt *), (A, mb, nb));
1767: PetscFunctionReturn(PETSC_SUCCESS);
1768: }
1770: PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
1771: PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *);
1773: /*MC
1774: MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1776: Use `./configure --download-scalapack` to install PETSc to use ScaLAPACK
1778: Options Database Keys:
1779: + -mat_type scalapack - sets the matrix type to `MATSCALAPACK`
1780: . -pc_factor_mat_solver_type scalapack - to use this direct solver with the option `-pc_type lu`
1781: . -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1782: - -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1784: Level: intermediate
1786: Note:
1787: Note unlike most matrix formats, this format does not store all the matrix entries for a contiguous
1788: range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on
1789: the given rank.
1791: .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()`, `MatCreateScaLAPACK()`
1792: M*/
1794: PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1795: {
1796: Mat_ScaLAPACK *a;
1797: PetscBool flg, flg1;
1798: Mat_ScaLAPACK_Grid *grid;
1799: MPI_Comm icomm;
1800: PetscBLASInt nprow, npcol, myrow, mycol;
1801: PetscInt optv1, k = 2, array[2] = {0, 0};
1802: PetscMPIInt size;
1804: PetscFunctionBegin;
1805: A->ops[0] = MatOps_Values;
1806: A->insertmode = NOT_SET_VALUES;
1808: PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A), 1, &A->stash));
1809: A->stash.ScatterBegin = MatStashScatterBegin_ScaLAPACK;
1810: A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1811: A->stash.ScatterEnd = MatStashScatterEnd_Ref;
1812: A->stash.ScatterDestroy = NULL;
1814: PetscCall(PetscNew(&a));
1815: A->data = (void *)a;
1817: /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1818: if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
1819: PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_ScaLAPACK_keyval, NULL));
1820: PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free));
1821: PetscCall(PetscCitationsRegister(ScaLAPACKCitation, &ScaLAPACKCite));
1822: }
1823: PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
1824: PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg));
1825: if (!flg) {
1826: PetscCall(PetscNew(&grid));
1828: PetscCallMPI(MPI_Comm_size(icomm, &size));
1829: PetscCall(PetscBLASIntCast(PetscSqrtReal((PetscReal)size) + 0.001, &grid->nprow));
1831: PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "ScaLAPACK Grid Options", "Mat");
1832: PetscCall(PetscOptionsInt("-mat_scalapack_grid_height", "Grid Height", "None", grid->nprow, &optv1, &flg1));
1833: if (flg1) {
1834: PetscCheck(size % optv1 == 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Grid Height %" PetscInt_FMT " must evenly divide CommSize %d", optv1, size);
1835: PetscCall(PetscBLASIntCast(optv1, &grid->nprow));
1836: }
1837: PetscOptionsEnd();
1839: if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */
1840: grid->npcol = size / grid->nprow;
1841: PetscCall(PetscBLASIntCast(grid->nprow, &nprow));
1842: PetscCall(PetscBLASIntCast(grid->npcol, &npcol));
1843: grid->ictxt = Csys2blacs_handle(icomm);
1844: Cblacs_gridinit(&grid->ictxt, "R", nprow, npcol);
1845: Cblacs_gridinfo(grid->ictxt, &nprow, &npcol, &myrow, &mycol);
1846: grid->grid_refct = 1;
1847: grid->nprow = nprow;
1848: grid->npcol = npcol;
1849: grid->myrow = myrow;
1850: grid->mycol = mycol;
1851: /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1852: grid->ictxrow = Csys2blacs_handle(icomm);
1853: Cblacs_gridinit(&grid->ictxrow, "R", 1, size);
1854: grid->ictxcol = Csys2blacs_handle(icomm);
1855: Cblacs_gridinit(&grid->ictxcol, "R", size, 1);
1856: PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_ScaLAPACK_keyval, (void *)grid));
1858: } else grid->grid_refct++;
1859: PetscCall(PetscCommDestroy(&icomm));
1860: a->grid = grid;
1861: a->mb = DEFAULT_BLOCKSIZE;
1862: a->nb = DEFAULT_BLOCKSIZE;
1864: PetscOptionsBegin(PetscObjectComm((PetscObject)A), NULL, "ScaLAPACK Options", "Mat");
1865: PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes", "Size of the blocks to use (one or two comma-separated integers)", "MatCreateScaLAPACK", array, &k, &flg));
1866: if (flg) {
1867: a->mb = (PetscMPIInt)array[0];
1868: a->nb = (k > 1) ? (PetscMPIInt)array[1] : a->mb;
1869: }
1870: PetscOptionsEnd();
1872: a->roworiented = PETSC_TRUE;
1873: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_ScaLAPACK));
1874: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", MatScaLAPACKSetBlockSizes_ScaLAPACK));
1875: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", MatScaLAPACKGetBlockSizes_ScaLAPACK));
1876: PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSCALAPACK));
1877: PetscFunctionReturn(PETSC_SUCCESS);
1878: }
1880: /*@C
1881: MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1882: (2D block cyclic distribution) for a `MATSCALAPACK` matrix
1884: Collective
1886: Input Parameters:
1887: + comm - MPI communicator
1888: . mb - row block size (or `PETSC_DECIDE` to have it set)
1889: . nb - column block size (or `PETSC_DECIDE` to have it set)
1890: . M - number of global rows
1891: . N - number of global columns
1892: . rsrc - coordinate of process that owns the first row of the distributed matrix
1893: - csrc - coordinate of process that owns the first column of the distributed matrix
1895: Output Parameter:
1896: . A - the matrix
1898: Options Database Key:
1899: . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1901: Level: intermediate
1903: Notes:
1904: If `PETSC_DECIDE` is used for the block sizes, then an appropriate value is chosen
1906: It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
1907: MatXXXXSetPreallocation() paradigm instead of this routine directly.
1908: [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
1910: Storate is completely managed by ScaLAPACK, so this requires PETSc to be
1911: configured with ScaLAPACK. In particular, PETSc's local sizes lose
1912: significance and are thus ignored. The block sizes refer to the values
1913: used for the distributed matrix, not the same meaning as in `MATBAIJ`.
1915: .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`
1916: @*/
1917: PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm, PetscInt mb, PetscInt nb, PetscInt M, PetscInt N, PetscInt rsrc, PetscInt csrc, Mat *A)
1918: {
1919: Mat_ScaLAPACK *a;
1920: PetscInt m, n;
1922: PetscFunctionBegin;
1923: PetscCall(MatCreate(comm, A));
1924: PetscCall(MatSetType(*A, MATSCALAPACK));
1925: PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot use PETSC_DECIDE for matrix dimensions");
1926: /* rows and columns are NOT distributed according to PetscSplitOwnership */
1927: m = PETSC_DECIDE;
1928: PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1929: n = PETSC_DECIDE;
1930: PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1931: PetscCall(MatSetSizes(*A, m, n, M, N));
1932: a = (Mat_ScaLAPACK *)(*A)->data;
1933: PetscCall(PetscBLASIntCast(M, &a->M));
1934: PetscCall(PetscBLASIntCast(N, &a->N));
1935: PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
1936: PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
1937: PetscCall(PetscBLASIntCast(rsrc, &a->rsrc));
1938: PetscCall(PetscBLASIntCast(csrc, &a->csrc));
1939: PetscCall(MatSetUp(*A));
1940: PetscFunctionReturn(PETSC_SUCCESS);
1941: }