Actual source code: matscalapack.c
1: #include <petsc/private/petscscalapack.h>
3: const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n"
4: " AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n"
5: " J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n"
6: " G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n"
7: " TITLE = {Sca{LAPACK} Users' Guide},\n"
8: " PUBLISHER = {SIAM},\n"
9: " ADDRESS = {Philadelphia, PA},\n"
10: " YEAR = 1997\n"
11: "}\n";
12: static PetscBool ScaLAPACKCite = PETSC_FALSE;
14: #define DEFAULT_BLOCKSIZE 64
16: /*
17: The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
18: is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
19: */
20: static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
22: static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
23: {
24: PetscInfo(NULL, "Freeing Petsc_ScaLAPACK_keyval\n");
25: MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval);
26: return 0;
27: }
29: static PetscErrorCode MatView_ScaLAPACK(Mat A, PetscViewer viewer)
30: {
31: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
32: PetscBool iascii;
33: PetscViewerFormat format;
34: Mat Adense;
36: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii);
37: if (iascii) {
38: PetscViewerGetFormat(viewer, &format);
39: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
40: PetscViewerASCIIPrintf(viewer, "block sizes: %d,%d\n", (int)a->mb, (int)a->nb);
41: PetscViewerASCIIPrintf(viewer, "grid height=%d, grid width=%d\n", (int)a->grid->nprow, (int)a->grid->npcol);
42: PetscViewerASCIIPrintf(viewer, "coordinates of process owning first row and column: (%d,%d)\n", (int)a->rsrc, (int)a->csrc);
43: PetscViewerASCIIPrintf(viewer, "dimension of largest local matrix: %d x %d\n", (int)a->locr, (int)a->locc);
44: return 0;
45: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
46: return 0;
47: }
48: }
49: /* convert to dense format and call MatView() */
50: MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense);
51: MatView(Adense, viewer);
52: MatDestroy(&Adense);
53: return 0;
54: }
56: static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A, MatInfoType flag, MatInfo *info)
57: {
58: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
59: PetscLogDouble isend[2], irecv[2];
61: info->block_size = 1.0;
63: isend[0] = a->lld * a->locc; /* locally allocated */
64: isend[1] = a->locr * a->locc; /* used submatrix */
65: if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
66: info->nz_allocated = isend[0];
67: info->nz_used = isend[1];
68: } else if (flag == MAT_GLOBAL_MAX) {
69: MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A));
70: info->nz_allocated = irecv[0];
71: info->nz_used = irecv[1];
72: } else if (flag == MAT_GLOBAL_SUM) {
73: MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A));
74: info->nz_allocated = irecv[0];
75: info->nz_used = irecv[1];
76: }
78: info->nz_unneeded = 0;
79: info->assemblies = A->num_ass;
80: info->mallocs = 0;
81: info->memory = 0; /* REVIEW ME */
82: info->fill_ratio_given = 0;
83: info->fill_ratio_needed = 0;
84: info->factor_mallocs = 0;
85: return 0;
86: }
88: PetscErrorCode MatSetOption_ScaLAPACK(Mat A, MatOption op, PetscBool flg)
89: {
90: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
92: switch (op) {
93: case MAT_NEW_NONZERO_LOCATIONS:
94: case MAT_NEW_NONZERO_LOCATION_ERR:
95: case MAT_NEW_NONZERO_ALLOCATION_ERR:
96: case MAT_SYMMETRIC:
97: case MAT_SORTED_FULL:
98: case MAT_HERMITIAN:
99: break;
100: case MAT_ROW_ORIENTED:
101: a->roworiented = flg;
102: break;
103: default:
104: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported option %s", MatOptions[op]);
105: }
106: return 0;
107: }
109: static PetscErrorCode MatSetValues_ScaLAPACK(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode)
110: {
111: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
112: PetscInt i, j;
113: PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc;
114: PetscBool roworiented = a->roworiented;
117: for (i = 0; i < nr; i++) {
118: if (rows[i] < 0) continue;
119: PetscBLASIntCast(rows[i] + 1, &gridx);
120: for (j = 0; j < nc; j++) {
121: if (cols[j] < 0) continue;
122: PetscBLASIntCast(cols[j] + 1, &gcidx);
123: PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
124: if (rsrc == a->grid->myrow && csrc == a->grid->mycol) {
125: if (roworiented) {
126: switch (imode) {
127: case INSERT_VALUES:
128: a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i * nc + j];
129: break;
130: default:
131: a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i * nc + j];
132: break;
133: }
134: } else {
135: switch (imode) {
136: case INSERT_VALUES:
137: a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i + j * nr];
138: break;
139: default:
140: a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i + j * nr];
141: break;
142: }
143: }
144: } else {
146: A->assembled = PETSC_FALSE;
147: MatStashValuesRow_Private(&A->stash, rows[i], 1, cols + j, roworiented ? vals + i * nc + j : vals + i + j * nr, (PetscBool)(imode == ADD_VALUES));
148: }
149: }
150: }
151: return 0;
152: }
154: static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A, PetscBool transpose, PetscScalar beta, const PetscScalar *x, PetscScalar *y)
155: {
156: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
157: PetscScalar *x2d, *y2d, alpha = 1.0;
158: const PetscInt *ranges;
159: PetscBLASInt xdesc[9], ydesc[9], x2desc[9], y2desc[9], mb, nb, lszx, lszy, zero = 0, one = 1, xlld, ylld, info;
161: if (transpose) {
162: /* create ScaLAPACK descriptors for vectors (1d block distribution) */
163: PetscLayoutGetRanges(A->rmap, &ranges);
164: PetscBLASIntCast(ranges[1], &mb); /* x block size */
165: xlld = PetscMax(1, A->rmap->n);
166: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
168: PetscLayoutGetRanges(A->cmap, &ranges);
169: PetscBLASIntCast(ranges[1], &nb); /* y block size */
170: ylld = 1;
171: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &ylld, &info));
174: /* allocate 2d vectors */
175: lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
176: lszy = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
177: PetscMalloc2(lszx, &x2d, lszy, &y2d);
178: xlld = PetscMax(1, lszx);
180: /* create ScaLAPACK descriptors for vectors (2d block distribution) */
181: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
183: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &ylld, &info));
186: /* redistribute x as a column of a 2d matrix */
187: PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, (PetscScalar *)x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));
189: /* redistribute y as a row of a 2d matrix */
190: if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxrow));
192: /* call PBLAS subroutine */
193: PetscCallBLAS("PBLASgemv", PBLASgemv_("T", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
195: /* redistribute y from a row of a 2d matrix */
196: PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxrow));
198: } else { /* non-transpose */
200: /* create ScaLAPACK descriptors for vectors (1d block distribution) */
201: PetscLayoutGetRanges(A->cmap, &ranges);
202: PetscBLASIntCast(ranges[1], &nb); /* x block size */
203: xlld = 1;
204: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &xlld, &info));
206: PetscLayoutGetRanges(A->rmap, &ranges);
207: PetscBLASIntCast(ranges[1], &mb); /* y block size */
208: ylld = PetscMax(1, A->rmap->n);
209: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &ylld, &info));
212: /* allocate 2d vectors */
213: lszy = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
214: lszx = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
215: PetscMalloc2(lszx, &x2d, lszy, &y2d);
216: ylld = PetscMax(1, lszy);
218: /* create ScaLAPACK descriptors for vectors (2d block distribution) */
219: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &xlld, &info));
221: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &ylld, &info));
224: /* redistribute x as a row of a 2d matrix */
225: PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, (PetscScalar *)x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxrow));
227: /* redistribute y as a column of a 2d matrix */
228: if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxcol));
230: /* call PBLAS subroutine */
231: PetscCallBLAS("PBLASgemv", PBLASgemv_("N", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
233: /* redistribute y from a column of a 2d matrix */
234: PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxcol));
235: }
236: PetscFree2(x2d, y2d);
237: return 0;
238: }
240: static PetscErrorCode MatMult_ScaLAPACK(Mat A, Vec x, Vec y)
241: {
242: const PetscScalar *xarray;
243: PetscScalar *yarray;
245: VecGetArrayRead(x, &xarray);
246: VecGetArray(y, &yarray);
247: MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, 0.0, xarray, yarray);
248: VecRestoreArrayRead(x, &xarray);
249: VecRestoreArray(y, &yarray);
250: return 0;
251: }
253: static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A, Vec x, Vec y)
254: {
255: const PetscScalar *xarray;
256: PetscScalar *yarray;
258: VecGetArrayRead(x, &xarray);
259: VecGetArray(y, &yarray);
260: MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, 0.0, xarray, yarray);
261: VecRestoreArrayRead(x, &xarray);
262: VecRestoreArray(y, &yarray);
263: return 0;
264: }
266: static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
267: {
268: const PetscScalar *xarray;
269: PetscScalar *zarray;
271: if (y != z) VecCopy(y, z);
272: VecGetArrayRead(x, &xarray);
273: VecGetArray(z, &zarray);
274: MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, 1.0, xarray, zarray);
275: VecRestoreArrayRead(x, &xarray);
276: VecRestoreArray(z, &zarray);
277: return 0;
278: }
280: static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
281: {
282: const PetscScalar *xarray;
283: PetscScalar *zarray;
285: if (y != z) VecCopy(y, z);
286: VecGetArrayRead(x, &xarray);
287: VecGetArray(z, &zarray);
288: MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, 1.0, xarray, zarray);
289: VecRestoreArrayRead(x, &xarray);
290: VecRestoreArray(z, &zarray);
291: return 0;
292: }
294: PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
295: {
296: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
297: Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data;
298: Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data;
299: PetscScalar sone = 1.0, zero = 0.0;
300: PetscBLASInt one = 1;
302: PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "N", &a->M, &b->N, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
303: C->assembled = PETSC_TRUE;
304: return 0;
305: }
307: PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
308: {
309: MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE);
310: MatSetType(C, MATSCALAPACK);
311: MatSetUp(C);
312: C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
313: return 0;
314: }
316: static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
317: {
318: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
319: Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data;
320: Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data;
321: PetscScalar sone = 1.0, zero = 0.0;
322: PetscBLASInt one = 1;
324: PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "T", &a->M, &b->M, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
325: C->assembled = PETSC_TRUE;
326: return 0;
327: }
329: static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
330: {
331: MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE);
332: MatSetType(C, MATSCALAPACK);
333: MatSetUp(C);
334: return 0;
335: }
337: /* --------------------------------------- */
338: static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C)
339: {
340: C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
341: C->ops->productsymbolic = MatProductSymbolic_AB;
342: return 0;
343: }
345: static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C)
346: {
347: C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
348: C->ops->productsymbolic = MatProductSymbolic_ABt;
349: return 0;
350: }
352: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
353: {
354: Mat_Product *product = C->product;
356: switch (product->type) {
357: case MATPRODUCT_AB:
358: MatProductSetFromOptions_ScaLAPACK_AB(C);
359: break;
360: case MATPRODUCT_ABt:
361: MatProductSetFromOptions_ScaLAPACK_ABt(C);
362: break;
363: default:
364: SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices", MatProductTypes[product->type]);
365: }
366: return 0;
367: }
368: /* --------------------------------------- */
370: static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A, Vec D)
371: {
372: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
373: PetscScalar *darray, *d2d, v;
374: const PetscInt *ranges;
375: PetscBLASInt j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;
377: VecGetArray(D, &darray);
379: if (A->rmap->N <= A->cmap->N) { /* row version */
381: /* create ScaLAPACK descriptor for vector (1d block distribution) */
382: PetscLayoutGetRanges(A->rmap, &ranges);
383: PetscBLASIntCast(ranges[1], &mb); /* D block size */
384: dlld = PetscMax(1, A->rmap->n);
385: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
388: /* allocate 2d vector */
389: lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
390: PetscCalloc1(lszd, &d2d);
391: dlld = PetscMax(1, lszd);
393: /* create ScaLAPACK descriptor for vector (2d block distribution) */
394: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
397: /* collect diagonal */
398: for (j = 1; j <= a->M; j++) {
399: PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("R", " ", &v, a->loc, &j, &j, a->desc));
400: PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &j, &one, d2desc, &v));
401: }
403: /* redistribute d from a column of a 2d matrix */
404: PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxcol));
405: PetscFree(d2d);
407: } else { /* column version */
409: /* create ScaLAPACK descriptor for vector (1d block distribution) */
410: PetscLayoutGetRanges(A->cmap, &ranges);
411: PetscBLASIntCast(ranges[1], &nb); /* D block size */
412: dlld = 1;
413: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
416: /* allocate 2d vector */
417: lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
418: PetscCalloc1(lszd, &d2d);
420: /* create ScaLAPACK descriptor for vector (2d block distribution) */
421: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
424: /* collect diagonal */
425: for (j = 1; j <= a->N; j++) {
426: PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("C", " ", &v, a->loc, &j, &j, a->desc));
427: PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &one, &j, d2desc, &v));
428: }
430: /* redistribute d from a row of a 2d matrix */
431: PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxrow));
432: PetscFree(d2d);
433: }
435: VecRestoreArray(D, &darray);
436: VecAssemblyBegin(D);
437: VecAssemblyEnd(D);
438: return 0;
439: }
441: static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A, Vec L, Vec R)
442: {
443: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
444: const PetscScalar *d;
445: const PetscInt *ranges;
446: PetscScalar *d2d;
447: PetscBLASInt i, j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;
449: if (R) {
450: VecGetArrayRead(R, (const PetscScalar **)&d);
451: /* create ScaLAPACK descriptor for vector (1d block distribution) */
452: PetscLayoutGetRanges(A->cmap, &ranges);
453: PetscBLASIntCast(ranges[1], &nb); /* D block size */
454: dlld = 1;
455: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
458: /* allocate 2d vector */
459: lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
460: PetscCalloc1(lszd, &d2d);
462: /* create ScaLAPACK descriptor for vector (2d block distribution) */
463: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
466: /* redistribute d to a row of a 2d matrix */
467: PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, (PetscScalar *)d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxrow));
469: /* broadcast along process columns */
470: if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld);
471: else Cdgebr2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld, 0, a->grid->mycol);
473: /* local scaling */
474: for (j = 0; j < a->locc; j++)
475: for (i = 0; i < a->locr; i++) a->loc[i + j * a->lld] *= d2d[j];
477: PetscFree(d2d);
478: VecRestoreArrayRead(R, (const PetscScalar **)&d);
479: }
480: if (L) {
481: VecGetArrayRead(L, (const PetscScalar **)&d);
482: /* create ScaLAPACK descriptor for vector (1d block distribution) */
483: PetscLayoutGetRanges(A->rmap, &ranges);
484: PetscBLASIntCast(ranges[1], &mb); /* D block size */
485: dlld = PetscMax(1, A->rmap->n);
486: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
489: /* allocate 2d vector */
490: lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
491: PetscCalloc1(lszd, &d2d);
492: dlld = PetscMax(1, lszd);
494: /* create ScaLAPACK descriptor for vector (2d block distribution) */
495: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
498: /* redistribute d to a column of a 2d matrix */
499: PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, (PetscScalar *)d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxcol));
501: /* broadcast along process rows */
502: if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld);
503: else Cdgebr2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld, a->grid->myrow, 0);
505: /* local scaling */
506: for (i = 0; i < a->locr; i++)
507: for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] *= d2d[i];
509: PetscFree(d2d);
510: VecRestoreArrayRead(L, (const PetscScalar **)&d);
511: }
512: return 0;
513: }
515: static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A, PetscBool *missing, PetscInt *d)
516: {
517: *missing = PETSC_FALSE;
518: return 0;
519: }
521: static PetscErrorCode MatScale_ScaLAPACK(Mat X, PetscScalar a)
522: {
523: Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
524: PetscBLASInt n, one = 1;
526: n = x->lld * x->locc;
527: PetscCallBLAS("BLASscal", BLASscal_(&n, &a, x->loc, &one));
528: return 0;
529: }
531: static PetscErrorCode MatShift_ScaLAPACK(Mat X, PetscScalar alpha)
532: {
533: Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
534: PetscBLASInt i, n;
535: PetscScalar v;
537: n = PetscMin(x->M, x->N);
538: for (i = 1; i <= n; i++) {
539: PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("-", " ", &v, x->loc, &i, &i, x->desc));
540: v += alpha;
541: PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(x->loc, &i, &i, x->desc, &v));
542: }
543: return 0;
544: }
546: static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
547: {
548: Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
549: Mat_ScaLAPACK *y = (Mat_ScaLAPACK *)Y->data;
550: PetscBLASInt one = 1;
551: PetscScalar beta = 1.0;
553: MatScaLAPACKCheckDistribution(Y, 1, X, 3);
554: PetscCallBLAS("SCALAPACKmatadd", SCALAPACKmatadd_(&x->M, &x->N, &alpha, x->loc, &one, &one, x->desc, &beta, y->loc, &one, &one, y->desc));
555: PetscObjectStateIncrease((PetscObject)Y);
556: return 0;
557: }
559: static PetscErrorCode MatCopy_ScaLAPACK(Mat A, Mat B, MatStructure str)
560: {
561: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
562: Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data;
564: PetscArraycpy(b->loc, a->loc, a->lld * a->locc);
565: PetscObjectStateIncrease((PetscObject)B);
566: return 0;
567: }
569: static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A, MatDuplicateOption op, Mat *B)
570: {
571: Mat Bs;
572: MPI_Comm comm;
573: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b;
575: PetscObjectGetComm((PetscObject)A, &comm);
576: MatCreate(comm, &Bs);
577: MatSetSizes(Bs, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE);
578: MatSetType(Bs, MATSCALAPACK);
579: b = (Mat_ScaLAPACK *)Bs->data;
580: b->M = a->M;
581: b->N = a->N;
582: b->mb = a->mb;
583: b->nb = a->nb;
584: b->rsrc = a->rsrc;
585: b->csrc = a->csrc;
586: MatSetUp(Bs);
587: *B = Bs;
588: if (op == MAT_COPY_VALUES) PetscArraycpy(b->loc, a->loc, a->lld * a->locc);
589: Bs->assembled = PETSC_TRUE;
590: return 0;
591: }
593: static PetscErrorCode MatTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
594: {
595: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b;
596: Mat Bs = *B;
597: PetscBLASInt one = 1;
598: PetscScalar sone = 1.0, zero = 0.0;
599: #if defined(PETSC_USE_COMPLEX)
600: PetscInt i, j;
601: #endif
603: if (reuse == MAT_REUSE_MATRIX) MatTransposeCheckNonzeroState_Private(A, *B);
604: if (reuse == MAT_INITIAL_MATRIX) {
605: MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs);
606: *B = Bs;
607: } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
608: b = (Mat_ScaLAPACK *)Bs->data;
609: PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
610: #if defined(PETSC_USE_COMPLEX)
611: /* undo conjugation */
612: for (i = 0; i < b->locr; i++)
613: for (j = 0; j < b->locc; j++) b->loc[i + j * b->lld] = PetscConj(b->loc[i + j * b->lld]);
614: #endif
615: Bs->assembled = PETSC_TRUE;
616: return 0;
617: }
619: static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
620: {
621: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
622: PetscInt i, j;
624: for (i = 0; i < a->locr; i++)
625: for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] = PetscConj(a->loc[i + j * a->lld]);
626: return 0;
627: }
629: static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
630: {
631: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b;
632: Mat Bs = *B;
633: PetscBLASInt one = 1;
634: PetscScalar sone = 1.0, zero = 0.0;
636: if (reuse == MAT_INITIAL_MATRIX) {
637: MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs);
638: *B = Bs;
639: } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
640: b = (Mat_ScaLAPACK *)Bs->data;
641: PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
642: Bs->assembled = PETSC_TRUE;
643: return 0;
644: }
646: static PetscErrorCode MatSolve_ScaLAPACK(Mat A, Vec B, Vec X)
647: {
648: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
649: PetscScalar *x, *x2d;
650: const PetscInt *ranges;
651: PetscBLASInt xdesc[9], x2desc[9], mb, lszx, zero = 0, one = 1, xlld, nrhs = 1, info;
653: VecCopy(B, X);
654: VecGetArray(X, &x);
656: /* create ScaLAPACK descriptor for a vector (1d block distribution) */
657: PetscLayoutGetRanges(A->rmap, &ranges);
658: PetscBLASIntCast(ranges[1], &mb); /* x block size */
659: xlld = PetscMax(1, A->rmap->n);
660: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
663: /* allocate 2d vector */
664: lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
665: PetscMalloc1(lszx, &x2d);
666: xlld = PetscMax(1, lszx);
668: /* create ScaLAPACK descriptor for a vector (2d block distribution) */
669: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
672: /* redistribute x as a column of a 2d matrix */
673: PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));
675: /* call ScaLAPACK subroutine */
676: switch (A->factortype) {
677: case MAT_FACTOR_LU:
678: PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &nrhs, a->loc, &one, &one, a->desc, a->pivots, x2d, &one, &one, x2desc, &info));
680: break;
681: case MAT_FACTOR_CHOLESKY:
682: PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &nrhs, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &info));
684: break;
685: default:
686: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
687: }
689: /* redistribute x from a column of a 2d matrix */
690: PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x2d, &one, &one, x2desc, x, &one, &one, xdesc, &a->grid->ictxcol));
692: PetscFree(x2d);
693: VecRestoreArray(X, &x);
694: return 0;
695: }
697: static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A, Vec B, Vec Y, Vec X)
698: {
699: MatSolve_ScaLAPACK(A, B, X);
700: VecAXPY(X, 1, Y);
701: return 0;
702: }
704: static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A, Mat B, Mat X)
705: {
706: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b, *x;
707: PetscBool flg1, flg2;
708: PetscBLASInt one = 1, info;
710: PetscObjectTypeCompare((PetscObject)B, MATSCALAPACK, &flg1);
711: PetscObjectTypeCompare((PetscObject)X, MATSCALAPACK, &flg2);
713: MatScaLAPACKCheckDistribution(B, 1, X, 2);
714: b = (Mat_ScaLAPACK *)B->data;
715: x = (Mat_ScaLAPACK *)X->data;
716: PetscArraycpy(x->loc, b->loc, b->lld * b->locc);
718: switch (A->factortype) {
719: case MAT_FACTOR_LU:
720: PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &x->N, a->loc, &one, &one, a->desc, a->pivots, x->loc, &one, &one, x->desc, &info));
722: break;
723: case MAT_FACTOR_CHOLESKY:
724: PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &x->N, a->loc, &one, &one, a->desc, x->loc, &one, &one, x->desc, &info));
726: break;
727: default:
728: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
729: }
730: return 0;
731: }
733: static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A, IS row, IS col, const MatFactorInfo *factorinfo)
734: {
735: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
736: PetscBLASInt one = 1, info;
738: if (!a->pivots) { PetscMalloc1(a->locr + a->mb, &a->pivots); }
739: PetscCallBLAS("SCALAPACKgetrf", SCALAPACKgetrf_(&a->M, &a->N, a->loc, &one, &one, a->desc, a->pivots, &info));
741: A->factortype = MAT_FACTOR_LU;
742: A->assembled = PETSC_TRUE;
744: PetscFree(A->solvertype);
745: PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype);
746: return 0;
747: }
749: static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
750: {
751: MatCopy(A, F, SAME_NONZERO_PATTERN);
752: MatLUFactor_ScaLAPACK(F, 0, 0, info);
753: return 0;
754: }
756: static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
757: {
758: /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
759: return 0;
760: }
762: static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A, IS perm, const MatFactorInfo *factorinfo)
763: {
764: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
765: PetscBLASInt one = 1, info;
767: PetscCallBLAS("SCALAPACKpotrf", SCALAPACKpotrf_("L", &a->M, a->loc, &one, &one, a->desc, &info));
769: A->factortype = MAT_FACTOR_CHOLESKY;
770: A->assembled = PETSC_TRUE;
772: PetscFree(A->solvertype);
773: PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype);
774: return 0;
775: }
777: static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
778: {
779: MatCopy(A, F, SAME_NONZERO_PATTERN);
780: MatCholeskyFactor_ScaLAPACK(F, 0, info);
781: return 0;
782: }
784: static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS perm, const MatFactorInfo *info)
785: {
786: /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
787: return 0;
788: }
790: PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A, MatSolverType *type)
791: {
792: *type = MATSOLVERSCALAPACK;
793: return 0;
794: }
796: static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A, MatFactorType ftype, Mat *F)
797: {
798: Mat B;
799: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
801: /* Create the factorization matrix */
802: MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->mb, a->nb, a->M, a->N, a->rsrc, a->csrc, &B);
803: B->trivialsymbolic = PETSC_TRUE;
804: B->factortype = ftype;
805: PetscFree(B->solvertype);
806: PetscStrallocpy(MATSOLVERSCALAPACK, &B->solvertype);
808: PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_scalapack_scalapack);
809: *F = B;
810: return 0;
811: }
813: PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
814: {
815: MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_LU, MatGetFactor_scalapack_scalapack);
816: MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_CHOLESKY, MatGetFactor_scalapack_scalapack);
817: return 0;
818: }
820: static PetscErrorCode MatNorm_ScaLAPACK(Mat A, NormType type, PetscReal *nrm)
821: {
822: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
823: PetscBLASInt one = 1, lwork = 0;
824: const char *ntype;
825: PetscScalar *work = NULL, dummy;
827: switch (type) {
828: case NORM_1:
829: ntype = "1";
830: lwork = PetscMax(a->locr, a->locc);
831: break;
832: case NORM_FROBENIUS:
833: ntype = "F";
834: work = &dummy;
835: break;
836: case NORM_INFINITY:
837: ntype = "I";
838: lwork = PetscMax(a->locr, a->locc);
839: break;
840: default:
841: SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type");
842: }
843: if (lwork) PetscMalloc1(lwork, &work);
844: *nrm = SCALAPACKlange_(ntype, &a->M, &a->N, a->loc, &one, &one, a->desc, work);
845: if (lwork) PetscFree(work);
846: return 0;
847: }
849: static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
850: {
851: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
853: PetscArrayzero(a->loc, a->lld * a->locc);
854: return 0;
855: }
857: static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A, IS *rows, IS *cols)
858: {
859: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
860: PetscInt i, n, nb, isrc, nproc, iproc, *idx;
862: if (rows) {
863: n = a->locr;
864: nb = a->mb;
865: isrc = a->rsrc;
866: nproc = a->grid->nprow;
867: iproc = a->grid->myrow;
868: PetscMalloc1(n, &idx);
869: for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
870: ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, rows);
871: }
872: if (cols) {
873: n = a->locc;
874: nb = a->nb;
875: isrc = a->csrc;
876: nproc = a->grid->npcol;
877: iproc = a->grid->mycol;
878: PetscMalloc1(n, &idx);
879: for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
880: ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, cols);
881: }
882: return 0;
883: }
885: static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A, MatType newtype, MatReuse reuse, Mat *B)
886: {
887: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
888: Mat Bmpi;
889: MPI_Comm comm;
890: PetscInt i, M = A->rmap->N, N = A->cmap->N, m, n, rstart, rend, nz;
891: const PetscInt *ranges, *branges, *cwork;
892: const PetscScalar *vwork;
893: PetscBLASInt bdesc[9], bmb, zero = 0, one = 1, lld, info;
894: PetscScalar *barray;
895: PetscBool differ = PETSC_FALSE;
896: PetscMPIInt size;
898: PetscObjectGetComm((PetscObject)A, &comm);
899: PetscLayoutGetRanges(A->rmap, &ranges);
901: if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
902: MPI_Comm_size(comm, &size);
903: PetscLayoutGetRanges((*B)->rmap, &branges);
904: for (i = 0; i < size; i++)
905: if (ranges[i + 1] != branges[i + 1]) {
906: differ = PETSC_TRUE;
907: break;
908: }
909: }
911: if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
912: MatCreate(comm, &Bmpi);
913: m = PETSC_DECIDE;
914: PetscSplitOwnershipEqual(comm, &m, &M);
915: n = PETSC_DECIDE;
916: PetscSplitOwnershipEqual(comm, &n, &N);
917: MatSetSizes(Bmpi, m, n, M, N);
918: MatSetType(Bmpi, MATDENSE);
919: MatSetUp(Bmpi);
921: /* create ScaLAPACK descriptor for B (1d block distribution) */
922: PetscBLASIntCast(ranges[1], &bmb); /* row block size */
923: lld = PetscMax(A->rmap->n, 1); /* local leading dimension */
924: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
927: /* redistribute matrix */
928: MatDenseGetArray(Bmpi, &barray);
929: PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
930: MatDenseRestoreArray(Bmpi, &barray);
931: MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY);
932: MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY);
934: /* transfer rows of auxiliary matrix to the final matrix B */
935: MatGetOwnershipRange(Bmpi, &rstart, &rend);
936: for (i = rstart; i < rend; i++) {
937: MatGetRow(Bmpi, i, &nz, &cwork, &vwork);
938: MatSetValues(*B, 1, &i, nz, cwork, vwork, INSERT_VALUES);
939: MatRestoreRow(Bmpi, i, &nz, &cwork, &vwork);
940: }
941: MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY);
942: MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY);
943: MatDestroy(&Bmpi);
945: } else { /* normal cases */
947: if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
948: else {
949: MatCreate(comm, &Bmpi);
950: m = PETSC_DECIDE;
951: PetscSplitOwnershipEqual(comm, &m, &M);
952: n = PETSC_DECIDE;
953: PetscSplitOwnershipEqual(comm, &n, &N);
954: MatSetSizes(Bmpi, m, n, M, N);
955: MatSetType(Bmpi, MATDENSE);
956: MatSetUp(Bmpi);
957: }
959: /* create ScaLAPACK descriptor for B (1d block distribution) */
960: PetscBLASIntCast(ranges[1], &bmb); /* row block size */
961: lld = PetscMax(A->rmap->n, 1); /* local leading dimension */
962: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
965: /* redistribute matrix */
966: MatDenseGetArray(Bmpi, &barray);
967: PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
968: MatDenseRestoreArray(Bmpi, &barray);
970: MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY);
971: MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY);
972: if (reuse == MAT_INPLACE_MATRIX) {
973: MatHeaderReplace(A, &Bmpi);
974: } else *B = Bmpi;
975: }
976: return 0;
977: }
979: static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map, PetscBool *correct)
980: {
981: const PetscInt *ranges;
982: PetscMPIInt size;
983: PetscInt i, n;
985: *correct = PETSC_TRUE;
986: MPI_Comm_size(map->comm, &size);
987: if (size > 1) {
988: PetscLayoutGetRanges(map, &ranges);
989: n = ranges[1] - ranges[0];
990: for (i = 1; i < size; i++)
991: if (ranges[i + 1] - ranges[i] != n) break;
992: *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i + 1] - ranges[i] <= n));
993: }
994: return 0;
995: }
997: PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *B)
998: {
999: Mat_ScaLAPACK *b;
1000: Mat Bmpi;
1001: MPI_Comm comm;
1002: PetscInt M = A->rmap->N, N = A->cmap->N, m, n;
1003: const PetscInt *ranges, *rows, *cols;
1004: PetscBLASInt adesc[9], amb, zero = 0, one = 1, lld, info;
1005: PetscScalar *aarray;
1006: IS ir, ic;
1007: PetscInt lda;
1008: PetscBool flg;
1010: PetscObjectGetComm((PetscObject)A, &comm);
1012: if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1013: else {
1014: MatCreate(comm, &Bmpi);
1015: m = PETSC_DECIDE;
1016: PetscSplitOwnershipEqual(comm, &m, &M);
1017: n = PETSC_DECIDE;
1018: PetscSplitOwnershipEqual(comm, &n, &N);
1019: MatSetSizes(Bmpi, m, n, M, N);
1020: MatSetType(Bmpi, MATSCALAPACK);
1021: MatSetUp(Bmpi);
1022: }
1023: b = (Mat_ScaLAPACK *)Bmpi->data;
1025: MatDenseGetLDA(A, &lda);
1026: MatDenseGetArray(A, &aarray);
1027: MatScaLAPACKCheckLayout(A->rmap, &flg);
1028: if (flg) MatScaLAPACKCheckLayout(A->cmap, &flg);
1029: if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */
1030: /* create ScaLAPACK descriptor for A (1d block distribution) */
1031: PetscLayoutGetRanges(A->rmap, &ranges);
1032: PetscBLASIntCast(ranges[1], &amb); /* row block size */
1033: lld = PetscMax(lda, 1); /* local leading dimension */
1034: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(adesc, &b->M, &b->N, &amb, &b->N, &zero, &zero, &b->grid->ictxcol, &lld, &info));
1037: /* redistribute matrix */
1038: PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&b->M, &b->N, aarray, &one, &one, adesc, b->loc, &one, &one, b->desc, &b->grid->ictxcol));
1039: Bmpi->nooffprocentries = PETSC_TRUE;
1040: } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */
1042: b->roworiented = PETSC_FALSE;
1043: MatGetOwnershipIS(A, &ir, &ic);
1044: ISGetIndices(ir, &rows);
1045: ISGetIndices(ic, &cols);
1046: MatSetValues(Bmpi, A->rmap->n, rows, A->cmap->N, cols, aarray, INSERT_VALUES);
1047: ISRestoreIndices(ir, &rows);
1048: ISRestoreIndices(ic, &cols);
1049: ISDestroy(&ic);
1050: ISDestroy(&ir);
1051: }
1052: MatDenseRestoreArray(A, &aarray);
1053: MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY);
1054: MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY);
1055: if (reuse == MAT_INPLACE_MATRIX) {
1056: MatHeaderReplace(A, &Bmpi);
1057: } else *B = Bmpi;
1058: return 0;
1059: }
1061: PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1062: {
1063: Mat mat_scal;
1064: PetscInt M = A->rmap->N, N = A->cmap->N, rstart = A->rmap->rstart, rend = A->rmap->rend, m, n, row, ncols;
1065: const PetscInt *cols;
1066: const PetscScalar *vals;
1068: if (reuse == MAT_REUSE_MATRIX) {
1069: mat_scal = *newmat;
1070: MatZeroEntries(mat_scal);
1071: } else {
1072: MatCreate(PetscObjectComm((PetscObject)A), &mat_scal);
1073: m = PETSC_DECIDE;
1074: PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M);
1075: n = PETSC_DECIDE;
1076: PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N);
1077: MatSetSizes(mat_scal, m, n, M, N);
1078: MatSetType(mat_scal, MATSCALAPACK);
1079: MatSetUp(mat_scal);
1080: }
1081: for (row = rstart; row < rend; row++) {
1082: MatGetRow(A, row, &ncols, &cols, &vals);
1083: MatSetValues(mat_scal, 1, &row, ncols, cols, vals, INSERT_VALUES);
1084: MatRestoreRow(A, row, &ncols, &cols, &vals);
1085: }
1086: MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY);
1087: MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY);
1089: if (reuse == MAT_INPLACE_MATRIX) MatHeaderReplace(A, &mat_scal);
1090: else *newmat = mat_scal;
1091: return 0;
1092: }
1094: PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1095: {
1096: Mat mat_scal;
1097: PetscInt M = A->rmap->N, N = A->cmap->N, m, n, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend;
1098: const PetscInt *cols;
1099: const PetscScalar *vals;
1100: PetscScalar v;
1102: if (reuse == MAT_REUSE_MATRIX) {
1103: mat_scal = *newmat;
1104: MatZeroEntries(mat_scal);
1105: } else {
1106: MatCreate(PetscObjectComm((PetscObject)A), &mat_scal);
1107: m = PETSC_DECIDE;
1108: PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M);
1109: n = PETSC_DECIDE;
1110: PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N);
1111: MatSetSizes(mat_scal, m, n, M, N);
1112: MatSetType(mat_scal, MATSCALAPACK);
1113: MatSetUp(mat_scal);
1114: }
1115: MatGetRowUpperTriangular(A);
1116: for (row = rstart; row < rend; row++) {
1117: MatGetRow(A, row, &ncols, &cols, &vals);
1118: MatSetValues(mat_scal, 1, &row, ncols, cols, vals, ADD_VALUES);
1119: for (j = 0; j < ncols; j++) { /* lower triangular part */
1120: if (cols[j] == row) continue;
1121: v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
1122: MatSetValues(mat_scal, 1, &cols[j], 1, &row, &v, ADD_VALUES);
1123: }
1124: MatRestoreRow(A, row, &ncols, &cols, &vals);
1125: }
1126: MatRestoreRowUpperTriangular(A);
1127: MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY);
1128: MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY);
1130: if (reuse == MAT_INPLACE_MATRIX) MatHeaderReplace(A, &mat_scal);
1131: else *newmat = mat_scal;
1132: return 0;
1133: }
1135: static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
1136: {
1137: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1138: PetscInt sz = 0;
1140: PetscLayoutSetUp(A->rmap);
1141: PetscLayoutSetUp(A->cmap);
1142: if (!a->lld) a->lld = a->locr;
1144: PetscFree(a->loc);
1145: PetscIntMultError(a->lld, a->locc, &sz);
1146: PetscCalloc1(sz, &a->loc);
1148: A->preallocated = PETSC_TRUE;
1149: return 0;
1150: }
1152: static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
1153: {
1154: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1155: Mat_ScaLAPACK_Grid *grid;
1156: PetscBool flg;
1157: MPI_Comm icomm;
1159: MatStashDestroy_Private(&A->stash);
1160: PetscFree(a->loc);
1161: PetscFree(a->pivots);
1162: PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL);
1163: MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg);
1164: if (--grid->grid_refct == 0) {
1165: Cblacs_gridexit(grid->ictxt);
1166: Cblacs_gridexit(grid->ictxrow);
1167: Cblacs_gridexit(grid->ictxcol);
1168: PetscFree(grid);
1169: MPI_Comm_delete_attr(icomm, Petsc_ScaLAPACK_keyval);
1170: }
1171: PetscCommDestroy(&icomm);
1172: PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", NULL);
1173: PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL);
1174: PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", NULL);
1175: PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", NULL);
1176: PetscFree(A->data);
1177: return 0;
1178: }
1180: PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
1181: {
1182: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1183: PetscBLASInt info = 0;
1184: PetscBool flg;
1186: PetscLayoutSetUp(A->rmap);
1187: PetscLayoutSetUp(A->cmap);
1189: /* check that the layout is as enforced by MatCreateScaLAPACK() */
1190: MatScaLAPACKCheckLayout(A->rmap, &flg);
1192: MatScaLAPACKCheckLayout(A->cmap, &flg);
1195: /* compute local sizes */
1196: PetscBLASIntCast(A->rmap->N, &a->M);
1197: PetscBLASIntCast(A->cmap->N, &a->N);
1198: a->locr = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
1199: a->locc = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
1200: a->lld = PetscMax(1, a->locr);
1202: /* allocate local array */
1203: MatScaLAPACKSetPreallocation(A);
1205: /* set up ScaLAPACK descriptor */
1206: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(a->desc, &a->M, &a->N, &a->mb, &a->nb, &a->rsrc, &a->csrc, &a->grid->ictxt, &a->lld, &info));
1208: return 0;
1209: }
1211: PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A, MatAssemblyType type)
1212: {
1213: PetscInt nstash, reallocs;
1215: if (A->nooffprocentries) return 0;
1216: MatStashScatterBegin_Private(A, &A->stash, NULL);
1217: MatStashGetInfo_Private(&A->stash, &nstash, &reallocs);
1218: PetscInfo(A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs);
1219: return 0;
1220: }
1222: PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A, MatAssemblyType type)
1223: {
1224: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1225: PetscMPIInt n;
1226: PetscInt i, flg, *row, *col;
1227: PetscScalar *val;
1228: PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc;
1230: if (A->nooffprocentries) return 0;
1231: while (1) {
1232: MatStashScatterGetMesg_Private(&A->stash, &n, &row, &col, &val, &flg);
1233: if (!flg) break;
1234: for (i = 0; i < n; i++) {
1235: PetscBLASIntCast(row[i] + 1, &gridx);
1236: PetscBLASIntCast(col[i] + 1, &gcidx);
1237: PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
1239: switch (A->insertmode) {
1240: case INSERT_VALUES:
1241: a->loc[lridx - 1 + (lcidx - 1) * a->lld] = val[i];
1242: break;
1243: case ADD_VALUES:
1244: a->loc[lridx - 1 + (lcidx - 1) * a->lld] += val[i];
1245: break;
1246: default:
1247: SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)A->insertmode);
1248: }
1249: }
1250: }
1251: MatStashScatterEnd_Private(&A->stash);
1252: return 0;
1253: }
1255: PetscErrorCode MatLoad_ScaLAPACK(Mat newMat, PetscViewer viewer)
1256: {
1257: Mat Adense, As;
1258: MPI_Comm comm;
1260: PetscObjectGetComm((PetscObject)newMat, &comm);
1261: MatCreate(comm, &Adense);
1262: MatSetType(Adense, MATDENSE);
1263: MatLoad(Adense, viewer);
1264: MatConvert(Adense, MATSCALAPACK, MAT_INITIAL_MATRIX, &As);
1265: MatDestroy(&Adense);
1266: MatHeaderReplace(newMat, &As);
1267: return 0;
1268: }
1270: /* -------------------------------------------------------------------*/
1271: static struct _MatOps MatOps_Values = {MatSetValues_ScaLAPACK,
1272: 0,
1273: 0,
1274: MatMult_ScaLAPACK,
1275: /* 4*/ MatMultAdd_ScaLAPACK,
1276: MatMultTranspose_ScaLAPACK,
1277: MatMultTransposeAdd_ScaLAPACK,
1278: MatSolve_ScaLAPACK,
1279: MatSolveAdd_ScaLAPACK,
1280: 0,
1281: /*10*/ 0,
1282: MatLUFactor_ScaLAPACK,
1283: MatCholeskyFactor_ScaLAPACK,
1284: 0,
1285: MatTranspose_ScaLAPACK,
1286: /*15*/ MatGetInfo_ScaLAPACK,
1287: 0,
1288: MatGetDiagonal_ScaLAPACK,
1289: MatDiagonalScale_ScaLAPACK,
1290: MatNorm_ScaLAPACK,
1291: /*20*/ MatAssemblyBegin_ScaLAPACK,
1292: MatAssemblyEnd_ScaLAPACK,
1293: MatSetOption_ScaLAPACK,
1294: MatZeroEntries_ScaLAPACK,
1295: /*24*/ 0,
1296: MatLUFactorSymbolic_ScaLAPACK,
1297: MatLUFactorNumeric_ScaLAPACK,
1298: MatCholeskyFactorSymbolic_ScaLAPACK,
1299: MatCholeskyFactorNumeric_ScaLAPACK,
1300: /*29*/ MatSetUp_ScaLAPACK,
1301: 0,
1302: 0,
1303: 0,
1304: 0,
1305: /*34*/ MatDuplicate_ScaLAPACK,
1306: 0,
1307: 0,
1308: 0,
1309: 0,
1310: /*39*/ MatAXPY_ScaLAPACK,
1311: 0,
1312: 0,
1313: 0,
1314: MatCopy_ScaLAPACK,
1315: /*44*/ 0,
1316: MatScale_ScaLAPACK,
1317: MatShift_ScaLAPACK,
1318: 0,
1319: 0,
1320: /*49*/ 0,
1321: 0,
1322: 0,
1323: 0,
1324: 0,
1325: /*54*/ 0,
1326: 0,
1327: 0,
1328: 0,
1329: 0,
1330: /*59*/ 0,
1331: MatDestroy_ScaLAPACK,
1332: MatView_ScaLAPACK,
1333: 0,
1334: 0,
1335: /*64*/ 0,
1336: 0,
1337: 0,
1338: 0,
1339: 0,
1340: /*69*/ 0,
1341: 0,
1342: MatConvert_ScaLAPACK_Dense,
1343: 0,
1344: 0,
1345: /*74*/ 0,
1346: 0,
1347: 0,
1348: 0,
1349: 0,
1350: /*79*/ 0,
1351: 0,
1352: 0,
1353: 0,
1354: MatLoad_ScaLAPACK,
1355: /*84*/ 0,
1356: 0,
1357: 0,
1358: 0,
1359: 0,
1360: /*89*/ 0,
1361: 0,
1362: MatMatMultNumeric_ScaLAPACK,
1363: 0,
1364: 0,
1365: /*94*/ 0,
1366: 0,
1367: 0,
1368: MatMatTransposeMultNumeric_ScaLAPACK,
1369: 0,
1370: /*99*/ MatProductSetFromOptions_ScaLAPACK,
1371: 0,
1372: 0,
1373: MatConjugate_ScaLAPACK,
1374: 0,
1375: /*104*/ 0,
1376: 0,
1377: 0,
1378: 0,
1379: 0,
1380: /*109*/ MatMatSolve_ScaLAPACK,
1381: 0,
1382: 0,
1383: 0,
1384: MatMissingDiagonal_ScaLAPACK,
1385: /*114*/ 0,
1386: 0,
1387: 0,
1388: 0,
1389: 0,
1390: /*119*/ 0,
1391: MatHermitianTranspose_ScaLAPACK,
1392: 0,
1393: 0,
1394: 0,
1395: /*124*/ 0,
1396: 0,
1397: 0,
1398: 0,
1399: 0,
1400: /*129*/ 0,
1401: 0,
1402: 0,
1403: 0,
1404: 0,
1405: /*134*/ 0,
1406: 0,
1407: 0,
1408: 0,
1409: 0,
1410: 0,
1411: /*140*/ 0,
1412: 0,
1413: 0,
1414: 0,
1415: 0,
1416: /*145*/ 0,
1417: 0,
1418: 0,
1419: 0,
1420: 0,
1421: /*150*/ 0};
1423: static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat, MatStash *stash, PetscInt *owners)
1424: {
1425: PetscInt *owner, *startv, *starti, tag1 = stash->tag1, tag2 = stash->tag2, bs2;
1426: PetscInt size = stash->size, nsends;
1427: PetscInt count, *sindices, **rindices, i, j, l;
1428: PetscScalar **rvalues, *svalues;
1429: MPI_Comm comm = stash->comm;
1430: MPI_Request *send_waits, *recv_waits, *recv_waits1, *recv_waits2;
1431: PetscMPIInt *sizes, *nlengths, nreceives;
1432: PetscInt *sp_idx, *sp_idy;
1433: PetscScalar *sp_val;
1434: PetscMatStashSpace space, space_next;
1435: PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc;
1436: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)mat->data;
1438: { /* make sure all processors are either in INSERTMODE or ADDMODE */
1439: InsertMode addv;
1440: MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat));
1442: mat->insertmode = addv; /* in case this processor had no cache */
1443: }
1445: bs2 = stash->bs * stash->bs;
1447: /* first count number of contributors to each processor */
1448: PetscCalloc1(size, &nlengths);
1449: PetscMalloc1(stash->n + 1, &owner);
1451: i = j = 0;
1452: space = stash->space_head;
1453: while (space) {
1454: space_next = space->next;
1455: for (l = 0; l < space->local_used; l++) {
1456: PetscBLASIntCast(space->idx[l] + 1, &gridx);
1457: PetscBLASIntCast(space->idy[l] + 1, &gcidx);
1458: PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
1459: j = Cblacs_pnum(a->grid->ictxt, rsrc, csrc);
1460: nlengths[j]++;
1461: owner[i] = j;
1462: i++;
1463: }
1464: space = space_next;
1465: }
1467: /* Now check what procs get messages - and compute nsends. */
1468: PetscCalloc1(size, &sizes);
1469: for (i = 0, nsends = 0; i < size; i++) {
1470: if (nlengths[i]) {
1471: sizes[i] = 1;
1472: nsends++;
1473: }
1474: }
1476: {
1477: PetscMPIInt *onodes, *olengths;
1478: /* Determine the number of messages to expect, their lengths, from from-ids */
1479: PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives);
1480: PetscGatherMessageLengths(comm, nsends, nreceives, nlengths, &onodes, &olengths);
1481: /* since clubbing row,col - lengths are multiplied by 2 */
1482: for (i = 0; i < nreceives; i++) olengths[i] *= 2;
1483: PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1);
1484: /* values are size 'bs2' lengths (and remove earlier factor 2 */
1485: for (i = 0; i < nreceives; i++) olengths[i] = olengths[i] * bs2 / 2;
1486: PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2);
1487: PetscFree(onodes);
1488: PetscFree(olengths);
1489: }
1491: /* do sends:
1492: 1) starts[i] gives the starting index in svalues for stuff going to
1493: the ith processor
1494: */
1495: PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices);
1496: PetscMalloc1(2 * nsends, &send_waits);
1497: PetscMalloc2(size, &startv, size, &starti);
1498: /* use 2 sends the first with all_a, the next with all_i and all_j */
1499: startv[0] = 0;
1500: starti[0] = 0;
1501: for (i = 1; i < size; i++) {
1502: startv[i] = startv[i - 1] + nlengths[i - 1];
1503: starti[i] = starti[i - 1] + 2 * nlengths[i - 1];
1504: }
1506: i = 0;
1507: space = stash->space_head;
1508: while (space) {
1509: space_next = space->next;
1510: sp_idx = space->idx;
1511: sp_idy = space->idy;
1512: sp_val = space->val;
1513: for (l = 0; l < space->local_used; l++) {
1514: j = owner[i];
1515: if (bs2 == 1) {
1516: svalues[startv[j]] = sp_val[l];
1517: } else {
1518: PetscInt k;
1519: PetscScalar *buf1, *buf2;
1520: buf1 = svalues + bs2 * startv[j];
1521: buf2 = space->val + bs2 * l;
1522: for (k = 0; k < bs2; k++) buf1[k] = buf2[k];
1523: }
1524: sindices[starti[j]] = sp_idx[l];
1525: sindices[starti[j] + nlengths[j]] = sp_idy[l];
1526: startv[j]++;
1527: starti[j]++;
1528: i++;
1529: }
1530: space = space_next;
1531: }
1532: startv[0] = 0;
1533: for (i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1];
1535: for (i = 0, count = 0; i < size; i++) {
1536: if (sizes[i]) {
1537: MPI_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++);
1538: MPI_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++);
1539: }
1540: }
1541: #if defined(PETSC_USE_INFO)
1542: PetscInfo(NULL, "No of messages: %" PetscInt_FMT "\n", nsends);
1543: for (i = 0; i < size; i++) {
1544: if (sizes[i]) PetscInfo(NULL, "Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt))));
1545: }
1546: #endif
1547: PetscFree(nlengths);
1548: PetscFree(owner);
1549: PetscFree2(startv, starti);
1550: PetscFree(sizes);
1552: /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
1553: PetscMalloc1(2 * nreceives, &recv_waits);
1555: for (i = 0; i < nreceives; i++) {
1556: recv_waits[2 * i] = recv_waits1[i];
1557: recv_waits[2 * i + 1] = recv_waits2[i];
1558: }
1559: stash->recv_waits = recv_waits;
1561: PetscFree(recv_waits1);
1562: PetscFree(recv_waits2);
1564: stash->svalues = svalues;
1565: stash->sindices = sindices;
1566: stash->rvalues = rvalues;
1567: stash->rindices = rindices;
1568: stash->send_waits = send_waits;
1569: stash->nsends = nsends;
1570: stash->nrecvs = nreceives;
1571: stash->reproduce_count = 0;
1572: return 0;
1573: }
1575: static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A, PetscInt mb, PetscInt nb)
1576: {
1577: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1582: PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb);
1583: PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb);
1584: return 0;
1585: }
1587: /*@
1588: MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
1589: the `MATSCALAPACK` matrix
1591: Logically Collective
1593: Input Parameters:
1594: + A - a `MATSCALAPACK` matrix
1595: . mb - the row block size
1596: - nb - the column block size
1598: Level: intermediate
1600: .seealso: `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()`
1601: @*/
1602: PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A, PetscInt mb, PetscInt nb)
1603: {
1607: PetscTryMethod(A, "MatScaLAPACKSetBlockSizes_C", (Mat, PetscInt, PetscInt), (A, mb, nb));
1608: return 0;
1609: }
1611: static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A, PetscInt *mb, PetscInt *nb)
1612: {
1613: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1615: if (mb) *mb = a->mb;
1616: if (nb) *nb = a->nb;
1617: return 0;
1618: }
1620: /*@
1621: MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
1622: the `MATSCALAPACK` matrix
1624: Not collective
1626: Input Parameter:
1627: . A - a `MATSCALAPACK` matrix
1629: Output Parameters:
1630: + mb - the row block size
1631: - nb - the column block size
1633: Level: intermediate
1635: .seealso: `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()`
1636: @*/
1637: PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A, PetscInt *mb, PetscInt *nb)
1638: {
1640: PetscUseMethod(A, "MatScaLAPACKGetBlockSizes_C", (Mat, PetscInt *, PetscInt *), (A, mb, nb));
1641: return 0;
1642: }
1644: PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
1645: PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *);
1647: /*MC
1648: MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1650: Use ./configure --download-scalapack to install PETSc to use ScaLAPACK
1652: Options Database Keys:
1653: + -mat_type scalapack - sets the matrix type to `MATSCALAPACK` during a call to `MatSetFromOptions()`
1654: . -pc_factor_mat_solver_type scalapack - to use this direct solver with the option -pc_type lu
1655: . -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1656: - -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1658: Note:
1659: Note unlike most matrix formats, this format does not store all the matrix entries for a contiguous
1660: range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on
1661: the given rank.
1663: Level: beginner
1665: .seealso: `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()`
1666: M*/
1668: PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1669: {
1670: Mat_ScaLAPACK *a;
1671: PetscBool flg, flg1;
1672: Mat_ScaLAPACK_Grid *grid;
1673: MPI_Comm icomm;
1674: PetscBLASInt nprow, npcol, myrow, mycol;
1675: PetscInt optv1, k = 2, array[2] = {0, 0};
1676: PetscMPIInt size;
1678: PetscMemcpy(A->ops, &MatOps_Values, sizeof(struct _MatOps));
1679: A->insertmode = NOT_SET_VALUES;
1681: MatStashCreate_Private(PetscObjectComm((PetscObject)A), 1, &A->stash);
1682: A->stash.ScatterBegin = MatStashScatterBegin_ScaLAPACK;
1683: A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1684: A->stash.ScatterEnd = MatStashScatterEnd_Ref;
1685: A->stash.ScatterDestroy = NULL;
1687: PetscNew(&a);
1688: A->data = (void *)a;
1690: /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1691: if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
1692: MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_ScaLAPACK_keyval, (void *)0);
1693: PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free);
1694: PetscCitationsRegister(ScaLAPACKCitation, &ScaLAPACKCite);
1695: }
1696: PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL);
1697: MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg);
1698: if (!flg) {
1699: PetscNew(&grid);
1701: MPI_Comm_size(icomm, &size);
1702: grid->nprow = (PetscInt)(PetscSqrtReal((PetscReal)size) + 0.001);
1704: PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "ScaLAPACK Grid Options", "Mat");
1705: PetscOptionsInt("-mat_scalapack_grid_height", "Grid Height", "None", grid->nprow, &optv1, &flg1);
1706: if (flg1) {
1708: grid->nprow = optv1;
1709: }
1710: PetscOptionsEnd();
1712: if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */
1713: grid->npcol = size / grid->nprow;
1714: PetscBLASIntCast(grid->nprow, &nprow);
1715: PetscBLASIntCast(grid->npcol, &npcol);
1716: grid->ictxt = Csys2blacs_handle(icomm);
1717: Cblacs_gridinit(&grid->ictxt, "R", nprow, npcol);
1718: Cblacs_gridinfo(grid->ictxt, &nprow, &npcol, &myrow, &mycol);
1719: grid->grid_refct = 1;
1720: grid->nprow = nprow;
1721: grid->npcol = npcol;
1722: grid->myrow = myrow;
1723: grid->mycol = mycol;
1724: /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1725: grid->ictxrow = Csys2blacs_handle(icomm);
1726: Cblacs_gridinit(&grid->ictxrow, "R", 1, size);
1727: grid->ictxcol = Csys2blacs_handle(icomm);
1728: Cblacs_gridinit(&grid->ictxcol, "R", size, 1);
1729: MPI_Comm_set_attr(icomm, Petsc_ScaLAPACK_keyval, (void *)grid);
1731: } else grid->grid_refct++;
1732: PetscCommDestroy(&icomm);
1733: a->grid = grid;
1734: a->mb = DEFAULT_BLOCKSIZE;
1735: a->nb = DEFAULT_BLOCKSIZE;
1737: PetscOptionsBegin(PetscObjectComm((PetscObject)A), NULL, "ScaLAPACK Options", "Mat");
1738: PetscOptionsIntArray("-mat_scalapack_block_sizes", "Size of the blocks to use (one or two comma-separated integers)", "MatCreateScaLAPACK", array, &k, &flg);
1739: if (flg) {
1740: a->mb = array[0];
1741: a->nb = (k > 1) ? array[1] : a->mb;
1742: }
1743: PetscOptionsEnd();
1745: a->roworiented = PETSC_TRUE;
1746: PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_ScaLAPACK);
1747: PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", MatScaLAPACKSetBlockSizes_ScaLAPACK);
1748: PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", MatScaLAPACKGetBlockSizes_ScaLAPACK);
1749: PetscObjectChangeTypeName((PetscObject)A, MATSCALAPACK);
1750: return 0;
1751: }
1753: /*@C
1754: MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1755: (2D block cyclic distribution) for a `MATSCALAPACK` matrix
1757: Collective
1759: Input Parameters:
1760: + comm - MPI communicator
1761: . mb - row block size (or `PETSC_DECIDE` to have it set)
1762: . nb - column block size (or `PETSC_DECIDE` to have it set)
1763: . M - number of global rows
1764: . N - number of global columns
1765: . rsrc - coordinate of process that owns the first row of the distributed matrix
1766: - csrc - coordinate of process that owns the first column of the distributed matrix
1768: Output Parameter:
1769: . A - the matrix
1771: Options Database Key:
1772: . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1774: It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
1775: MatXXXXSetPreallocation() paradigm instead of this routine directly.
1776: [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
1778: Note:
1779: If `PETSC_DECIDE` is used for the block sizes, then an appropriate value
1780: is chosen.
1782: Storage Information:
1783: Storate is completely managed by ScaLAPACK, so this requires PETSc to be
1784: configured with ScaLAPACK. In particular, PETSc's local sizes lose
1785: significance and are thus ignored. The block sizes refer to the values
1786: used for the distributed matrix, not the same meaning as in `MATBAIJ`.
1788: Level: intermediate
1790: .seealso: `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`
1791: @*/
1792: PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm, PetscInt mb, PetscInt nb, PetscInt M, PetscInt N, PetscInt rsrc, PetscInt csrc, Mat *A)
1793: {
1794: Mat_ScaLAPACK *a;
1795: PetscInt m, n;
1797: MatCreate(comm, A);
1798: MatSetType(*A, MATSCALAPACK);
1800: /* rows and columns are NOT distributed according to PetscSplitOwnership */
1801: m = PETSC_DECIDE;
1802: PetscSplitOwnershipEqual(comm, &m, &M);
1803: n = PETSC_DECIDE;
1804: PetscSplitOwnershipEqual(comm, &n, &N);
1805: MatSetSizes(*A, m, n, M, N);
1806: a = (Mat_ScaLAPACK *)(*A)->data;
1807: PetscBLASIntCast(M, &a->M);
1808: PetscBLASIntCast(N, &a->N);
1809: PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb);
1810: PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb);
1811: PetscBLASIntCast(rsrc, &a->rsrc);
1812: PetscBLASIntCast(csrc, &a->csrc);
1813: MatSetUp(*A);
1814: return 0;
1815: }