Actual source code: dense.c

  1: /*
  2:      Defines the basic matrix operations for sequential dense.
  3:      Portions of this code are under:
  4:      Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
  5: */

  7: #include <../src/mat/impls/dense/seq/dense.h>
  8: #include <petscblaslapack.h>
  9: #include <../src/mat/impls/aij/seq/aij.h>

 11: PetscErrorCode MatSeqDenseSymmetrize_Private(Mat A, PetscBool hermitian)
 12: {
 13:   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
 14:   PetscInt      j, k, n = A->rmap->n;
 15:   PetscScalar  *v;

 18:   MatDenseGetArray(A, &v);
 19:   if (!hermitian) {
 20:     for (k = 0; k < n; k++) {
 21:       for (j = k; j < n; j++) v[j * mat->lda + k] = v[k * mat->lda + j];
 22:     }
 23:   } else {
 24:     for (k = 0; k < n; k++) {
 25:       for (j = k; j < n; j++) v[j * mat->lda + k] = PetscConj(v[k * mat->lda + j]);
 26:     }
 27:   }
 28:   MatDenseRestoreArray(A, &v);
 29:   return 0;
 30: }

 32: PETSC_EXTERN PetscErrorCode MatSeqDenseInvertFactors_Private(Mat A)
 33: {
 34:   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
 35:   PetscBLASInt  info, n;

 37:   if (!A->rmap->n || !A->cmap->n) return 0;
 38:   PetscBLASIntCast(A->cmap->n, &n);
 39:   if (A->factortype == MAT_FACTOR_LU) {
 41:     if (!mat->fwork) {
 42:       mat->lfwork = n;
 43:       PetscMalloc1(mat->lfwork, &mat->fwork);
 44:     }
 45:     PetscFPTrapPush(PETSC_FP_TRAP_OFF);
 46:     PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info));
 47:     PetscFPTrapPop();
 48:     PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0);
 49:   } else if (A->factortype == MAT_FACTOR_CHOLESKY) {
 50:     if (A->spd == PETSC_BOOL3_TRUE) {
 51:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
 52:       PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &n, mat->v, &mat->lda, &info));
 53:       PetscFPTrapPop();
 54:       MatSeqDenseSymmetrize_Private(A, PETSC_TRUE);
 55: #if defined(PETSC_USE_COMPLEX)
 56:     } else if (A->hermitian == PETSC_BOOL3_TRUE) {
 59:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
 60:       PetscCallBLAS("LAPACKhetri", LAPACKhetri_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &info));
 61:       PetscFPTrapPop();
 62:       MatSeqDenseSymmetrize_Private(A, PETSC_TRUE);
 63: #endif
 64:     } else { /* symmetric case */
 67:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
 68:       PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &info));
 69:       PetscFPTrapPop();
 70:       MatSeqDenseSymmetrize_Private(A, PETSC_FALSE);
 71:     }
 73:     PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0);
 74:   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix must be factored to solve");

 76:   A->ops->solve             = NULL;
 77:   A->ops->matsolve          = NULL;
 78:   A->ops->solvetranspose    = NULL;
 79:   A->ops->matsolvetranspose = NULL;
 80:   A->ops->solveadd          = NULL;
 81:   A->ops->solvetransposeadd = NULL;
 82:   A->factortype             = MAT_FACTOR_NONE;
 83:   PetscFree(A->solvertype);
 84:   return 0;
 85: }

 87: PetscErrorCode MatZeroRowsColumns_SeqDense(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
 88: {
 89:   Mat_SeqDense      *l = (Mat_SeqDense *)A->data;
 90:   PetscInt           m = l->lda, n = A->cmap->n, r = A->rmap->n, i, j;
 91:   PetscScalar       *slot, *bb, *v;
 92:   const PetscScalar *xx;

 94:   if (PetscDefined(USE_DEBUG)) {
 95:     for (i = 0; i < N; i++) {
 99:     }
100:   }
101:   if (!N) return 0;

103:   /* fix right hand side if needed */
104:   if (x && b) {
105:     Vec xt;

108:     VecDuplicate(x, &xt);
109:     VecCopy(x, xt);
110:     VecScale(xt, -1.0);
111:     MatMultAdd(A, xt, b, b);
112:     VecDestroy(&xt);
113:     VecGetArrayRead(x, &xx);
114:     VecGetArray(b, &bb);
115:     for (i = 0; i < N; i++) bb[rows[i]] = diag * xx[rows[i]];
116:     VecRestoreArrayRead(x, &xx);
117:     VecRestoreArray(b, &bb);
118:   }

120:   MatDenseGetArray(A, &v);
121:   for (i = 0; i < N; i++) {
122:     slot = v + rows[i] * m;
123:     PetscArrayzero(slot, r);
124:   }
125:   for (i = 0; i < N; i++) {
126:     slot = v + rows[i];
127:     for (j = 0; j < n; j++) {
128:       *slot = 0.0;
129:       slot += m;
130:     }
131:   }
132:   if (diag != 0.0) {
134:     for (i = 0; i < N; i++) {
135:       slot  = v + (m + 1) * rows[i];
136:       *slot = diag;
137:     }
138:   }
139:   MatDenseRestoreArray(A, &v);
140:   return 0;
141: }

143: PetscErrorCode MatPtAPNumeric_SeqDense_SeqDense(Mat A, Mat P, Mat C)
144: {
145:   Mat_SeqDense *c = (Mat_SeqDense *)(C->data);

147:   if (c->ptapwork) {
148:     (*C->ops->matmultnumeric)(A, P, c->ptapwork);
149:     (*C->ops->transposematmultnumeric)(P, c->ptapwork, C);
150:   } else SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Must call MatPtAPSymbolic_SeqDense_SeqDense() first");
151:   return 0;
152: }

154: PetscErrorCode MatPtAPSymbolic_SeqDense_SeqDense(Mat A, Mat P, PetscReal fill, Mat C)
155: {
156:   Mat_SeqDense *c;
157:   PetscBool     cisdense = PETSC_FALSE;

159:   MatSetSizes(C, P->cmap->n, P->cmap->n, P->cmap->N, P->cmap->N);
160: #if defined(PETSC_HAVE_CUDA)
161:   PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "");
162: #elif (PETSC_HAVE_HIP)
163:   PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, "");
164: #endif

166:   if (!cisdense) {
167:     PetscBool flg;

169:     PetscObjectTypeCompare((PetscObject)P, ((PetscObject)A)->type_name, &flg);
170:     MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE);
171:   }
172:   MatSetUp(C);
173:   c = (Mat_SeqDense *)C->data;
174:   MatCreate(PetscObjectComm((PetscObject)A), &c->ptapwork);
175:   MatSetSizes(c->ptapwork, A->rmap->n, P->cmap->n, A->rmap->N, P->cmap->N);
176:   MatSetType(c->ptapwork, ((PetscObject)C)->type_name);
177:   MatSetUp(c->ptapwork);
178:   return 0;
179: }

181: PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
182: {
183:   Mat              B = NULL;
184:   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data;
185:   Mat_SeqDense    *b;
186:   PetscInt        *ai = a->i, *aj = a->j, m = A->rmap->N, n = A->cmap->N, i;
187:   const MatScalar *av;
188:   PetscBool        isseqdense;

190:   if (reuse == MAT_REUSE_MATRIX) {
191:     PetscObjectTypeCompare((PetscObject)*newmat, MATSEQDENSE, &isseqdense);
193:   }
194:   if (reuse != MAT_REUSE_MATRIX) {
195:     MatCreate(PetscObjectComm((PetscObject)A), &B);
196:     MatSetSizes(B, m, n, m, n);
197:     MatSetType(B, MATSEQDENSE);
198:     MatSeqDenseSetPreallocation(B, NULL);
199:     b = (Mat_SeqDense *)(B->data);
200:   } else {
201:     b = (Mat_SeqDense *)((*newmat)->data);
202:     PetscArrayzero(b->v, m * n);
203:   }
204:   MatSeqAIJGetArrayRead(A, &av);
205:   for (i = 0; i < m; i++) {
206:     PetscInt j;
207:     for (j = 0; j < ai[1] - ai[0]; j++) {
208:       b->v[*aj * m + i] = *av;
209:       aj++;
210:       av++;
211:     }
212:     ai++;
213:   }
214:   MatSeqAIJRestoreArrayRead(A, &av);

216:   if (reuse == MAT_INPLACE_MATRIX) {
217:     MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY);
218:     MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY);
219:     MatHeaderReplace(A, &B);
220:   } else {
221:     if (B) *newmat = B;
222:     MatAssemblyBegin(*newmat, MAT_FINAL_ASSEMBLY);
223:     MatAssemblyEnd(*newmat, MAT_FINAL_ASSEMBLY);
224:   }
225:   return 0;
226: }

228: PETSC_INTERN PetscErrorCode MatConvert_SeqDense_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
229: {
230:   Mat           B = NULL;
231:   Mat_SeqDense *a = (Mat_SeqDense *)A->data;
232:   PetscInt      i, j;
233:   PetscInt     *rows, *nnz;
234:   MatScalar    *aa = a->v, *vals;

236:   PetscCalloc3(A->rmap->n, &rows, A->rmap->n, &nnz, A->rmap->n, &vals);
237:   if (reuse != MAT_REUSE_MATRIX) {
238:     MatCreate(PetscObjectComm((PetscObject)A), &B);
239:     MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N);
240:     MatSetType(B, MATSEQAIJ);
241:     for (j = 0; j < A->cmap->n; j++) {
242:       for (i = 0; i < A->rmap->n; i++)
243:         if (aa[i] != 0.0 || (i == j && A->cmap->n == A->rmap->n)) ++nnz[i];
244:       aa += a->lda;
245:     }
246:     MatSeqAIJSetPreallocation(B, PETSC_DETERMINE, nnz);
247:   } else B = *newmat;
248:   aa = a->v;
249:   for (j = 0; j < A->cmap->n; j++) {
250:     PetscInt numRows = 0;
251:     for (i = 0; i < A->rmap->n; i++)
252:       if (aa[i] != 0.0 || (i == j && A->cmap->n == A->rmap->n)) {
253:         rows[numRows]   = i;
254:         vals[numRows++] = aa[i];
255:       }
256:     MatSetValues(B, numRows, rows, 1, &j, vals, INSERT_VALUES);
257:     aa += a->lda;
258:   }
259:   PetscFree3(rows, nnz, vals);
260:   MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY);
261:   MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY);

263:   if (reuse == MAT_INPLACE_MATRIX) {
264:     MatHeaderReplace(A, &B);
265:   } else if (reuse != MAT_REUSE_MATRIX) *newmat = B;
266:   return 0;
267: }

269: PetscErrorCode MatAXPY_SeqDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
270: {
271:   Mat_SeqDense      *x = (Mat_SeqDense *)X->data, *y = (Mat_SeqDense *)Y->data;
272:   const PetscScalar *xv;
273:   PetscScalar       *yv;
274:   PetscBLASInt       N, m, ldax = 0, lday = 0, one = 1;

276:   MatDenseGetArrayRead(X, &xv);
277:   MatDenseGetArray(Y, &yv);
278:   PetscBLASIntCast(X->rmap->n * X->cmap->n, &N);
279:   PetscBLASIntCast(X->rmap->n, &m);
280:   PetscBLASIntCast(x->lda, &ldax);
281:   PetscBLASIntCast(y->lda, &lday);
282:   if (ldax > m || lday > m) {
283:     PetscInt j;

285:     for (j = 0; j < X->cmap->n; j++) PetscCallBLAS("BLASaxpy", BLASaxpy_(&m, &alpha, xv + j * ldax, &one, yv + j * lday, &one));
286:   } else {
287:     PetscCallBLAS("BLASaxpy", BLASaxpy_(&N, &alpha, xv, &one, yv, &one));
288:   }
289:   MatDenseRestoreArrayRead(X, &xv);
290:   MatDenseRestoreArray(Y, &yv);
291:   PetscLogFlops(PetscMax(2.0 * N - 1, 0));
292:   return 0;
293: }

295: static PetscErrorCode MatGetInfo_SeqDense(Mat A, MatInfoType flag, MatInfo *info)
296: {
297:   PetscLogDouble N = A->rmap->n * A->cmap->n;

299:   info->block_size        = 1.0;
300:   info->nz_allocated      = N;
301:   info->nz_used           = N;
302:   info->nz_unneeded       = 0;
303:   info->assemblies        = A->num_ass;
304:   info->mallocs           = 0;
305:   info->memory            = 0; /* REVIEW ME */
306:   info->fill_ratio_given  = 0;
307:   info->fill_ratio_needed = 0;
308:   info->factor_mallocs    = 0;
309:   return 0;
310: }

312: PetscErrorCode MatScale_SeqDense(Mat A, PetscScalar alpha)
313: {
314:   Mat_SeqDense *a = (Mat_SeqDense *)A->data;
315:   PetscScalar  *v;
316:   PetscBLASInt  one = 1, j, nz, lda = 0;

318:   MatDenseGetArray(A, &v);
319:   PetscBLASIntCast(a->lda, &lda);
320:   if (lda > A->rmap->n) {
321:     PetscBLASIntCast(A->rmap->n, &nz);
322:     for (j = 0; j < A->cmap->n; j++) PetscCallBLAS("BLASscal", BLASscal_(&nz, &alpha, v + j * lda, &one));
323:   } else {
324:     PetscBLASIntCast(A->rmap->n * A->cmap->n, &nz);
325:     PetscCallBLAS("BLASscal", BLASscal_(&nz, &alpha, v, &one));
326:   }
327:   PetscLogFlops(A->rmap->n * A->cmap->n);
328:   MatDenseRestoreArray(A, &v);
329:   return 0;
330: }

332: PetscErrorCode MatShift_SeqDense(Mat A, PetscScalar alpha)
333: {
334:   Mat_SeqDense *a = (Mat_SeqDense *)A->data;
335:   PetscScalar  *v;
336:   PetscInt      j, k;

338:   MatDenseGetArray(A, &v);
339:   k = PetscMin(A->rmap->n, A->cmap->n);
340:   for (j = 0; j < k; j++) v[j + j * a->lda] += alpha;
341:   PetscLogFlops(k);
342:   MatDenseRestoreArray(A, &v);
343:   return 0;
344: }

346: static PetscErrorCode MatIsHermitian_SeqDense(Mat A, PetscReal rtol, PetscBool *fl)
347: {
348:   Mat_SeqDense      *a = (Mat_SeqDense *)A->data;
349:   PetscInt           i, j, m = A->rmap->n, N = a->lda;
350:   const PetscScalar *v;

352:   *fl = PETSC_FALSE;
353:   if (A->rmap->n != A->cmap->n) return 0;
354:   MatDenseGetArrayRead(A, &v);
355:   for (i = 0; i < m; i++) {
356:     for (j = i; j < m; j++) {
357:       if (PetscAbsScalar(v[i + j * N] - PetscConj(v[j + i * N])) > rtol) goto restore;
358:     }
359:   }
360:   *fl = PETSC_TRUE;
361: restore:
362:   MatDenseRestoreArrayRead(A, &v);
363:   return 0;
364: }

366: static PetscErrorCode MatIsSymmetric_SeqDense(Mat A, PetscReal rtol, PetscBool *fl)
367: {
368:   Mat_SeqDense      *a = (Mat_SeqDense *)A->data;
369:   PetscInt           i, j, m = A->rmap->n, N = a->lda;
370:   const PetscScalar *v;

372:   *fl = PETSC_FALSE;
373:   if (A->rmap->n != A->cmap->n) return 0;
374:   MatDenseGetArrayRead(A, &v);
375:   for (i = 0; i < m; i++) {
376:     for (j = i; j < m; j++) {
377:       if (PetscAbsScalar(v[i + j * N] - v[j + i * N]) > rtol) goto restore;
378:     }
379:   }
380:   *fl = PETSC_TRUE;
381: restore:
382:   MatDenseRestoreArrayRead(A, &v);
383:   return 0;
384: }

386: PetscErrorCode MatDuplicateNoCreate_SeqDense(Mat newi, Mat A, MatDuplicateOption cpvalues)
387: {
388:   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
389:   PetscInt      lda = (PetscInt)mat->lda, j, m, nlda = lda;
390:   PetscBool     isdensecpu;

392:   PetscLayoutReference(A->rmap, &newi->rmap);
393:   PetscLayoutReference(A->cmap, &newi->cmap);
394:   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { /* propagate LDA */
395:     MatDenseSetLDA(newi, lda);
396:   }
397:   PetscObjectTypeCompare((PetscObject)newi, MATSEQDENSE, &isdensecpu);
398:   if (isdensecpu) MatSeqDenseSetPreallocation(newi, NULL);
399:   if (cpvalues == MAT_COPY_VALUES) {
400:     const PetscScalar *av;
401:     PetscScalar       *v;

403:     MatDenseGetArrayRead(A, &av);
404:     MatDenseGetArrayWrite(newi, &v);
405:     MatDenseGetLDA(newi, &nlda);
406:     m = A->rmap->n;
407:     if (lda > m || nlda > m) {
408:       for (j = 0; j < A->cmap->n; j++) PetscArraycpy(v + j * nlda, av + j * lda, m);
409:     } else {
410:       PetscArraycpy(v, av, A->rmap->n * A->cmap->n);
411:     }
412:     MatDenseRestoreArrayWrite(newi, &v);
413:     MatDenseRestoreArrayRead(A, &av);
414:   }
415:   return 0;
416: }

418: PetscErrorCode MatDuplicate_SeqDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
419: {
420:   MatCreate(PetscObjectComm((PetscObject)A), newmat);
421:   MatSetSizes(*newmat, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n);
422:   MatSetType(*newmat, ((PetscObject)A)->type_name);
423:   MatDuplicateNoCreate_SeqDense(*newmat, A, cpvalues);
424:   return 0;
425: }

427: static PetscErrorCode MatSolve_SeqDense_Internal_LU(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k, PetscBool T)
428: {
429:   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
430:   PetscBLASInt  info;

432:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
433:   PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_(T ? "T" : "N", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info));
434:   PetscFPTrapPop();
436:   PetscLogFlops(nrhs * (2.0 * m * m - m));
437:   return 0;
438: }

440: static PetscErrorCode MatConjugate_SeqDense(Mat);

442: static PetscErrorCode MatSolve_SeqDense_Internal_Cholesky(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k, PetscBool T)
443: {
444:   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
445:   PetscBLASInt  info;

447:   if (A->spd == PETSC_BOOL3_TRUE) {
448:     if (PetscDefined(USE_COMPLEX) && T) MatConjugate_SeqDense(A);
449:     PetscFPTrapPush(PETSC_FP_TRAP_OFF);
450:     PetscCallBLAS("LAPACKpotrs", LAPACKpotrs_("L", &m, &nrhs, mat->v, &mat->lda, x, &m, &info));
451:     PetscFPTrapPop();
453:     if (PetscDefined(USE_COMPLEX) && T) MatConjugate_SeqDense(A);
454: #if defined(PETSC_USE_COMPLEX)
455:   } else if (A->hermitian == PETSC_BOOL3_TRUE) {
456:     if (T) MatConjugate_SeqDense(A);
457:     PetscFPTrapPush(PETSC_FP_TRAP_OFF);
458:     PetscCallBLAS("LAPACKhetrs", LAPACKhetrs_("L", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info));
459:     PetscFPTrapPop();
461:     if (T) MatConjugate_SeqDense(A);
462: #endif
463:   } else { /* symmetric case */
464:     PetscFPTrapPush(PETSC_FP_TRAP_OFF);
465:     PetscCallBLAS("LAPACKsytrs", LAPACKsytrs_("L", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info));
466:     PetscFPTrapPop();
468:   }
469:   PetscLogFlops(nrhs * (2.0 * m * m - m));
470:   return 0;
471: }

473: static PetscErrorCode MatSolve_SeqDense_Internal_QR(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k)
474: {
475:   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
476:   PetscBLASInt  info;
477:   char          trans;

479:   if (PetscDefined(USE_COMPLEX)) {
480:     trans = 'C';
481:   } else {
482:     trans = 'T';
483:   }
484:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
485:   { /* lwork depends on the number of right-hand sides */
486:     PetscBLASInt nlfwork, lfwork = -1;
487:     PetscScalar  fwork;

489:     PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", &trans, &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, &fwork, &lfwork, &info));
490:     nlfwork = (PetscBLASInt)PetscRealPart(fwork);
491:     if (nlfwork > mat->lfwork) {
492:       mat->lfwork = nlfwork;
493:       PetscFree(mat->fwork);
494:       PetscMalloc1(mat->lfwork, &mat->fwork);
495:     }
496:   }
497:   PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", &trans, &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, mat->fwork, &mat->lfwork, &info));
498:   PetscFPTrapPop();
500:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
501:   PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "N", "N", &mat->rank, &nrhs, mat->v, &mat->lda, x, &ldx, &info));
502:   PetscFPTrapPop();
504:   for (PetscInt j = 0; j < nrhs; j++) {
505:     for (PetscInt i = mat->rank; i < k; i++) x[j * ldx + i] = 0.;
506:   }
507:   PetscLogFlops(nrhs * (4.0 * m * mat->rank - PetscSqr(mat->rank)));
508:   return 0;
509: }

511: static PetscErrorCode MatSolveTranspose_SeqDense_Internal_QR(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k)
512: {
513:   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
514:   PetscBLASInt  info;

516:   if (A->rmap->n == A->cmap->n && mat->rank == A->rmap->n) {
517:     PetscFPTrapPush(PETSC_FP_TRAP_OFF);
518:     PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &m, &nrhs, mat->v, &mat->lda, x, &ldx, &info));
519:     PetscFPTrapPop();
521:     if (PetscDefined(USE_COMPLEX)) MatConjugate_SeqDense(A);
522:     { /* lwork depends on the number of right-hand sides */
523:       PetscBLASInt nlfwork, lfwork = -1;
524:       PetscScalar  fwork;

526:       PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", "N", &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, &fwork, &lfwork, &info));
527:       nlfwork = (PetscBLASInt)PetscRealPart(fwork);
528:       if (nlfwork > mat->lfwork) {
529:         mat->lfwork = nlfwork;
530:         PetscFree(mat->fwork);
531:         PetscMalloc1(mat->lfwork, &mat->fwork);
532:       }
533:     }
534:     PetscFPTrapPush(PETSC_FP_TRAP_OFF);
535:     PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", "N", &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, mat->fwork, &mat->lfwork, &info));
536:     PetscFPTrapPop();
538:     if (PetscDefined(USE_COMPLEX)) MatConjugate_SeqDense(A);
539:   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "QR factored matrix cannot be used for transpose solve");
540:   PetscLogFlops(nrhs * (4.0 * m * mat->rank - PetscSqr(mat->rank)));
541:   return 0;
542: }

544: static PetscErrorCode MatSolve_SeqDense_SetUp(Mat A, Vec xx, Vec yy, PetscScalar **_y, PetscBLASInt *_m, PetscBLASInt *_k)
545: {
546:   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
547:   PetscScalar  *y;
548:   PetscBLASInt  m = 0, k = 0;

550:   PetscBLASIntCast(A->rmap->n, &m);
551:   PetscBLASIntCast(A->cmap->n, &k);
552:   if (k < m) {
553:     VecCopy(xx, mat->qrrhs);
554:     VecGetArray(mat->qrrhs, &y);
555:   } else {
556:     VecCopy(xx, yy);
557:     VecGetArray(yy, &y);
558:   }
559:   *_y = y;
560:   *_k = k;
561:   *_m = m;
562:   return 0;
563: }

565: static PetscErrorCode MatSolve_SeqDense_TearDown(Mat A, Vec xx, Vec yy, PetscScalar **_y, PetscBLASInt *_m, PetscBLASInt *_k)
566: {
567:   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
568:   PetscScalar  *y   = NULL;
569:   PetscBLASInt  m, k;

571:   y   = *_y;
572:   *_y = NULL;
573:   k   = *_k;
574:   m   = *_m;
575:   if (k < m) {
576:     PetscScalar *yv;
577:     VecGetArray(yy, &yv);
578:     PetscArraycpy(yv, y, k);
579:     VecRestoreArray(yy, &yv);
580:     VecRestoreArray(mat->qrrhs, &y);
581:   } else {
582:     VecRestoreArray(yy, &y);
583:   }
584:   return 0;
585: }

587: static PetscErrorCode MatSolve_SeqDense_LU(Mat A, Vec xx, Vec yy)
588: {
589:   PetscScalar *y = NULL;
590:   PetscBLASInt m = 0, k = 0;

592:   MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k);
593:   MatSolve_SeqDense_Internal_LU(A, y, m, m, 1, k, PETSC_FALSE);
594:   MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k);
595:   return 0;
596: }

598: static PetscErrorCode MatSolveTranspose_SeqDense_LU(Mat A, Vec xx, Vec yy)
599: {
600:   PetscScalar *y = NULL;
601:   PetscBLASInt m = 0, k = 0;

603:   MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k);
604:   MatSolve_SeqDense_Internal_LU(A, y, m, m, 1, k, PETSC_TRUE);
605:   MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k);
606:   return 0;
607: }

609: static PetscErrorCode MatSolve_SeqDense_Cholesky(Mat A, Vec xx, Vec yy)
610: {
611:   PetscScalar *y = NULL;
612:   PetscBLASInt m = 0, k = 0;

614:   MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k);
615:   MatSolve_SeqDense_Internal_Cholesky(A, y, m, m, 1, k, PETSC_FALSE);
616:   MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k);
617:   return 0;
618: }

620: static PetscErrorCode MatSolveTranspose_SeqDense_Cholesky(Mat A, Vec xx, Vec yy)
621: {
622:   PetscScalar *y = NULL;
623:   PetscBLASInt m = 0, k = 0;

625:   MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k);
626:   MatSolve_SeqDense_Internal_Cholesky(A, y, m, m, 1, k, PETSC_TRUE);
627:   MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k);
628:   return 0;
629: }

631: static PetscErrorCode MatSolve_SeqDense_QR(Mat A, Vec xx, Vec yy)
632: {
633:   PetscScalar *y = NULL;
634:   PetscBLASInt m = 0, k = 0;

636:   MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k);
637:   MatSolve_SeqDense_Internal_QR(A, y, PetscMax(m, k), m, 1, k);
638:   MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k);
639:   return 0;
640: }

642: static PetscErrorCode MatSolveTranspose_SeqDense_QR(Mat A, Vec xx, Vec yy)
643: {
644:   PetscScalar *y = NULL;
645:   PetscBLASInt m = 0, k = 0;

647:   MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k);
648:   MatSolveTranspose_SeqDense_Internal_QR(A, y, PetscMax(m, k), m, 1, k);
649:   MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k);
650:   return 0;
651: }

653: static PetscErrorCode MatMatSolve_SeqDense_SetUp(Mat A, Mat B, Mat X, PetscScalar **_y, PetscBLASInt *_ldy, PetscBLASInt *_m, PetscBLASInt *_nrhs, PetscBLASInt *_k)
654: {
655:   const PetscScalar *b;
656:   PetscScalar       *y;
657:   PetscInt           n, _ldb, _ldx;
658:   PetscBLASInt       nrhs = 0, m = 0, k = 0, ldb = 0, ldx = 0, ldy = 0;

660:   *_ldy  = 0;
661:   *_m    = 0;
662:   *_nrhs = 0;
663:   *_k    = 0;
664:   *_y    = NULL;
665:   PetscBLASIntCast(A->rmap->n, &m);
666:   PetscBLASIntCast(A->cmap->n, &k);
667:   MatGetSize(B, NULL, &n);
668:   PetscBLASIntCast(n, &nrhs);
669:   MatDenseGetLDA(B, &_ldb);
670:   PetscBLASIntCast(_ldb, &ldb);
671:   MatDenseGetLDA(X, &_ldx);
672:   PetscBLASIntCast(_ldx, &ldx);
673:   if (ldx < m) {
674:     MatDenseGetArrayRead(B, &b);
675:     PetscMalloc1(nrhs * m, &y);
676:     if (ldb == m) {
677:       PetscArraycpy(y, b, ldb * nrhs);
678:     } else {
679:       for (PetscInt j = 0; j < nrhs; j++) PetscArraycpy(&y[j * m], &b[j * ldb], m);
680:     }
681:     ldy = m;
682:     MatDenseRestoreArrayRead(B, &b);
683:   } else {
684:     if (ldb == ldx) {
685:       MatCopy(B, X, SAME_NONZERO_PATTERN);
686:       MatDenseGetArray(X, &y);
687:     } else {
688:       MatDenseGetArray(X, &y);
689:       MatDenseGetArrayRead(B, &b);
690:       for (PetscInt j = 0; j < nrhs; j++) PetscArraycpy(&y[j * ldx], &b[j * ldb], m);
691:       MatDenseRestoreArrayRead(B, &b);
692:     }
693:     ldy = ldx;
694:   }
695:   *_y    = y;
696:   *_ldy  = ldy;
697:   *_k    = k;
698:   *_m    = m;
699:   *_nrhs = nrhs;
700:   return 0;
701: }

703: static PetscErrorCode MatMatSolve_SeqDense_TearDown(Mat A, Mat B, Mat X, PetscScalar **_y, PetscBLASInt *_ldy, PetscBLASInt *_m, PetscBLASInt *_nrhs, PetscBLASInt *_k)
704: {
705:   PetscScalar *y;
706:   PetscInt     _ldx;
707:   PetscBLASInt k, ldy, nrhs, ldx = 0;

709:   y    = *_y;
710:   *_y  = NULL;
711:   k    = *_k;
712:   ldy  = *_ldy;
713:   nrhs = *_nrhs;
714:   MatDenseGetLDA(X, &_ldx);
715:   PetscBLASIntCast(_ldx, &ldx);
716:   if (ldx != ldy) {
717:     PetscScalar *xv;
718:     MatDenseGetArray(X, &xv);
719:     for (PetscInt j = 0; j < nrhs; j++) PetscArraycpy(&xv[j * ldx], &y[j * ldy], k);
720:     MatDenseRestoreArray(X, &xv);
721:     PetscFree(y);
722:   } else {
723:     MatDenseRestoreArray(X, &y);
724:   }
725:   return 0;
726: }

728: static PetscErrorCode MatMatSolve_SeqDense_LU(Mat A, Mat B, Mat X)
729: {
730:   PetscScalar *y;
731:   PetscBLASInt m, k, ldy, nrhs;

733:   MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k);
734:   MatSolve_SeqDense_Internal_LU(A, y, ldy, m, nrhs, k, PETSC_FALSE);
735:   MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k);
736:   return 0;
737: }

739: static PetscErrorCode MatMatSolveTranspose_SeqDense_LU(Mat A, Mat B, Mat X)
740: {
741:   PetscScalar *y;
742:   PetscBLASInt m, k, ldy, nrhs;

744:   MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k);
745:   MatSolve_SeqDense_Internal_LU(A, y, ldy, m, nrhs, k, PETSC_TRUE);
746:   MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k);
747:   return 0;
748: }

750: static PetscErrorCode MatMatSolve_SeqDense_Cholesky(Mat A, Mat B, Mat X)
751: {
752:   PetscScalar *y;
753:   PetscBLASInt m, k, ldy, nrhs;

755:   MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k);
756:   MatSolve_SeqDense_Internal_Cholesky(A, y, ldy, m, nrhs, k, PETSC_FALSE);
757:   MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k);
758:   return 0;
759: }

761: static PetscErrorCode MatMatSolveTranspose_SeqDense_Cholesky(Mat A, Mat B, Mat X)
762: {
763:   PetscScalar *y;
764:   PetscBLASInt m, k, ldy, nrhs;

766:   MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k);
767:   MatSolve_SeqDense_Internal_Cholesky(A, y, ldy, m, nrhs, k, PETSC_TRUE);
768:   MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k);
769:   return 0;
770: }

772: static PetscErrorCode MatMatSolve_SeqDense_QR(Mat A, Mat B, Mat X)
773: {
774:   PetscScalar *y;
775:   PetscBLASInt m, k, ldy, nrhs;

777:   MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k);
778:   MatSolve_SeqDense_Internal_QR(A, y, ldy, m, nrhs, k);
779:   MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k);
780:   return 0;
781: }

783: static PetscErrorCode MatMatSolveTranspose_SeqDense_QR(Mat A, Mat B, Mat X)
784: {
785:   PetscScalar *y;
786:   PetscBLASInt m, k, ldy, nrhs;

788:   MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k);
789:   MatSolveTranspose_SeqDense_Internal_QR(A, y, ldy, m, nrhs, k);
790:   MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k);
791:   return 0;
792: }

794: /* ---------------------------------------------------------------*/
795: /* COMMENT: I have chosen to hide row permutation in the pivots,
796:    rather than put it in the Mat->row slot.*/
797: PetscErrorCode MatLUFactor_SeqDense(Mat A, IS row, IS col, const MatFactorInfo *minfo)
798: {
799:   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
800:   PetscBLASInt  n, m, info;

802:   PetscBLASIntCast(A->cmap->n, &n);
803:   PetscBLASIntCast(A->rmap->n, &m);
804:   if (!mat->pivots) { PetscMalloc1(A->rmap->n, &mat->pivots); }
805:   if (!A->rmap->n || !A->cmap->n) return 0;
806:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
807:   PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&m, &n, mat->v, &mat->lda, mat->pivots, &info));
808:   PetscFPTrapPop();


813:   A->ops->solve             = MatSolve_SeqDense_LU;
814:   A->ops->matsolve          = MatMatSolve_SeqDense_LU;
815:   A->ops->solvetranspose    = MatSolveTranspose_SeqDense_LU;
816:   A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_LU;
817:   A->factortype             = MAT_FACTOR_LU;

819:   PetscFree(A->solvertype);
820:   PetscStrallocpy(MATSOLVERPETSC, &A->solvertype);

822:   PetscLogFlops((2.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3);
823:   return 0;
824: }

826: static PetscErrorCode MatLUFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy)
827: {
828:   MatFactorInfo info;

830:   MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES);
831:   PetscUseTypeMethod(fact, lufactor, NULL, NULL, &info);
832:   return 0;
833: }

835: PetscErrorCode MatLUFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info)
836: {
837:   fact->preallocated         = PETSC_TRUE;
838:   fact->assembled            = PETSC_TRUE;
839:   fact->ops->lufactornumeric = MatLUFactorNumeric_SeqDense;
840:   return 0;
841: }

843: /* Cholesky as L*L^T or L*D*L^T and the symmetric/hermitian complex variants */
844: PetscErrorCode MatCholeskyFactor_SeqDense(Mat A, IS perm, const MatFactorInfo *factinfo)
845: {
846:   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
847:   PetscBLASInt  info, n;

849:   PetscBLASIntCast(A->cmap->n, &n);
850:   if (!A->rmap->n || !A->cmap->n) return 0;
851:   if (A->spd == PETSC_BOOL3_TRUE) {
852:     PetscFPTrapPush(PETSC_FP_TRAP_OFF);
853:     PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &n, mat->v, &mat->lda, &info));
854:     PetscFPTrapPop();
855: #if defined(PETSC_USE_COMPLEX)
856:   } else if (A->hermitian == PETSC_BOOL3_TRUE) {
857:     if (!mat->pivots) { PetscMalloc1(A->rmap->n, &mat->pivots); }
858:     if (!mat->fwork) {
859:       PetscScalar dummy;

861:       mat->lfwork = -1;
862:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
863:       PetscCallBLAS("LAPACKhetrf", LAPACKhetrf_("L", &n, mat->v, &mat->lda, mat->pivots, &dummy, &mat->lfwork, &info));
864:       PetscFPTrapPop();
865:       mat->lfwork = (PetscInt)PetscRealPart(dummy);
866:       PetscMalloc1(mat->lfwork, &mat->fwork);
867:     }
868:     PetscFPTrapPush(PETSC_FP_TRAP_OFF);
869:     PetscCallBLAS("LAPACKhetrf", LAPACKhetrf_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info));
870:     PetscFPTrapPop();
871: #endif
872:   } else { /* symmetric case */
873:     if (!mat->pivots) { PetscMalloc1(A->rmap->n, &mat->pivots); }
874:     if (!mat->fwork) {
875:       PetscScalar dummy;

877:       mat->lfwork = -1;
878:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
879:       PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &n, mat->v, &mat->lda, mat->pivots, &dummy, &mat->lfwork, &info));
880:       PetscFPTrapPop();
881:       mat->lfwork = (PetscInt)PetscRealPart(dummy);
882:       PetscMalloc1(mat->lfwork, &mat->fwork);
883:     }
884:     PetscFPTrapPush(PETSC_FP_TRAP_OFF);
885:     PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info));
886:     PetscFPTrapPop();
887:   }

890:   A->ops->solve             = MatSolve_SeqDense_Cholesky;
891:   A->ops->matsolve          = MatMatSolve_SeqDense_Cholesky;
892:   A->ops->solvetranspose    = MatSolveTranspose_SeqDense_Cholesky;
893:   A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_Cholesky;
894:   A->factortype             = MAT_FACTOR_CHOLESKY;

896:   PetscFree(A->solvertype);
897:   PetscStrallocpy(MATSOLVERPETSC, &A->solvertype);

899:   PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0);
900:   return 0;
901: }

903: static PetscErrorCode MatCholeskyFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy)
904: {
905:   MatFactorInfo info;

907:   info.fill = 1.0;

909:   MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES);
910:   PetscUseTypeMethod(fact, choleskyfactor, NULL, &info);
911:   return 0;
912: }

914: PetscErrorCode MatCholeskyFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, const MatFactorInfo *info)
915: {
916:   fact->assembled                  = PETSC_TRUE;
917:   fact->preallocated               = PETSC_TRUE;
918:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqDense;
919:   return 0;
920: }

922: PetscErrorCode MatQRFactor_SeqDense(Mat A, IS col, const MatFactorInfo *minfo)
923: {
924:   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
925:   PetscBLASInt  n, m, info, min, max;

927:   PetscBLASIntCast(A->cmap->n, &n);
928:   PetscBLASIntCast(A->rmap->n, &m);
929:   max = PetscMax(m, n);
930:   min = PetscMin(m, n);
931:   if (!mat->tau) { PetscMalloc1(min, &mat->tau); }
932:   if (!mat->pivots) { PetscMalloc1(n, &mat->pivots); }
933:   if (!mat->qrrhs) MatCreateVecs(A, NULL, &(mat->qrrhs));
934:   if (!A->rmap->n || !A->cmap->n) return 0;
935:   if (!mat->fwork) {
936:     PetscScalar dummy;

938:     mat->lfwork = -1;
939:     PetscFPTrapPush(PETSC_FP_TRAP_OFF);
940:     PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&m, &n, mat->v, &mat->lda, mat->tau, &dummy, &mat->lfwork, &info));
941:     PetscFPTrapPop();
942:     mat->lfwork = (PetscInt)PetscRealPart(dummy);
943:     PetscMalloc1(mat->lfwork, &mat->fwork);
944:   }
945:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
946:   PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&m, &n, mat->v, &mat->lda, mat->tau, mat->fwork, &mat->lfwork, &info));
947:   PetscFPTrapPop();
949:   // TODO: try to estimate rank or test for and use geqp3 for rank revealing QR.  For now just say rank is min of m and n
950:   mat->rank = min;

952:   A->ops->solve    = MatSolve_SeqDense_QR;
953:   A->ops->matsolve = MatMatSolve_SeqDense_QR;
954:   A->factortype    = MAT_FACTOR_QR;
955:   if (m == n) {
956:     A->ops->solvetranspose    = MatSolveTranspose_SeqDense_QR;
957:     A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_QR;
958:   }

960:   PetscFree(A->solvertype);
961:   PetscStrallocpy(MATSOLVERPETSC, &A->solvertype);

963:   PetscLogFlops(2.0 * min * min * (max - min / 3.0));
964:   return 0;
965: }

967: static PetscErrorCode MatQRFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy)
968: {
969:   MatFactorInfo info;

971:   info.fill = 1.0;

973:   MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES);
974:   PetscUseMethod(fact, "MatQRFactor_C", (Mat, IS, const MatFactorInfo *), (fact, NULL, &info));
975:   return 0;
976: }

978: PetscErrorCode MatQRFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, const MatFactorInfo *info)
979: {
980:   fact->assembled    = PETSC_TRUE;
981:   fact->preallocated = PETSC_TRUE;
982:   PetscObjectComposeFunction((PetscObject)fact, "MatQRFactorNumeric_C", MatQRFactorNumeric_SeqDense);
983:   return 0;
984: }

986: /* uses LAPACK */
987: PETSC_INTERN PetscErrorCode MatGetFactor_seqdense_petsc(Mat A, MatFactorType ftype, Mat *fact)
988: {
989:   MatCreate(PetscObjectComm((PetscObject)A), fact);
990:   MatSetSizes(*fact, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n);
991:   MatSetType(*fact, MATDENSE);
992:   (*fact)->trivialsymbolic = PETSC_TRUE;
993:   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) {
994:     (*fact)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqDense;
995:     (*fact)->ops->ilufactorsymbolic = MatLUFactorSymbolic_SeqDense;
996:   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
997:     (*fact)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqDense;
998:   } else if (ftype == MAT_FACTOR_QR) {
999:     PetscObjectComposeFunction((PetscObject)(*fact), "MatQRFactorSymbolic_C", MatQRFactorSymbolic_SeqDense);
1000:   }
1001:   (*fact)->factortype = ftype;

1003:   PetscFree((*fact)->solvertype);
1004:   PetscStrallocpy(MATSOLVERPETSC, &(*fact)->solvertype);
1005:   PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_LU]);
1006:   PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_ILU]);
1007:   PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_CHOLESKY]);
1008:   PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_ICC]);
1009:   return 0;
1010: }

1012: /* ------------------------------------------------------------------*/
1013: static PetscErrorCode MatSOR_SeqDense(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal shift, PetscInt its, PetscInt lits, Vec xx)
1014: {
1015:   Mat_SeqDense      *mat = (Mat_SeqDense *)A->data;
1016:   PetscScalar       *x, *v = mat->v, zero = 0.0, xt;
1017:   const PetscScalar *b;
1018:   PetscInt           m = A->rmap->n, i;
1019:   PetscBLASInt       o = 1, bm = 0;

1021: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1023: #endif
1024:   if (shift == -1) shift = 0.0; /* negative shift indicates do not error on zero diagonal; this code never zeros on zero diagonal */
1025:   PetscBLASIntCast(m, &bm);
1026:   if (flag & SOR_ZERO_INITIAL_GUESS) {
1027:     /* this is a hack fix, should have another version without the second BLASdotu */
1028:     VecSet(xx, zero);
1029:   }
1030:   VecGetArray(xx, &x);
1031:   VecGetArrayRead(bb, &b);
1032:   its = its * lits;
1034:   while (its--) {
1035:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
1036:       for (i = 0; i < m; i++) {
1037:         PetscCallBLAS("BLASdotu", xt = b[i] - BLASdotu_(&bm, v + i, &bm, x, &o));
1038:         x[i] = (1. - omega) * x[i] + omega * (xt + v[i + i * m] * x[i]) / (v[i + i * m] + shift);
1039:       }
1040:     }
1041:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1042:       for (i = m - 1; i >= 0; i--) {
1043:         PetscCallBLAS("BLASdotu", xt = b[i] - BLASdotu_(&bm, v + i, &bm, x, &o));
1044:         x[i] = (1. - omega) * x[i] + omega * (xt + v[i + i * m] * x[i]) / (v[i + i * m] + shift);
1045:       }
1046:     }
1047:   }
1048:   VecRestoreArrayRead(bb, &b);
1049:   VecRestoreArray(xx, &x);
1050:   return 0;
1051: }

1053: /* -----------------------------------------------------------------*/
1054: PetscErrorCode MatMultTranspose_SeqDense(Mat A, Vec xx, Vec yy)
1055: {
1056:   Mat_SeqDense      *mat = (Mat_SeqDense *)A->data;
1057:   const PetscScalar *v   = mat->v, *x;
1058:   PetscScalar       *y;
1059:   PetscBLASInt       m, n, _One = 1;
1060:   PetscScalar        _DOne = 1.0, _DZero = 0.0;

1062:   PetscBLASIntCast(A->rmap->n, &m);
1063:   PetscBLASIntCast(A->cmap->n, &n);
1064:   VecGetArrayRead(xx, &x);
1065:   VecGetArrayWrite(yy, &y);
1066:   if (!A->rmap->n || !A->cmap->n) {
1067:     PetscBLASInt i;
1068:     for (i = 0; i < n; i++) y[i] = 0.0;
1069:   } else {
1070:     PetscCallBLAS("BLASgemv", BLASgemv_("T", &m, &n, &_DOne, v, &mat->lda, x, &_One, &_DZero, y, &_One));
1071:     PetscLogFlops(2.0 * A->rmap->n * A->cmap->n - A->cmap->n);
1072:   }
1073:   VecRestoreArrayRead(xx, &x);
1074:   VecRestoreArrayWrite(yy, &y);
1075:   return 0;
1076: }

1078: PetscErrorCode MatMult_SeqDense(Mat A, Vec xx, Vec yy)
1079: {
1080:   Mat_SeqDense      *mat = (Mat_SeqDense *)A->data;
1081:   PetscScalar       *y, _DOne = 1.0, _DZero = 0.0;
1082:   PetscBLASInt       m, n, _One             = 1;
1083:   const PetscScalar *v = mat->v, *x;

1085:   PetscBLASIntCast(A->rmap->n, &m);
1086:   PetscBLASIntCast(A->cmap->n, &n);
1087:   VecGetArrayRead(xx, &x);
1088:   VecGetArrayWrite(yy, &y);
1089:   if (!A->rmap->n || !A->cmap->n) {
1090:     PetscBLASInt i;
1091:     for (i = 0; i < m; i++) y[i] = 0.0;
1092:   } else {
1093:     PetscCallBLAS("BLASgemv", BLASgemv_("N", &m, &n, &_DOne, v, &(mat->lda), x, &_One, &_DZero, y, &_One));
1094:     PetscLogFlops(2.0 * A->rmap->n * A->cmap->n - A->rmap->n);
1095:   }
1096:   VecRestoreArrayRead(xx, &x);
1097:   VecRestoreArrayWrite(yy, &y);
1098:   return 0;
1099: }

1101: PetscErrorCode MatMultAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy)
1102: {
1103:   Mat_SeqDense      *mat = (Mat_SeqDense *)A->data;
1104:   const PetscScalar *v   = mat->v, *x;
1105:   PetscScalar       *y, _DOne = 1.0;
1106:   PetscBLASInt       m, n, _One = 1;

1108:   PetscBLASIntCast(A->rmap->n, &m);
1109:   PetscBLASIntCast(A->cmap->n, &n);
1110:   VecCopy(zz, yy);
1111:   if (!A->rmap->n || !A->cmap->n) return 0;
1112:   VecGetArrayRead(xx, &x);
1113:   VecGetArray(yy, &y);
1114:   PetscCallBLAS("BLASgemv", BLASgemv_("N", &m, &n, &_DOne, v, &(mat->lda), x, &_One, &_DOne, y, &_One));
1115:   VecRestoreArrayRead(xx, &x);
1116:   VecRestoreArray(yy, &y);
1117:   PetscLogFlops(2.0 * A->rmap->n * A->cmap->n);
1118:   return 0;
1119: }

1121: PetscErrorCode MatMultTransposeAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy)
1122: {
1123:   Mat_SeqDense      *mat = (Mat_SeqDense *)A->data;
1124:   const PetscScalar *v   = mat->v, *x;
1125:   PetscScalar       *y;
1126:   PetscBLASInt       m, n, _One = 1;
1127:   PetscScalar        _DOne = 1.0;

1129:   PetscBLASIntCast(A->rmap->n, &m);
1130:   PetscBLASIntCast(A->cmap->n, &n);
1131:   VecCopy(zz, yy);
1132:   if (!A->rmap->n || !A->cmap->n) return 0;
1133:   VecGetArrayRead(xx, &x);
1134:   VecGetArray(yy, &y);
1135:   PetscCallBLAS("BLASgemv", BLASgemv_("T", &m, &n, &_DOne, v, &(mat->lda), x, &_One, &_DOne, y, &_One));
1136:   VecRestoreArrayRead(xx, &x);
1137:   VecRestoreArray(yy, &y);
1138:   PetscLogFlops(2.0 * A->rmap->n * A->cmap->n);
1139:   return 0;
1140: }

1142: /* -----------------------------------------------------------------*/
1143: static PetscErrorCode MatGetRow_SeqDense(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **vals)
1144: {
1145:   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1146:   PetscInt      i;

1148:   *ncols = A->cmap->n;
1149:   if (cols) {
1150:     PetscMalloc1(A->cmap->n, cols);
1151:     for (i = 0; i < A->cmap->n; i++) (*cols)[i] = i;
1152:   }
1153:   if (vals) {
1154:     const PetscScalar *v;

1156:     MatDenseGetArrayRead(A, &v);
1157:     PetscMalloc1(A->cmap->n, vals);
1158:     v += row;
1159:     for (i = 0; i < A->cmap->n; i++) {
1160:       (*vals)[i] = *v;
1161:       v += mat->lda;
1162:     }
1163:     MatDenseRestoreArrayRead(A, &v);
1164:   }
1165:   return 0;
1166: }

1168: static PetscErrorCode MatRestoreRow_SeqDense(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **vals)
1169: {
1170:   if (ncols) *ncols = 0;
1171:   if (cols) PetscFree(*cols);
1172:   if (vals) PetscFree(*vals);
1173:   return 0;
1174: }
1175: /* ----------------------------------------------------------------*/
1176: static PetscErrorCode MatSetValues_SeqDense(Mat A, PetscInt m, const PetscInt indexm[], PetscInt n, const PetscInt indexn[], const PetscScalar v[], InsertMode addv)
1177: {
1178:   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1179:   PetscScalar  *av;
1180:   PetscInt      i, j, idx = 0;
1181: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1182:   PetscOffloadMask oldf;
1183: #endif

1185:   MatDenseGetArray(A, &av);
1186:   if (!mat->roworiented) {
1187:     if (addv == INSERT_VALUES) {
1188:       for (j = 0; j < n; j++) {
1189:         if (indexn[j] < 0) {
1190:           idx += m;
1191:           continue;
1192:         }
1194:         for (i = 0; i < m; i++) {
1195:           if (indexm[i] < 0) {
1196:             idx++;
1197:             continue;
1198:           }
1200:           av[indexn[j] * mat->lda + indexm[i]] = v[idx++];
1201:         }
1202:       }
1203:     } else {
1204:       for (j = 0; j < n; j++) {
1205:         if (indexn[j] < 0) {
1206:           idx += m;
1207:           continue;
1208:         }
1210:         for (i = 0; i < m; i++) {
1211:           if (indexm[i] < 0) {
1212:             idx++;
1213:             continue;
1214:           }
1216:           av[indexn[j] * mat->lda + indexm[i]] += v[idx++];
1217:         }
1218:       }
1219:     }
1220:   } else {
1221:     if (addv == INSERT_VALUES) {
1222:       for (i = 0; i < m; i++) {
1223:         if (indexm[i] < 0) {
1224:           idx += n;
1225:           continue;
1226:         }
1228:         for (j = 0; j < n; j++) {
1229:           if (indexn[j] < 0) {
1230:             idx++;
1231:             continue;
1232:           }
1234:           av[indexn[j] * mat->lda + indexm[i]] = v[idx++];
1235:         }
1236:       }
1237:     } else {
1238:       for (i = 0; i < m; i++) {
1239:         if (indexm[i] < 0) {
1240:           idx += n;
1241:           continue;
1242:         }
1244:         for (j = 0; j < n; j++) {
1245:           if (indexn[j] < 0) {
1246:             idx++;
1247:             continue;
1248:           }
1250:           av[indexn[j] * mat->lda + indexm[i]] += v[idx++];
1251:         }
1252:       }
1253:     }
1254:   }
1255:   /* hack to prevent unneeded copy to the GPU while returning the array */
1256: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1257:   oldf           = A->offloadmask;
1258:   A->offloadmask = PETSC_OFFLOAD_GPU;
1259: #endif
1260:   MatDenseRestoreArray(A, &av);
1261: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1262:   A->offloadmask = (oldf == PETSC_OFFLOAD_UNALLOCATED ? PETSC_OFFLOAD_UNALLOCATED : PETSC_OFFLOAD_CPU);
1263: #endif
1264:   return 0;
1265: }

1267: static PetscErrorCode MatGetValues_SeqDense(Mat A, PetscInt m, const PetscInt indexm[], PetscInt n, const PetscInt indexn[], PetscScalar v[])
1268: {
1269:   Mat_SeqDense      *mat = (Mat_SeqDense *)A->data;
1270:   const PetscScalar *vv;
1271:   PetscInt           i, j;

1273:   MatDenseGetArrayRead(A, &vv);
1274:   /* row-oriented output */
1275:   for (i = 0; i < m; i++) {
1276:     if (indexm[i] < 0) {
1277:       v += n;
1278:       continue;
1279:     }
1281:     for (j = 0; j < n; j++) {
1282:       if (indexn[j] < 0) {
1283:         v++;
1284:         continue;
1285:       }
1287:       *v++ = vv[indexn[j] * mat->lda + indexm[i]];
1288:     }
1289:   }
1290:   MatDenseRestoreArrayRead(A, &vv);
1291:   return 0;
1292: }

1294: /* -----------------------------------------------------------------*/

1296: PetscErrorCode MatView_Dense_Binary(Mat mat, PetscViewer viewer)
1297: {
1298:   PetscBool          skipHeader;
1299:   PetscViewerFormat  format;
1300:   PetscInt           header[4], M, N, m, lda, i, j, k;
1301:   const PetscScalar *v;
1302:   PetscScalar       *vwork;

1304:   PetscViewerSetUp(viewer);
1305:   PetscViewerBinaryGetSkipHeader(viewer, &skipHeader);
1306:   PetscViewerGetFormat(viewer, &format);
1307:   if (skipHeader) format = PETSC_VIEWER_NATIVE;

1309:   MatGetSize(mat, &M, &N);

1311:   /* write matrix header */
1312:   header[0] = MAT_FILE_CLASSID;
1313:   header[1] = M;
1314:   header[2] = N;
1315:   header[3] = (format == PETSC_VIEWER_NATIVE) ? MATRIX_BINARY_FORMAT_DENSE : M * N;
1316:   if (!skipHeader) PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT);

1318:   MatGetLocalSize(mat, &m, NULL);
1319:   if (format != PETSC_VIEWER_NATIVE) {
1320:     PetscInt nnz = m * N, *iwork;
1321:     /* store row lengths for each row */
1322:     PetscMalloc1(nnz, &iwork);
1323:     for (i = 0; i < m; i++) iwork[i] = N;
1324:     PetscViewerBinaryWriteAll(viewer, iwork, m, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT);
1325:     /* store column indices (zero start index) */
1326:     for (k = 0, i = 0; i < m; i++)
1327:       for (j = 0; j < N; j++, k++) iwork[k] = j;
1328:     PetscViewerBinaryWriteAll(viewer, iwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT);
1329:     PetscFree(iwork);
1330:   }
1331:   /* store matrix values as a dense matrix in row major order */
1332:   PetscMalloc1(m * N, &vwork);
1333:   MatDenseGetArrayRead(mat, &v);
1334:   MatDenseGetLDA(mat, &lda);
1335:   for (k = 0, i = 0; i < m; i++)
1336:     for (j = 0; j < N; j++, k++) vwork[k] = v[i + lda * j];
1337:   MatDenseRestoreArrayRead(mat, &v);
1338:   PetscViewerBinaryWriteAll(viewer, vwork, m * N, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR);
1339:   PetscFree(vwork);
1340:   return 0;
1341: }

1343: PetscErrorCode MatLoad_Dense_Binary(Mat mat, PetscViewer viewer)
1344: {
1345:   PetscBool    skipHeader;
1346:   PetscInt     header[4], M, N, m, nz, lda, i, j, k;
1347:   PetscInt     rows, cols;
1348:   PetscScalar *v, *vwork;

1350:   PetscViewerSetUp(viewer);
1351:   PetscViewerBinaryGetSkipHeader(viewer, &skipHeader);

1353:   if (!skipHeader) {
1354:     PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT);
1356:     M = header[1];
1357:     N = header[2];
1360:     nz = header[3];
1362:   } else {
1363:     MatGetSize(mat, &M, &N);
1365:     nz = MATRIX_BINARY_FORMAT_DENSE;
1366:   }

1368:   /* setup global sizes if not set */
1369:   if (mat->rmap->N < 0) mat->rmap->N = M;
1370:   if (mat->cmap->N < 0) mat->cmap->N = N;
1371:   MatSetUp(mat);
1372:   /* check if global sizes are correct */
1373:   MatGetSize(mat, &rows, &cols);

1376:   MatGetSize(mat, NULL, &N);
1377:   MatGetLocalSize(mat, &m, NULL);
1378:   MatDenseGetArray(mat, &v);
1379:   MatDenseGetLDA(mat, &lda);
1380:   if (nz == MATRIX_BINARY_FORMAT_DENSE) { /* matrix in file is dense format */
1381:     PetscInt nnz = m * N;
1382:     /* read in matrix values */
1383:     PetscMalloc1(nnz, &vwork);
1384:     PetscViewerBinaryReadAll(viewer, vwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR);
1385:     /* store values in column major order */
1386:     for (j = 0; j < N; j++)
1387:       for (i = 0; i < m; i++) v[i + lda * j] = vwork[i * N + j];
1388:     PetscFree(vwork);
1389:   } else { /* matrix in file is sparse format */
1390:     PetscInt nnz = 0, *rlens, *icols;
1391:     /* read in row lengths */
1392:     PetscMalloc1(m, &rlens);
1393:     PetscViewerBinaryReadAll(viewer, rlens, m, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT);
1394:     for (i = 0; i < m; i++) nnz += rlens[i];
1395:     /* read in column indices and values */
1396:     PetscMalloc2(nnz, &icols, nnz, &vwork);
1397:     PetscViewerBinaryReadAll(viewer, icols, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT);
1398:     PetscViewerBinaryReadAll(viewer, vwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR);
1399:     /* store values in column major order */
1400:     for (k = 0, i = 0; i < m; i++)
1401:       for (j = 0; j < rlens[i]; j++, k++) v[i + lda * icols[k]] = vwork[k];
1402:     PetscFree(rlens);
1403:     PetscFree2(icols, vwork);
1404:   }
1405:   MatDenseRestoreArray(mat, &v);
1406:   MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY);
1407:   MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY);
1408:   return 0;
1409: }

1411: PetscErrorCode MatLoad_SeqDense(Mat newMat, PetscViewer viewer)
1412: {
1413:   PetscBool isbinary, ishdf5;

1417:   /* force binary viewer to load .info file if it has not yet done so */
1418:   PetscViewerSetUp(viewer);
1419:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary);
1420:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5);
1421:   if (isbinary) {
1422:     MatLoad_Dense_Binary(newMat, viewer);
1423:   } else if (ishdf5) {
1424: #if defined(PETSC_HAVE_HDF5)
1425:     MatLoad_Dense_HDF5(newMat, viewer);
1426: #else
1427:     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
1428: #endif
1429:   } else {
1430:     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
1431:   }
1432:   return 0;
1433: }

1435: static PetscErrorCode MatView_SeqDense_ASCII(Mat A, PetscViewer viewer)
1436: {
1437:   Mat_SeqDense     *a = (Mat_SeqDense *)A->data;
1438:   PetscInt          i, j;
1439:   const char       *name;
1440:   PetscScalar      *v, *av;
1441:   PetscViewerFormat format;
1442: #if defined(PETSC_USE_COMPLEX)
1443:   PetscBool allreal = PETSC_TRUE;
1444: #endif

1446:   MatDenseGetArrayRead(A, (const PetscScalar **)&av);
1447:   PetscViewerGetFormat(viewer, &format);
1448:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1449:     return 0; /* do nothing for now */
1450:   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1451:     PetscViewerASCIIUseTabs(viewer, PETSC_FALSE);
1452:     for (i = 0; i < A->rmap->n; i++) {
1453:       v = av + i;
1454:       PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i);
1455:       for (j = 0; j < A->cmap->n; j++) {
1456: #if defined(PETSC_USE_COMPLEX)
1457:         if (PetscRealPart(*v) != 0.0 && PetscImaginaryPart(*v) != 0.0) {
1458:           PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", j, (double)PetscRealPart(*v), (double)PetscImaginaryPart(*v));
1459:         } else if (PetscRealPart(*v)) {
1460:           PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", j, (double)PetscRealPart(*v));
1461:         }
1462: #else
1463:         if (*v) PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", j, (double)*v);
1464: #endif
1465:         v += a->lda;
1466:       }
1467:       PetscViewerASCIIPrintf(viewer, "\n");
1468:     }
1469:     PetscViewerASCIIUseTabs(viewer, PETSC_TRUE);
1470:   } else {
1471:     PetscViewerASCIIUseTabs(viewer, PETSC_FALSE);
1472: #if defined(PETSC_USE_COMPLEX)
1473:     /* determine if matrix has all real values */
1474:     for (j = 0; j < A->cmap->n; j++) {
1475:       v = av + j * a->lda;
1476:       for (i = 0; i < A->rmap->n; i++) {
1477:         if (PetscImaginaryPart(v[i])) {
1478:           allreal = PETSC_FALSE;
1479:           break;
1480:         }
1481:       }
1482:     }
1483: #endif
1484:     if (format == PETSC_VIEWER_ASCII_MATLAB) {
1485:       PetscObjectGetName((PetscObject)A, &name);
1486:       PetscViewerASCIIPrintf(viewer, "%% Size = %" PetscInt_FMT " %" PetscInt_FMT " \n", A->rmap->n, A->cmap->n);
1487:       PetscViewerASCIIPrintf(viewer, "%s = zeros(%" PetscInt_FMT ",%" PetscInt_FMT ");\n", name, A->rmap->n, A->cmap->n);
1488:       PetscViewerASCIIPrintf(viewer, "%s = [\n", name);
1489:     }

1491:     for (i = 0; i < A->rmap->n; i++) {
1492:       v = av + i;
1493:       for (j = 0; j < A->cmap->n; j++) {
1494: #if defined(PETSC_USE_COMPLEX)
1495:         if (allreal) {
1496:           PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(*v));
1497:         } else {
1498:           PetscViewerASCIIPrintf(viewer, "%18.16e + %18.16ei ", (double)PetscRealPart(*v), (double)PetscImaginaryPart(*v));
1499:         }
1500: #else
1501:         PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)*v);
1502: #endif
1503:         v += a->lda;
1504:       }
1505:       PetscViewerASCIIPrintf(viewer, "\n");
1506:     }
1507:     if (format == PETSC_VIEWER_ASCII_MATLAB) PetscViewerASCIIPrintf(viewer, "];\n");
1508:     PetscViewerASCIIUseTabs(viewer, PETSC_TRUE);
1509:   }
1510:   MatDenseRestoreArrayRead(A, (const PetscScalar **)&av);
1511:   PetscViewerFlush(viewer);
1512:   return 0;
1513: }

1515: #include <petscdraw.h>
1516: static PetscErrorCode MatView_SeqDense_Draw_Zoom(PetscDraw draw, void *Aa)
1517: {
1518:   Mat                A = (Mat)Aa;
1519:   PetscInt           m = A->rmap->n, n = A->cmap->n, i, j;
1520:   int                color = PETSC_DRAW_WHITE;
1521:   const PetscScalar *v;
1522:   PetscViewer        viewer;
1523:   PetscReal          xl, yl, xr, yr, x_l, x_r, y_l, y_r;
1524:   PetscViewerFormat  format;

1526:   PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer);
1527:   PetscViewerGetFormat(viewer, &format);
1528:   PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr);

1530:   /* Loop over matrix elements drawing boxes */
1531:   MatDenseGetArrayRead(A, &v);
1532:   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1533:     PetscDrawCollectiveBegin(draw);
1534:     /* Blue for negative and Red for positive */
1535:     for (j = 0; j < n; j++) {
1536:       x_l = j;
1537:       x_r = x_l + 1.0;
1538:       for (i = 0; i < m; i++) {
1539:         y_l = m - i - 1.0;
1540:         y_r = y_l + 1.0;
1541:         if (PetscRealPart(v[j * m + i]) > 0.) color = PETSC_DRAW_RED;
1542:         else if (PetscRealPart(v[j * m + i]) < 0.) color = PETSC_DRAW_BLUE;
1543:         else continue;
1544:         PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color);
1545:       }
1546:     }
1547:     PetscDrawCollectiveEnd(draw);
1548:   } else {
1549:     /* use contour shading to indicate magnitude of values */
1550:     /* first determine max of all nonzero values */
1551:     PetscReal minv = 0.0, maxv = 0.0;
1552:     PetscDraw popup;

1554:     for (i = 0; i < m * n; i++) {
1555:       if (PetscAbsScalar(v[i]) > maxv) maxv = PetscAbsScalar(v[i]);
1556:     }
1557:     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1558:     PetscDrawGetPopup(draw, &popup);
1559:     PetscDrawScalePopup(popup, minv, maxv);

1561:     PetscDrawCollectiveBegin(draw);
1562:     for (j = 0; j < n; j++) {
1563:       x_l = j;
1564:       x_r = x_l + 1.0;
1565:       for (i = 0; i < m; i++) {
1566:         y_l   = m - i - 1.0;
1567:         y_r   = y_l + 1.0;
1568:         color = PetscDrawRealToColor(PetscAbsScalar(v[j * m + i]), minv, maxv);
1569:         PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color);
1570:       }
1571:     }
1572:     PetscDrawCollectiveEnd(draw);
1573:   }
1574:   MatDenseRestoreArrayRead(A, &v);
1575:   return 0;
1576: }

1578: static PetscErrorCode MatView_SeqDense_Draw(Mat A, PetscViewer viewer)
1579: {
1580:   PetscDraw draw;
1581:   PetscBool isnull;
1582:   PetscReal xr, yr, xl, yl, h, w;

1584:   PetscViewerDrawGetDraw(viewer, 0, &draw);
1585:   PetscDrawIsNull(draw, &isnull);
1586:   if (isnull) return 0;

1588:   xr = A->cmap->n;
1589:   yr = A->rmap->n;
1590:   h  = yr / 10.0;
1591:   w  = xr / 10.0;
1592:   xr += w;
1593:   yr += h;
1594:   xl = -w;
1595:   yl = -h;
1596:   PetscDrawSetCoordinates(draw, xl, yl, xr, yr);
1597:   PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer);
1598:   PetscDrawZoom(draw, MatView_SeqDense_Draw_Zoom, A);
1599:   PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL);
1600:   PetscDrawSave(draw);
1601:   return 0;
1602: }

1604: PetscErrorCode MatView_SeqDense(Mat A, PetscViewer viewer)
1605: {
1606:   PetscBool iascii, isbinary, isdraw;

1608:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii);
1609:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary);
1610:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw);
1611:   if (iascii) MatView_SeqDense_ASCII(A, viewer);
1612:   else if (isbinary) MatView_Dense_Binary(A, viewer);
1613:   else if (isdraw) MatView_SeqDense_Draw(A, viewer);
1614:   return 0;
1615: }

1617: static PetscErrorCode MatDensePlaceArray_SeqDense(Mat A, const PetscScalar *array)
1618: {
1619:   Mat_SeqDense *a = (Mat_SeqDense *)A->data;

1624:   a->unplacedarray       = a->v;
1625:   a->unplaced_user_alloc = a->user_alloc;
1626:   a->v                   = (PetscScalar *)array;
1627:   a->user_alloc          = PETSC_TRUE;
1628: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1629:   A->offloadmask = PETSC_OFFLOAD_CPU;
1630: #endif
1631:   return 0;
1632: }

1634: static PetscErrorCode MatDenseResetArray_SeqDense(Mat A)
1635: {
1636:   Mat_SeqDense *a = (Mat_SeqDense *)A->data;

1640:   a->v             = a->unplacedarray;
1641:   a->user_alloc    = a->unplaced_user_alloc;
1642:   a->unplacedarray = NULL;
1643: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1644:   A->offloadmask = PETSC_OFFLOAD_CPU;
1645: #endif
1646:   return 0;
1647: }

1649: static PetscErrorCode MatDenseReplaceArray_SeqDense(Mat A, const PetscScalar *array)
1650: {
1651:   Mat_SeqDense *a = (Mat_SeqDense *)A->data;

1655:   if (!a->user_alloc) PetscFree(a->v);
1656:   a->v          = (PetscScalar *)array;
1657:   a->user_alloc = PETSC_FALSE;
1658: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1659:   A->offloadmask = PETSC_OFFLOAD_CPU;
1660: #endif
1661:   return 0;
1662: }

1664: PetscErrorCode MatDestroy_SeqDense(Mat mat)
1665: {
1666:   Mat_SeqDense *l = (Mat_SeqDense *)mat->data;

1668: #if defined(PETSC_USE_LOG)
1669:   PetscLogObjectState((PetscObject)mat, "Rows %" PetscInt_FMT " Cols %" PetscInt_FMT, mat->rmap->n, mat->cmap->n);
1670: #endif
1671:   VecDestroy(&(l->qrrhs));
1672:   PetscFree(l->tau);
1673:   PetscFree(l->pivots);
1674:   PetscFree(l->fwork);
1675:   MatDestroy(&l->ptapwork);
1676:   if (!l->user_alloc) PetscFree(l->v);
1677:   if (!l->unplaced_user_alloc) PetscFree(l->unplacedarray);
1680:   VecDestroy(&l->cvec);
1681:   MatDestroy(&l->cmat);
1682:   PetscFree(mat->data);

1684:   PetscObjectChangeTypeName((PetscObject)mat, NULL);
1685:   PetscObjectComposeFunction((PetscObject)mat, "MatQRFactor_C", NULL);
1686:   PetscObjectComposeFunction((PetscObject)mat, "MatQRFactorSymbolic_C", NULL);
1687:   PetscObjectComposeFunction((PetscObject)mat, "MatQRFactorNumeric_C", NULL);
1688:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL);
1689:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL);
1690:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL);
1691:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL);
1692:   PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL);
1693:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL);
1694:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL);
1695:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL);
1696:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL);
1697:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL);
1698:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL);
1699:   PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqaij_C", NULL);
1700: #if defined(PETSC_HAVE_ELEMENTAL)
1701:   PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_elemental_C", NULL);
1702: #endif
1703: #if defined(PETSC_HAVE_SCALAPACK)
1704:   PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_scalapack_C", NULL);
1705: #endif
1706: #if defined(PETSC_HAVE_CUDA)
1707:   PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqdensecuda_C", NULL);
1708:   PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensecuda_seqdensecuda_C", NULL);
1709:   PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensecuda_seqdense_C", NULL);
1710:   PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdensecuda_C", NULL);
1711: #endif
1712: #if defined(PETSC_HAVE_HIP)
1713:   PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqdensehip_C", NULL);
1714:   PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensehip_seqdensehip_C", NULL);
1715:   PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensehip_seqdense_C", NULL);
1716:   PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdensehip_C", NULL);
1717: #endif
1718:   PetscObjectComposeFunction((PetscObject)mat, "MatSeqDenseSetPreallocation_C", NULL);
1719:   PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqaij_seqdense_C", NULL);
1720:   PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdense_C", NULL);
1721:   PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqbaij_seqdense_C", NULL);
1722:   PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqsbaij_seqdense_C", NULL);

1724:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL);
1725:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL);
1726:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL);
1727:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL);
1728:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL);
1729:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL);
1730:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL);
1731:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL);
1732:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL);
1733:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL);
1734:   return 0;
1735: }

1737: static PetscErrorCode MatTranspose_SeqDense(Mat A, MatReuse reuse, Mat *matout)
1738: {
1739:   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1740:   PetscInt      k, j, m = A->rmap->n, M = mat->lda, n = A->cmap->n;
1741:   PetscScalar  *v, tmp;

1743:   if (reuse == MAT_REUSE_MATRIX) MatTransposeCheckNonzeroState_Private(A, *matout);
1744:   if (reuse == MAT_INPLACE_MATRIX) {
1745:     if (m == n) { /* in place transpose */
1746:       MatDenseGetArray(A, &v);
1747:       for (j = 0; j < m; j++) {
1748:         for (k = 0; k < j; k++) {
1749:           tmp          = v[j + k * M];
1750:           v[j + k * M] = v[k + j * M];
1751:           v[k + j * M] = tmp;
1752:         }
1753:       }
1754:       MatDenseRestoreArray(A, &v);
1755:     } else { /* reuse memory, temporary allocates new memory */
1756:       PetscScalar *v2;
1757:       PetscLayout  tmplayout;

1759:       PetscMalloc1((size_t)m * n, &v2);
1760:       MatDenseGetArray(A, &v);
1761:       for (j = 0; j < n; j++) {
1762:         for (k = 0; k < m; k++) v2[j + (size_t)k * n] = v[k + (size_t)j * M];
1763:       }
1764:       PetscArraycpy(v, v2, (size_t)m * n);
1765:       PetscFree(v2);
1766:       MatDenseRestoreArray(A, &v);
1767:       /* cleanup size dependent quantities */
1768:       VecDestroy(&mat->cvec);
1769:       MatDestroy(&mat->cmat);
1770:       PetscFree(mat->pivots);
1771:       PetscFree(mat->fwork);
1772:       MatDestroy(&mat->ptapwork);
1773:       /* swap row/col layouts */
1774:       mat->lda  = n;
1775:       tmplayout = A->rmap;
1776:       A->rmap   = A->cmap;
1777:       A->cmap   = tmplayout;
1778:     }
1779:   } else { /* out-of-place transpose */
1780:     Mat           tmat;
1781:     Mat_SeqDense *tmatd;
1782:     PetscScalar  *v2;
1783:     PetscInt      M2;

1785:     if (reuse == MAT_INITIAL_MATRIX) {
1786:       MatCreate(PetscObjectComm((PetscObject)A), &tmat);
1787:       MatSetSizes(tmat, A->cmap->n, A->rmap->n, A->cmap->n, A->rmap->n);
1788:       MatSetType(tmat, ((PetscObject)A)->type_name);
1789:       MatSeqDenseSetPreallocation(tmat, NULL);
1790:     } else tmat = *matout;

1792:     MatDenseGetArrayRead(A, (const PetscScalar **)&v);
1793:     MatDenseGetArray(tmat, &v2);
1794:     tmatd = (Mat_SeqDense *)tmat->data;
1795:     M2    = tmatd->lda;
1796:     for (j = 0; j < n; j++) {
1797:       for (k = 0; k < m; k++) v2[j + k * M2] = v[k + j * M];
1798:     }
1799:     MatDenseRestoreArray(tmat, &v2);
1800:     MatDenseRestoreArrayRead(A, (const PetscScalar **)&v);
1801:     MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY);
1802:     MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY);
1803:     *matout = tmat;
1804:   }
1805:   return 0;
1806: }

1808: static PetscErrorCode MatEqual_SeqDense(Mat A1, Mat A2, PetscBool *flg)
1809: {
1810:   Mat_SeqDense      *mat1 = (Mat_SeqDense *)A1->data;
1811:   Mat_SeqDense      *mat2 = (Mat_SeqDense *)A2->data;
1812:   PetscInt           i;
1813:   const PetscScalar *v1, *v2;

1815:   if (A1->rmap->n != A2->rmap->n) {
1816:     *flg = PETSC_FALSE;
1817:     return 0;
1818:   }
1819:   if (A1->cmap->n != A2->cmap->n) {
1820:     *flg = PETSC_FALSE;
1821:     return 0;
1822:   }
1823:   MatDenseGetArrayRead(A1, &v1);
1824:   MatDenseGetArrayRead(A2, &v2);
1825:   for (i = 0; i < A1->cmap->n; i++) {
1826:     PetscArraycmp(v1, v2, A1->rmap->n, flg);
1827:     if (*flg == PETSC_FALSE) return 0;
1828:     v1 += mat1->lda;
1829:     v2 += mat2->lda;
1830:   }
1831:   MatDenseRestoreArrayRead(A1, &v1);
1832:   MatDenseRestoreArrayRead(A2, &v2);
1833:   *flg = PETSC_TRUE;
1834:   return 0;
1835: }

1837: static PetscErrorCode MatGetDiagonal_SeqDense(Mat A, Vec v)
1838: {
1839:   Mat_SeqDense      *mat = (Mat_SeqDense *)A->data;
1840:   PetscInt           i, n, len;
1841:   PetscScalar       *x;
1842:   const PetscScalar *vv;

1844:   VecGetSize(v, &n);
1845:   VecGetArray(v, &x);
1846:   len = PetscMin(A->rmap->n, A->cmap->n);
1847:   MatDenseGetArrayRead(A, &vv);
1849:   for (i = 0; i < len; i++) x[i] = vv[i * mat->lda + i];
1850:   MatDenseRestoreArrayRead(A, &vv);
1851:   VecRestoreArray(v, &x);
1852:   return 0;
1853: }

1855: static PetscErrorCode MatDiagonalScale_SeqDense(Mat A, Vec ll, Vec rr)
1856: {
1857:   Mat_SeqDense      *mat = (Mat_SeqDense *)A->data;
1858:   const PetscScalar *l, *r;
1859:   PetscScalar        x, *v, *vv;
1860:   PetscInt           i, j, m = A->rmap->n, n = A->cmap->n;

1862:   MatDenseGetArray(A, &vv);
1863:   if (ll) {
1864:     VecGetSize(ll, &m);
1865:     VecGetArrayRead(ll, &l);
1867:     for (i = 0; i < m; i++) {
1868:       x = l[i];
1869:       v = vv + i;
1870:       for (j = 0; j < n; j++) {
1871:         (*v) *= x;
1872:         v += mat->lda;
1873:       }
1874:     }
1875:     VecRestoreArrayRead(ll, &l);
1876:     PetscLogFlops(1.0 * n * m);
1877:   }
1878:   if (rr) {
1879:     VecGetSize(rr, &n);
1880:     VecGetArrayRead(rr, &r);
1882:     for (i = 0; i < n; i++) {
1883:       x = r[i];
1884:       v = vv + i * mat->lda;
1885:       for (j = 0; j < m; j++) (*v++) *= x;
1886:     }
1887:     VecRestoreArrayRead(rr, &r);
1888:     PetscLogFlops(1.0 * n * m);
1889:   }
1890:   MatDenseRestoreArray(A, &vv);
1891:   return 0;
1892: }

1894: PetscErrorCode MatNorm_SeqDense(Mat A, NormType type, PetscReal *nrm)
1895: {
1896:   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1897:   PetscScalar  *v, *vv;
1898:   PetscReal     sum = 0.0;
1899:   PetscInt      lda, m = A->rmap->n, i, j;

1901:   MatDenseGetArrayRead(A, (const PetscScalar **)&vv);
1902:   MatDenseGetLDA(A, &lda);
1903:   v = vv;
1904:   if (type == NORM_FROBENIUS) {
1905:     if (lda > m) {
1906:       for (j = 0; j < A->cmap->n; j++) {
1907:         v = vv + j * lda;
1908:         for (i = 0; i < m; i++) {
1909:           sum += PetscRealPart(PetscConj(*v) * (*v));
1910:           v++;
1911:         }
1912:       }
1913:     } else {
1914: #if defined(PETSC_USE_REAL___FP16)
1915:       PetscBLASInt one = 1, cnt = A->cmap->n * A->rmap->n;
1916:       PetscCallBLAS("BLASnrm2", *nrm = BLASnrm2_(&cnt, v, &one));
1917:     }
1918: #else
1919:       for (i = 0; i < A->cmap->n * A->rmap->n; i++) {
1920:         sum += PetscRealPart(PetscConj(*v) * (*v));
1921:         v++;
1922:       }
1923:     }
1924:     *nrm = PetscSqrtReal(sum);
1925: #endif
1926:     PetscLogFlops(2.0 * A->cmap->n * A->rmap->n);
1927:   } else if (type == NORM_1) {
1928:     *nrm = 0.0;
1929:     for (j = 0; j < A->cmap->n; j++) {
1930:       v   = vv + j * mat->lda;
1931:       sum = 0.0;
1932:       for (i = 0; i < A->rmap->n; i++) {
1933:         sum += PetscAbsScalar(*v);
1934:         v++;
1935:       }
1936:       if (sum > *nrm) *nrm = sum;
1937:     }
1938:     PetscLogFlops(1.0 * A->cmap->n * A->rmap->n);
1939:   } else if (type == NORM_INFINITY) {
1940:     *nrm = 0.0;
1941:     for (j = 0; j < A->rmap->n; j++) {
1942:       v   = vv + j;
1943:       sum = 0.0;
1944:       for (i = 0; i < A->cmap->n; i++) {
1945:         sum += PetscAbsScalar(*v);
1946:         v += mat->lda;
1947:       }
1948:       if (sum > *nrm) *nrm = sum;
1949:     }
1950:     PetscLogFlops(1.0 * A->cmap->n * A->rmap->n);
1951:   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No two norm");
1952:   MatDenseRestoreArrayRead(A, (const PetscScalar **)&vv);
1953:   return 0;
1954: }

1956: static PetscErrorCode MatSetOption_SeqDense(Mat A, MatOption op, PetscBool flg)
1957: {
1958:   Mat_SeqDense *aij = (Mat_SeqDense *)A->data;

1960:   switch (op) {
1961:   case MAT_ROW_ORIENTED:
1962:     aij->roworiented = flg;
1963:     break;
1964:   case MAT_NEW_NONZERO_LOCATIONS:
1965:   case MAT_NEW_NONZERO_LOCATION_ERR:
1966:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1967:   case MAT_FORCE_DIAGONAL_ENTRIES:
1968:   case MAT_KEEP_NONZERO_PATTERN:
1969:   case MAT_IGNORE_OFF_PROC_ENTRIES:
1970:   case MAT_USE_HASH_TABLE:
1971:   case MAT_IGNORE_ZERO_ENTRIES:
1972:   case MAT_IGNORE_LOWER_TRIANGULAR:
1973:   case MAT_SORTED_FULL:
1974:     PetscInfo(A, "Option %s ignored\n", MatOptions[op]);
1975:     break;
1976:   case MAT_SPD:
1977:   case MAT_SYMMETRIC:
1978:   case MAT_STRUCTURALLY_SYMMETRIC:
1979:   case MAT_HERMITIAN:
1980:   case MAT_SYMMETRY_ETERNAL:
1981:   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1982:   case MAT_SPD_ETERNAL:
1983:     break;
1984:   default:
1985:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
1986:   }
1987:   return 0;
1988: }

1990: PetscErrorCode MatZeroEntries_SeqDense(Mat A)
1991: {
1992:   Mat_SeqDense *l   = (Mat_SeqDense *)A->data;
1993:   PetscInt      lda = l->lda, m = A->rmap->n, n = A->cmap->n, j;
1994:   PetscScalar  *v;

1996:   MatDenseGetArrayWrite(A, &v);
1997:   if (lda > m) {
1998:     for (j = 0; j < n; j++) PetscArrayzero(v + j * lda, m);
1999:   } else {
2000:     PetscArrayzero(v, PetscInt64Mult(m, n));
2001:   }
2002:   MatDenseRestoreArrayWrite(A, &v);
2003:   return 0;
2004: }

2006: static PetscErrorCode MatZeroRows_SeqDense(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
2007: {
2008:   Mat_SeqDense      *l = (Mat_SeqDense *)A->data;
2009:   PetscInt           m = l->lda, n = A->cmap->n, i, j;
2010:   PetscScalar       *slot, *bb, *v;
2011:   const PetscScalar *xx;

2013:   if (PetscDefined(USE_DEBUG)) {
2014:     for (i = 0; i < N; i++) {
2017:     }
2018:   }
2019:   if (!N) return 0;

2021:   /* fix right hand side if needed */
2022:   if (x && b) {
2023:     VecGetArrayRead(x, &xx);
2024:     VecGetArray(b, &bb);
2025:     for (i = 0; i < N; i++) bb[rows[i]] = diag * xx[rows[i]];
2026:     VecRestoreArrayRead(x, &xx);
2027:     VecRestoreArray(b, &bb);
2028:   }

2030:   MatDenseGetArray(A, &v);
2031:   for (i = 0; i < N; i++) {
2032:     slot = v + rows[i];
2033:     for (j = 0; j < n; j++) {
2034:       *slot = 0.0;
2035:       slot += m;
2036:     }
2037:   }
2038:   if (diag != 0.0) {
2040:     for (i = 0; i < N; i++) {
2041:       slot  = v + (m + 1) * rows[i];
2042:       *slot = diag;
2043:     }
2044:   }
2045:   MatDenseRestoreArray(A, &v);
2046:   return 0;
2047: }

2049: static PetscErrorCode MatDenseGetLDA_SeqDense(Mat A, PetscInt *lda)
2050: {
2051:   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;

2053:   *lda = mat->lda;
2054:   return 0;
2055: }

2057: PetscErrorCode MatDenseGetArray_SeqDense(Mat A, PetscScalar **array)
2058: {
2059:   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;

2062:   *array = mat->v;
2063:   return 0;
2064: }

2066: PetscErrorCode MatDenseRestoreArray_SeqDense(Mat A, PetscScalar **array)
2067: {
2068:   if (array) *array = NULL;
2069:   return 0;
2070: }

2072: /*@
2073:    MatDenseGetLDA - gets the leading dimension of the array returned from `MatDenseGetArray()`

2075:    Not collective

2077:    Input Parameter:
2078: .  mat - a `MATDENSE` or `MATDENSECUDA` matrix

2080:    Output Parameter:
2081: .   lda - the leading dimension

2083:    Level: intermediate

2085: .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseSetLDA()`
2086: @*/
2087: PetscErrorCode MatDenseGetLDA(Mat A, PetscInt *lda)
2088: {
2091:   MatCheckPreallocated(A, 1);
2092:   PetscUseMethod(A, "MatDenseGetLDA_C", (Mat, PetscInt *), (A, lda));
2093:   return 0;
2094: }

2096: /*@
2097:    MatDenseSetLDA - Sets the leading dimension of the array used by the `MATDENSE` matrix

2099:    Not collective

2101:    Input Parameters:
2102: +  mat - a `MATDENSE` or `MATDENSECUDA` matrix
2103: -  lda - the leading dimension

2105:    Level: intermediate

2107: .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetLDA()`
2108: @*/
2109: PetscErrorCode MatDenseSetLDA(Mat A, PetscInt lda)
2110: {
2112:   PetscTryMethod(A, "MatDenseSetLDA_C", (Mat, PetscInt), (A, lda));
2113:   return 0;
2114: }

2116: /*@C
2117:    MatDenseGetArray - gives read-write access to the array where the data for a `MATDENSE` matrix is stored

2119:    Logically Collective

2121:    Input Parameter:
2122: .  mat - a dense matrix

2124:    Output Parameter:
2125: .   array - pointer to the data

2127:    Level: intermediate

2129: .seealso: `MATDENSE`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`
2130: @*/
2131: PetscErrorCode MatDenseGetArray(Mat A, PetscScalar **array)
2132: {
2135:   PetscUseMethod(A, "MatDenseGetArray_C", (Mat, PetscScalar **), (A, array));
2136:   return 0;
2137: }

2139: /*@C
2140:    MatDenseRestoreArray - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArray()`

2142:    Logically Collective

2144:    Input Parameters:
2145: +  mat - a dense matrix
2146: -  array - pointer to the data (may be NULL)

2148:    Level: intermediate

2150: .seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`
2151: @*/
2152: PetscErrorCode MatDenseRestoreArray(Mat A, PetscScalar **array)
2153: {
2156:   PetscUseMethod(A, "MatDenseRestoreArray_C", (Mat, PetscScalar **), (A, array));
2157:   PetscObjectStateIncrease((PetscObject)A);
2158: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
2159:   A->offloadmask = PETSC_OFFLOAD_CPU;
2160: #endif
2161:   return 0;
2162: }

2164: /*@C
2165:   MatDenseGetArrayRead - gives read-only access to the array where the data for a `MATDENSE`  matrix is stored

2167:    Not Collective

2169:    Input Parameter:
2170: .  mat - a dense matrix

2172:    Output Parameter:
2173: .   array - pointer to the data

2175:    Level: intermediate

2177: .seealso: `MATDENSE`, `MatDenseRestoreArrayRead()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`
2178: @*/
2179: PetscErrorCode MatDenseGetArrayRead(Mat A, const PetscScalar **array)
2180: {
2183:   PetscUseMethod(A, "MatDenseGetArrayRead_C", (Mat, const PetscScalar **), (A, array));
2184:   return 0;
2185: }

2187: /*@C
2188:    MatDenseRestoreArrayRead - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArrayRead()`

2190:    Not Collective

2192:    Input Parameters:
2193: +  mat - a dense matrix
2194: -  array - pointer to the data (may be NULL)

2196:    Level: intermediate

2198: .seealso: `MATDENSE`, `MatDenseGetArrayRead()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`
2199: @*/
2200: PetscErrorCode MatDenseRestoreArrayRead(Mat A, const PetscScalar **array)
2201: {
2204:   PetscUseMethod(A, "MatDenseRestoreArrayRead_C", (Mat, const PetscScalar **), (A, array));
2205:   return 0;
2206: }

2208: /*@C
2209:    MatDenseGetArrayWrite - gives write-only access to the array where the data for a `MATDENSE` matrix is stored

2211:    Not Collective

2213:    Input Parameter:
2214: .  mat - a dense matrix

2216:    Output Parameter:
2217: .   array - pointer to the data

2219:    Level: intermediate

2221: .seealso: `MATDENSE`, `MatDenseRestoreArrayWrite()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`
2222: @*/
2223: PetscErrorCode MatDenseGetArrayWrite(Mat A, PetscScalar **array)
2224: {
2227:   PetscUseMethod(A, "MatDenseGetArrayWrite_C", (Mat, PetscScalar **), (A, array));
2228:   return 0;
2229: }

2231: /*@C
2232:    MatDenseRestoreArrayWrite - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArrayWrite()`

2234:    Not Collective

2236:    Input Parameters:
2237: +  mat - a dense matrix
2238: -  array - pointer to the data (may be NULL)

2240:    Level: intermediate

2242: .seealso: `MATDENSE`, `MatDenseGetArrayWrite()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`
2243: @*/
2244: PetscErrorCode MatDenseRestoreArrayWrite(Mat A, PetscScalar **array)
2245: {
2248:   PetscUseMethod(A, "MatDenseRestoreArrayWrite_C", (Mat, PetscScalar **), (A, array));
2249:   PetscObjectStateIncrease((PetscObject)A);
2250: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
2251:   A->offloadmask = PETSC_OFFLOAD_CPU;
2252: #endif
2253:   return 0;
2254: }

2256: static PetscErrorCode MatCreateSubMatrix_SeqDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
2257: {
2258:   Mat_SeqDense   *mat = (Mat_SeqDense *)A->data;
2259:   PetscInt        i, j, nrows, ncols, ldb;
2260:   const PetscInt *irow, *icol;
2261:   PetscScalar    *av, *bv, *v = mat->v;
2262:   Mat             newmat;

2264:   ISGetIndices(isrow, &irow);
2265:   ISGetIndices(iscol, &icol);
2266:   ISGetLocalSize(isrow, &nrows);
2267:   ISGetLocalSize(iscol, &ncols);

2269:   /* Check submatrixcall */
2270:   if (scall == MAT_REUSE_MATRIX) {
2271:     PetscInt n_cols, n_rows;
2272:     MatGetSize(*B, &n_rows, &n_cols);
2273:     if (n_rows != nrows || n_cols != ncols) {
2274:       /* resize the result matrix to match number of requested rows/columns */
2275:       MatSetSizes(*B, nrows, ncols, nrows, ncols);
2276:     }
2277:     newmat = *B;
2278:   } else {
2279:     /* Create and fill new matrix */
2280:     MatCreate(PetscObjectComm((PetscObject)A), &newmat);
2281:     MatSetSizes(newmat, nrows, ncols, nrows, ncols);
2282:     MatSetType(newmat, ((PetscObject)A)->type_name);
2283:     MatSeqDenseSetPreallocation(newmat, NULL);
2284:   }

2286:   /* Now extract the data pointers and do the copy,column at a time */
2287:   MatDenseGetArray(newmat, &bv);
2288:   MatDenseGetLDA(newmat, &ldb);
2289:   for (i = 0; i < ncols; i++) {
2290:     av = v + mat->lda * icol[i];
2291:     for (j = 0; j < nrows; j++) bv[j] = av[irow[j]];
2292:     bv += ldb;
2293:   }
2294:   MatDenseRestoreArray(newmat, &bv);

2296:   /* Assemble the matrices so that the correct flags are set */
2297:   MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY);
2298:   MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY);

2300:   /* Free work space */
2301:   ISRestoreIndices(isrow, &irow);
2302:   ISRestoreIndices(iscol, &icol);
2303:   *B = newmat;
2304:   return 0;
2305: }

2307: static PetscErrorCode MatCreateSubMatrices_SeqDense(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[])
2308: {
2309:   PetscInt i;

2311:   if (scall == MAT_INITIAL_MATRIX) PetscCalloc1(n, B);

2313:   for (i = 0; i < n; i++) MatCreateSubMatrix_SeqDense(A, irow[i], icol[i], scall, &(*B)[i]);
2314:   return 0;
2315: }

2317: static PetscErrorCode MatAssemblyBegin_SeqDense(Mat mat, MatAssemblyType mode)
2318: {
2319:   return 0;
2320: }

2322: static PetscErrorCode MatAssemblyEnd_SeqDense(Mat mat, MatAssemblyType mode)
2323: {
2324:   return 0;
2325: }

2327: PetscErrorCode MatCopy_SeqDense(Mat A, Mat B, MatStructure str)
2328: {
2329:   Mat_SeqDense      *a = (Mat_SeqDense *)A->data, *b = (Mat_SeqDense *)B->data;
2330:   const PetscScalar *va;
2331:   PetscScalar       *vb;
2332:   PetscInt           lda1 = a->lda, lda2 = b->lda, m = A->rmap->n, n = A->cmap->n, j;

2334:   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2335:   if (A->ops->copy != B->ops->copy) {
2336:     MatCopy_Basic(A, B, str);
2337:     return 0;
2338:   }
2340:   MatDenseGetArrayRead(A, &va);
2341:   MatDenseGetArray(B, &vb);
2342:   if (lda1 > m || lda2 > m) {
2343:     for (j = 0; j < n; j++) PetscArraycpy(vb + j * lda2, va + j * lda1, m);
2344:   } else {
2345:     PetscArraycpy(vb, va, A->rmap->n * A->cmap->n);
2346:   }
2347:   MatDenseRestoreArray(B, &vb);
2348:   MatDenseRestoreArrayRead(A, &va);
2349:   MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY);
2350:   MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY);
2351:   return 0;
2352: }

2354: PetscErrorCode MatSetUp_SeqDense(Mat A)
2355: {
2356:   PetscLayoutSetUp(A->rmap);
2357:   PetscLayoutSetUp(A->cmap);
2358:   if (!A->preallocated) MatSeqDenseSetPreallocation(A, NULL);
2359:   return 0;
2360: }

2362: static PetscErrorCode MatConjugate_SeqDense(Mat A)
2363: {
2364:   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
2365:   PetscInt      i, j;
2366:   PetscInt      min = PetscMin(A->rmap->n, A->cmap->n);
2367:   PetscScalar  *aa;

2369:   MatDenseGetArray(A, &aa);
2370:   for (j = 0; j < A->cmap->n; j++) {
2371:     for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscConj(aa[i + j * mat->lda]);
2372:   }
2373:   MatDenseRestoreArray(A, &aa);
2374:   if (mat->tau)
2375:     for (i = 0; i < min; i++) mat->tau[i] = PetscConj(mat->tau[i]);
2376:   return 0;
2377: }

2379: static PetscErrorCode MatRealPart_SeqDense(Mat A)
2380: {
2381:   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
2382:   PetscInt      i, j;
2383:   PetscScalar  *aa;

2385:   MatDenseGetArray(A, &aa);
2386:   for (j = 0; j < A->cmap->n; j++) {
2387:     for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscRealPart(aa[i + j * mat->lda]);
2388:   }
2389:   MatDenseRestoreArray(A, &aa);
2390:   return 0;
2391: }

2393: static PetscErrorCode MatImaginaryPart_SeqDense(Mat A)
2394: {
2395:   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
2396:   PetscInt      i, j;
2397:   PetscScalar  *aa;

2399:   MatDenseGetArray(A, &aa);
2400:   for (j = 0; j < A->cmap->n; j++) {
2401:     for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscImaginaryPart(aa[i + j * mat->lda]);
2402:   }
2403:   MatDenseRestoreArray(A, &aa);
2404:   return 0;
2405: }

2407: /* ----------------------------------------------------------------*/
2408: PetscErrorCode MatMatMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C)
2409: {
2410:   PetscInt  m = A->rmap->n, n = B->cmap->n;
2411:   PetscBool cisdense = PETSC_FALSE;

2413:   MatSetSizes(C, m, n, m, n);
2414: #if defined(PETSC_HAVE_CUDA)
2415:   PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "");
2416: #endif
2417: #if defined(PETSC_HAVE_HIP)
2418:   PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, "");
2419: #endif
2420:   if (!cisdense) {
2421:     PetscBool flg;

2423:     PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg);
2424:     MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE);
2425:   }
2426:   MatSetUp(C);
2427:   return 0;
2428: }

2430: PetscErrorCode MatMatMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C)
2431: {
2432:   Mat_SeqDense      *a = (Mat_SeqDense *)A->data, *b = (Mat_SeqDense *)B->data, *c = (Mat_SeqDense *)C->data;
2433:   PetscBLASInt       m, n, k;
2434:   const PetscScalar *av, *bv;
2435:   PetscScalar       *cv;
2436:   PetscScalar        _DOne = 1.0, _DZero = 0.0;

2438:   PetscBLASIntCast(C->rmap->n, &m);
2439:   PetscBLASIntCast(C->cmap->n, &n);
2440:   PetscBLASIntCast(A->cmap->n, &k);
2441:   if (!m || !n || !k) return 0;
2442:   MatDenseGetArrayRead(A, &av);
2443:   MatDenseGetArrayRead(B, &bv);
2444:   MatDenseGetArrayWrite(C, &cv);
2445:   PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda));
2446:   PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1));
2447:   MatDenseRestoreArrayRead(A, &av);
2448:   MatDenseRestoreArrayRead(B, &bv);
2449:   MatDenseRestoreArrayWrite(C, &cv);
2450:   return 0;
2451: }

2453: PetscErrorCode MatMatTransposeMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C)
2454: {
2455:   PetscInt  m = A->rmap->n, n = B->rmap->n;
2456:   PetscBool cisdense = PETSC_FALSE;

2458:   MatSetSizes(C, m, n, m, n);
2459: #if defined(PETSC_HAVE_CUDA)
2460:   PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "");
2461: #endif
2462: #if defined(PETSC_HAVE_HIP)
2463:   PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, "");
2464: #endif
2465:   if (!cisdense) {
2466:     PetscBool flg;

2468:     PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg);
2469:     MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE);
2470:   }
2471:   MatSetUp(C);
2472:   return 0;
2473: }

2475: PetscErrorCode MatMatTransposeMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C)
2476: {
2477:   Mat_SeqDense      *a = (Mat_SeqDense *)A->data;
2478:   Mat_SeqDense      *b = (Mat_SeqDense *)B->data;
2479:   Mat_SeqDense      *c = (Mat_SeqDense *)C->data;
2480:   const PetscScalar *av, *bv;
2481:   PetscScalar       *cv;
2482:   PetscBLASInt       m, n, k;
2483:   PetscScalar        _DOne = 1.0, _DZero = 0.0;

2485:   PetscBLASIntCast(C->rmap->n, &m);
2486:   PetscBLASIntCast(C->cmap->n, &n);
2487:   PetscBLASIntCast(A->cmap->n, &k);
2488:   if (!m || !n || !k) return 0;
2489:   MatDenseGetArrayRead(A, &av);
2490:   MatDenseGetArrayRead(B, &bv);
2491:   MatDenseGetArrayWrite(C, &cv);
2492:   PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda));
2493:   MatDenseRestoreArrayRead(A, &av);
2494:   MatDenseRestoreArrayRead(B, &bv);
2495:   MatDenseRestoreArrayWrite(C, &cv);
2496:   PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1));
2497:   return 0;
2498: }

2500: PetscErrorCode MatTransposeMatMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C)
2501: {
2502:   PetscInt  m = A->cmap->n, n = B->cmap->n;
2503:   PetscBool cisdense = PETSC_FALSE;

2505:   MatSetSizes(C, m, n, m, n);
2506: #if defined(PETSC_HAVE_CUDA)
2507:   PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "");
2508: #endif
2509: #if defined(PETSC_HAVE_HIP)
2510:   PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, "");
2511: #endif
2512:   if (!cisdense) {
2513:     PetscBool flg;

2515:     PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg);
2516:     MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE);
2517:   }
2518:   MatSetUp(C);
2519:   return 0;
2520: }

2522: PetscErrorCode MatTransposeMatMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C)
2523: {
2524:   Mat_SeqDense      *a = (Mat_SeqDense *)A->data;
2525:   Mat_SeqDense      *b = (Mat_SeqDense *)B->data;
2526:   Mat_SeqDense      *c = (Mat_SeqDense *)C->data;
2527:   const PetscScalar *av, *bv;
2528:   PetscScalar       *cv;
2529:   PetscBLASInt       m, n, k;
2530:   PetscScalar        _DOne = 1.0, _DZero = 0.0;

2532:   PetscBLASIntCast(C->rmap->n, &m);
2533:   PetscBLASIntCast(C->cmap->n, &n);
2534:   PetscBLASIntCast(A->rmap->n, &k);
2535:   if (!m || !n || !k) return 0;
2536:   MatDenseGetArrayRead(A, &av);
2537:   MatDenseGetArrayRead(B, &bv);
2538:   MatDenseGetArrayWrite(C, &cv);
2539:   PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda));
2540:   MatDenseRestoreArrayRead(A, &av);
2541:   MatDenseRestoreArrayRead(B, &bv);
2542:   MatDenseRestoreArrayWrite(C, &cv);
2543:   PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1));
2544:   return 0;
2545: }

2547: /* ----------------------------------------------- */
2548: static PetscErrorCode MatProductSetFromOptions_SeqDense_AB(Mat C)
2549: {
2550:   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqDense_SeqDense;
2551:   C->ops->productsymbolic = MatProductSymbolic_AB;
2552:   return 0;
2553: }

2555: static PetscErrorCode MatProductSetFromOptions_SeqDense_AtB(Mat C)
2556: {
2557:   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_SeqDense_SeqDense;
2558:   C->ops->productsymbolic          = MatProductSymbolic_AtB;
2559:   return 0;
2560: }

2562: static PetscErrorCode MatProductSetFromOptions_SeqDense_ABt(Mat C)
2563: {
2564:   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqDense_SeqDense;
2565:   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2566:   return 0;
2567: }

2569: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqDense(Mat C)
2570: {
2571:   Mat_Product *product = C->product;

2573:   switch (product->type) {
2574:   case MATPRODUCT_AB:
2575:     MatProductSetFromOptions_SeqDense_AB(C);
2576:     break;
2577:   case MATPRODUCT_AtB:
2578:     MatProductSetFromOptions_SeqDense_AtB(C);
2579:     break;
2580:   case MATPRODUCT_ABt:
2581:     MatProductSetFromOptions_SeqDense_ABt(C);
2582:     break;
2583:   default:
2584:     break;
2585:   }
2586:   return 0;
2587: }
2588: /* ----------------------------------------------- */

2590: static PetscErrorCode MatGetRowMax_SeqDense(Mat A, Vec v, PetscInt idx[])
2591: {
2592:   Mat_SeqDense      *a = (Mat_SeqDense *)A->data;
2593:   PetscInt           i, j, m = A->rmap->n, n = A->cmap->n, p;
2594:   PetscScalar       *x;
2595:   const PetscScalar *aa;

2598:   VecGetArray(v, &x);
2599:   VecGetLocalSize(v, &p);
2600:   MatDenseGetArrayRead(A, &aa);
2602:   for (i = 0; i < m; i++) {
2603:     x[i] = aa[i];
2604:     if (idx) idx[i] = 0;
2605:     for (j = 1; j < n; j++) {
2606:       if (PetscRealPart(x[i]) < PetscRealPart(aa[i + a->lda * j])) {
2607:         x[i] = aa[i + a->lda * j];
2608:         if (idx) idx[i] = j;
2609:       }
2610:     }
2611:   }
2612:   MatDenseRestoreArrayRead(A, &aa);
2613:   VecRestoreArray(v, &x);
2614:   return 0;
2615: }

2617: static PetscErrorCode MatGetRowMaxAbs_SeqDense(Mat A, Vec v, PetscInt idx[])
2618: {
2619:   Mat_SeqDense      *a = (Mat_SeqDense *)A->data;
2620:   PetscInt           i, j, m = A->rmap->n, n = A->cmap->n, p;
2621:   PetscScalar       *x;
2622:   PetscReal          atmp;
2623:   const PetscScalar *aa;

2626:   VecGetArray(v, &x);
2627:   VecGetLocalSize(v, &p);
2628:   MatDenseGetArrayRead(A, &aa);
2630:   for (i = 0; i < m; i++) {
2631:     x[i] = PetscAbsScalar(aa[i]);
2632:     for (j = 1; j < n; j++) {
2633:       atmp = PetscAbsScalar(aa[i + a->lda * j]);
2634:       if (PetscAbsScalar(x[i]) < atmp) {
2635:         x[i] = atmp;
2636:         if (idx) idx[i] = j;
2637:       }
2638:     }
2639:   }
2640:   MatDenseRestoreArrayRead(A, &aa);
2641:   VecRestoreArray(v, &x);
2642:   return 0;
2643: }

2645: static PetscErrorCode MatGetRowMin_SeqDense(Mat A, Vec v, PetscInt idx[])
2646: {
2647:   Mat_SeqDense      *a = (Mat_SeqDense *)A->data;
2648:   PetscInt           i, j, m = A->rmap->n, n = A->cmap->n, p;
2649:   PetscScalar       *x;
2650:   const PetscScalar *aa;

2653:   MatDenseGetArrayRead(A, &aa);
2654:   VecGetArray(v, &x);
2655:   VecGetLocalSize(v, &p);
2657:   for (i = 0; i < m; i++) {
2658:     x[i] = aa[i];
2659:     if (idx) idx[i] = 0;
2660:     for (j = 1; j < n; j++) {
2661:       if (PetscRealPart(x[i]) > PetscRealPart(aa[i + a->lda * j])) {
2662:         x[i] = aa[i + a->lda * j];
2663:         if (idx) idx[i] = j;
2664:       }
2665:     }
2666:   }
2667:   VecRestoreArray(v, &x);
2668:   MatDenseRestoreArrayRead(A, &aa);
2669:   return 0;
2670: }

2672: PetscErrorCode MatGetColumnVector_SeqDense(Mat A, Vec v, PetscInt col)
2673: {
2674:   Mat_SeqDense      *a = (Mat_SeqDense *)A->data;
2675:   PetscScalar       *x;
2676:   const PetscScalar *aa;

2679:   MatDenseGetArrayRead(A, &aa);
2680:   VecGetArray(v, &x);
2681:   PetscArraycpy(x, aa + col * a->lda, A->rmap->n);
2682:   VecRestoreArray(v, &x);
2683:   MatDenseRestoreArrayRead(A, &aa);
2684:   return 0;
2685: }

2687: PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat A, PetscInt type, PetscReal *reductions)
2688: {
2689:   PetscInt           i, j, m, n;
2690:   const PetscScalar *a;

2692:   MatGetSize(A, &m, &n);
2693:   PetscArrayzero(reductions, n);
2694:   MatDenseGetArrayRead(A, &a);
2695:   if (type == NORM_2) {
2696:     for (i = 0; i < n; i++) {
2697:       for (j = 0; j < m; j++) reductions[i] += PetscAbsScalar(a[j] * a[j]);
2698:       a += m;
2699:     }
2700:   } else if (type == NORM_1) {
2701:     for (i = 0; i < n; i++) {
2702:       for (j = 0; j < m; j++) reductions[i] += PetscAbsScalar(a[j]);
2703:       a += m;
2704:     }
2705:   } else if (type == NORM_INFINITY) {
2706:     for (i = 0; i < n; i++) {
2707:       for (j = 0; j < m; j++) reductions[i] = PetscMax(PetscAbsScalar(a[j]), reductions[i]);
2708:       a += m;
2709:     }
2710:   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
2711:     for (i = 0; i < n; i++) {
2712:       for (j = 0; j < m; j++) reductions[i] += PetscRealPart(a[j]);
2713:       a += m;
2714:     }
2715:   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2716:     for (i = 0; i < n; i++) {
2717:       for (j = 0; j < m; j++) reductions[i] += PetscImaginaryPart(a[j]);
2718:       a += m;
2719:     }
2720:   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
2721:   MatDenseRestoreArrayRead(A, &a);
2722:   if (type == NORM_2) {
2723:     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
2724:   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2725:     for (i = 0; i < n; i++) reductions[i] /= m;
2726:   }
2727:   return 0;
2728: }

2730: PetscErrorCode MatSetRandom_SeqDense(Mat x, PetscRandom rctx)
2731: {
2732:   PetscScalar *a;
2733:   PetscInt     lda, m, n, i, j;

2735:   MatGetSize(x, &m, &n);
2736:   MatDenseGetLDA(x, &lda);
2737:   MatDenseGetArrayWrite(x, &a);
2738:   for (j = 0; j < n; j++) {
2739:     for (i = 0; i < m; i++) PetscRandomGetValue(rctx, a + j * lda + i);
2740:   }
2741:   MatDenseRestoreArrayWrite(x, &a);
2742:   return 0;
2743: }

2745: static PetscErrorCode MatMissingDiagonal_SeqDense(Mat A, PetscBool *missing, PetscInt *d)
2746: {
2747:   *missing = PETSC_FALSE;
2748:   return 0;
2749: }

2751: /* vals is not const */
2752: static PetscErrorCode MatDenseGetColumn_SeqDense(Mat A, PetscInt col, PetscScalar **vals)
2753: {
2754:   Mat_SeqDense *a = (Mat_SeqDense *)A->data;
2755:   PetscScalar  *v;

2758:   MatDenseGetArray(A, &v);
2759:   *vals = v + col * a->lda;
2760:   MatDenseRestoreArray(A, &v);
2761:   return 0;
2762: }

2764: static PetscErrorCode MatDenseRestoreColumn_SeqDense(Mat A, PetscScalar **vals)
2765: {
2766:   if (vals) *vals = NULL; /* user cannot accidentally use the array later */
2767:   return 0;
2768: }

2770: /* -------------------------------------------------------------------*/
2771: static struct _MatOps MatOps_Values = {MatSetValues_SeqDense,
2772:                                        MatGetRow_SeqDense,
2773:                                        MatRestoreRow_SeqDense,
2774:                                        MatMult_SeqDense,
2775:                                        /*  4*/ MatMultAdd_SeqDense,
2776:                                        MatMultTranspose_SeqDense,
2777:                                        MatMultTransposeAdd_SeqDense,
2778:                                        NULL,
2779:                                        NULL,
2780:                                        NULL,
2781:                                        /* 10*/ NULL,
2782:                                        MatLUFactor_SeqDense,
2783:                                        MatCholeskyFactor_SeqDense,
2784:                                        MatSOR_SeqDense,
2785:                                        MatTranspose_SeqDense,
2786:                                        /* 15*/ MatGetInfo_SeqDense,
2787:                                        MatEqual_SeqDense,
2788:                                        MatGetDiagonal_SeqDense,
2789:                                        MatDiagonalScale_SeqDense,
2790:                                        MatNorm_SeqDense,
2791:                                        /* 20*/ MatAssemblyBegin_SeqDense,
2792:                                        MatAssemblyEnd_SeqDense,
2793:                                        MatSetOption_SeqDense,
2794:                                        MatZeroEntries_SeqDense,
2795:                                        /* 24*/ MatZeroRows_SeqDense,
2796:                                        NULL,
2797:                                        NULL,
2798:                                        NULL,
2799:                                        NULL,
2800:                                        /* 29*/ MatSetUp_SeqDense,
2801:                                        NULL,
2802:                                        NULL,
2803:                                        NULL,
2804:                                        NULL,
2805:                                        /* 34*/ MatDuplicate_SeqDense,
2806:                                        NULL,
2807:                                        NULL,
2808:                                        NULL,
2809:                                        NULL,
2810:                                        /* 39*/ MatAXPY_SeqDense,
2811:                                        MatCreateSubMatrices_SeqDense,
2812:                                        NULL,
2813:                                        MatGetValues_SeqDense,
2814:                                        MatCopy_SeqDense,
2815:                                        /* 44*/ MatGetRowMax_SeqDense,
2816:                                        MatScale_SeqDense,
2817:                                        MatShift_SeqDense,
2818:                                        NULL,
2819:                                        MatZeroRowsColumns_SeqDense,
2820:                                        /* 49*/ MatSetRandom_SeqDense,
2821:                                        NULL,
2822:                                        NULL,
2823:                                        NULL,
2824:                                        NULL,
2825:                                        /* 54*/ NULL,
2826:                                        NULL,
2827:                                        NULL,
2828:                                        NULL,
2829:                                        NULL,
2830:                                        /* 59*/ MatCreateSubMatrix_SeqDense,
2831:                                        MatDestroy_SeqDense,
2832:                                        MatView_SeqDense,
2833:                                        NULL,
2834:                                        NULL,
2835:                                        /* 64*/ NULL,
2836:                                        NULL,
2837:                                        NULL,
2838:                                        NULL,
2839:                                        NULL,
2840:                                        /* 69*/ MatGetRowMaxAbs_SeqDense,
2841:                                        NULL,
2842:                                        NULL,
2843:                                        NULL,
2844:                                        NULL,
2845:                                        /* 74*/ NULL,
2846:                                        NULL,
2847:                                        NULL,
2848:                                        NULL,
2849:                                        NULL,
2850:                                        /* 79*/ NULL,
2851:                                        NULL,
2852:                                        NULL,
2853:                                        NULL,
2854:                                        /* 83*/ MatLoad_SeqDense,
2855:                                        MatIsSymmetric_SeqDense,
2856:                                        MatIsHermitian_SeqDense,
2857:                                        NULL,
2858:                                        NULL,
2859:                                        NULL,
2860:                                        /* 89*/ NULL,
2861:                                        NULL,
2862:                                        MatMatMultNumeric_SeqDense_SeqDense,
2863:                                        NULL,
2864:                                        NULL,
2865:                                        /* 94*/ NULL,
2866:                                        NULL,
2867:                                        NULL,
2868:                                        MatMatTransposeMultNumeric_SeqDense_SeqDense,
2869:                                        NULL,
2870:                                        /* 99*/ MatProductSetFromOptions_SeqDense,
2871:                                        NULL,
2872:                                        NULL,
2873:                                        MatConjugate_SeqDense,
2874:                                        NULL,
2875:                                        /*104*/ NULL,
2876:                                        MatRealPart_SeqDense,
2877:                                        MatImaginaryPart_SeqDense,
2878:                                        NULL,
2879:                                        NULL,
2880:                                        /*109*/ NULL,
2881:                                        NULL,
2882:                                        MatGetRowMin_SeqDense,
2883:                                        MatGetColumnVector_SeqDense,
2884:                                        MatMissingDiagonal_SeqDense,
2885:                                        /*114*/ NULL,
2886:                                        NULL,
2887:                                        NULL,
2888:                                        NULL,
2889:                                        NULL,
2890:                                        /*119*/ NULL,
2891:                                        NULL,
2892:                                        NULL,
2893:                                        NULL,
2894:                                        NULL,
2895:                                        /*124*/ NULL,
2896:                                        MatGetColumnReductions_SeqDense,
2897:                                        NULL,
2898:                                        NULL,
2899:                                        NULL,
2900:                                        /*129*/ NULL,
2901:                                        NULL,
2902:                                        NULL,
2903:                                        MatTransposeMatMultNumeric_SeqDense_SeqDense,
2904:                                        NULL,
2905:                                        /*134*/ NULL,
2906:                                        NULL,
2907:                                        NULL,
2908:                                        NULL,
2909:                                        NULL,
2910:                                        /*139*/ NULL,
2911:                                        NULL,
2912:                                        NULL,
2913:                                        NULL,
2914:                                        NULL,
2915:                                        MatCreateMPIMatConcatenateSeqMat_SeqDense,
2916:                                        /*145*/ NULL,
2917:                                        NULL,
2918:                                        NULL,
2919:                                        NULL,
2920:                                        NULL,
2921:                                        /*150*/ NULL};

2923: /*@C
2924:    MatCreateSeqDense - Creates a `MATSEQDENSE` that
2925:    is stored in column major order (the usual Fortran 77 manner). Many
2926:    of the matrix operations use the BLAS and LAPACK routines.

2928:    Collective

2930:    Input Parameters:
2931: +  comm - MPI communicator, set to `PETSC_COMM_SELF`
2932: .  m - number of rows
2933: .  n - number of columns
2934: -  data - optional location of matrix data in column major order.  Set data=NULL for PETSc
2935:    to control all matrix memory allocation.

2937:    Output Parameter:
2938: .  A - the matrix

2940:    Note:
2941:    The data input variable is intended primarily for Fortran programmers
2942:    who wish to allocate their own matrix memory space.  Most users should
2943:    set data=NULL.

2945:    Level: intermediate

2947: .seealso: `MATSEQDENSE`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`
2948: @*/
2949: PetscErrorCode MatCreateSeqDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscScalar *data, Mat *A)
2950: {
2951:   MatCreate(comm, A);
2952:   MatSetSizes(*A, m, n, m, n);
2953:   MatSetType(*A, MATSEQDENSE);
2954:   MatSeqDenseSetPreallocation(*A, data);
2955:   return 0;
2956: }

2958: /*@C
2959:    MatSeqDenseSetPreallocation - Sets the array used for storing the matrix elements of a `MATSEQDENSE` matrix

2961:    Collective

2963:    Input Parameters:
2964: +  B - the matrix
2965: -  data - the array (or NULL)

2967:    Note:
2968:    The data input variable is intended primarily for Fortran programmers
2969:    who wish to allocate their own matrix memory space.  Most users should
2970:    need not call this routine.

2972:    Level: intermediate

2974: .seealso: `MATSEQDENSE`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`, `MatDenseSetLDA()`
2975: @*/
2976: PetscErrorCode MatSeqDenseSetPreallocation(Mat B, PetscScalar data[])
2977: {
2979:   PetscTryMethod(B, "MatSeqDenseSetPreallocation_C", (Mat, PetscScalar[]), (B, data));
2980:   return 0;
2981: }

2983: PetscErrorCode MatSeqDenseSetPreallocation_SeqDense(Mat B, PetscScalar *data)
2984: {
2985:   Mat_SeqDense *b = (Mat_SeqDense *)B->data;

2988:   B->preallocated = PETSC_TRUE;

2990:   PetscLayoutSetUp(B->rmap);
2991:   PetscLayoutSetUp(B->cmap);

2993:   if (b->lda <= 0) b->lda = B->rmap->n;

2995:   if (!data) { /* petsc-allocated storage */
2996:     if (!b->user_alloc) PetscFree(b->v);
2997:     PetscCalloc1((size_t)b->lda * B->cmap->n, &b->v);

2999:     b->user_alloc = PETSC_FALSE;
3000:   } else { /* user-allocated storage */
3001:     if (!b->user_alloc) PetscFree(b->v);
3002:     b->v          = data;
3003:     b->user_alloc = PETSC_TRUE;
3004:   }
3005:   B->assembled = PETSC_TRUE;
3006:   return 0;
3007: }

3009: #if defined(PETSC_HAVE_ELEMENTAL)
3010: PETSC_INTERN PetscErrorCode MatConvert_SeqDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
3011: {
3012:   Mat                mat_elemental;
3013:   const PetscScalar *array;
3014:   PetscScalar       *v_colwise;
3015:   PetscInt           M = A->rmap->N, N = A->cmap->N, i, j, k, *rows, *cols;

3017:   PetscMalloc3(M * N, &v_colwise, M, &rows, N, &cols);
3018:   MatDenseGetArrayRead(A, &array);
3019:   /* convert column-wise array into row-wise v_colwise, see MatSetValues_Elemental() */
3020:   k = 0;
3021:   for (j = 0; j < N; j++) {
3022:     cols[j] = j;
3023:     for (i = 0; i < M; i++) v_colwise[j * M + i] = array[k++];
3024:   }
3025:   for (i = 0; i < M; i++) rows[i] = i;
3026:   MatDenseRestoreArrayRead(A, &array);

3028:   MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);
3029:   MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, M, N);
3030:   MatSetType(mat_elemental, MATELEMENTAL);
3031:   MatSetUp(mat_elemental);

3033:   /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
3034:   MatSetValues(mat_elemental, M, rows, N, cols, v_colwise, ADD_VALUES);
3035:   MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);
3036:   MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);
3037:   PetscFree3(v_colwise, rows, cols);

3039:   if (reuse == MAT_INPLACE_MATRIX) {
3040:     MatHeaderReplace(A, &mat_elemental);
3041:   } else {
3042:     *newmat = mat_elemental;
3043:   }
3044:   return 0;
3045: }
3046: #endif

3048: PetscErrorCode MatDenseSetLDA_SeqDense(Mat B, PetscInt lda)
3049: {
3050:   Mat_SeqDense *b = (Mat_SeqDense *)B->data;
3051:   PetscBool     data;

3053:   data = (PetscBool)((B->rmap->n > 0 && B->cmap->n > 0) ? (b->v ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE);
3056:   b->lda = lda;
3057:   return 0;
3058: }

3060: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3061: {
3062:   MatCreateMPIMatConcatenateSeqMat_MPIDense(comm, inmat, n, scall, outmat);
3063:   return 0;
3064: }

3066: PetscErrorCode MatDenseGetColumnVec_SeqDense(Mat A, PetscInt col, Vec *v)
3067: {
3068:   Mat_SeqDense *a = (Mat_SeqDense *)A->data;

3072:   if (!a->cvec) { VecCreateSeqWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, &a->cvec); }
3073:   a->vecinuse = col + 1;
3074:   MatDenseGetArray(A, (PetscScalar **)&a->ptrinuse);
3075:   VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)a->lda);
3076:   *v = a->cvec;
3077:   return 0;
3078: }

3080: PetscErrorCode MatDenseRestoreColumnVec_SeqDense(Mat A, PetscInt col, Vec *v)
3081: {
3082:   Mat_SeqDense *a = (Mat_SeqDense *)A->data;

3086:   a->vecinuse = 0;
3087:   MatDenseRestoreArray(A, (PetscScalar **)&a->ptrinuse);
3088:   VecResetArray(a->cvec);
3089:   if (v) *v = NULL;
3090:   return 0;
3091: }

3093: PetscErrorCode MatDenseGetColumnVecRead_SeqDense(Mat A, PetscInt col, Vec *v)
3094: {
3095:   Mat_SeqDense *a = (Mat_SeqDense *)A->data;

3099:   if (!a->cvec) { VecCreateSeqWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, &a->cvec); }
3100:   a->vecinuse = col + 1;
3101:   MatDenseGetArrayRead(A, &a->ptrinuse);
3102:   VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)a->lda);
3103:   VecLockReadPush(a->cvec);
3104:   *v = a->cvec;
3105:   return 0;
3106: }

3108: PetscErrorCode MatDenseRestoreColumnVecRead_SeqDense(Mat A, PetscInt col, Vec *v)
3109: {
3110:   Mat_SeqDense *a = (Mat_SeqDense *)A->data;

3114:   a->vecinuse = 0;
3115:   MatDenseRestoreArrayRead(A, &a->ptrinuse);
3116:   VecLockReadPop(a->cvec);
3117:   VecResetArray(a->cvec);
3118:   if (v) *v = NULL;
3119:   return 0;
3120: }

3122: PetscErrorCode MatDenseGetColumnVecWrite_SeqDense(Mat A, PetscInt col, Vec *v)
3123: {
3124:   Mat_SeqDense *a = (Mat_SeqDense *)A->data;

3128:   if (!a->cvec) { VecCreateSeqWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, &a->cvec); }
3129:   a->vecinuse = col + 1;
3130:   MatDenseGetArrayWrite(A, (PetscScalar **)&a->ptrinuse);
3131:   VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)a->lda);
3132:   *v = a->cvec;
3133:   return 0;
3134: }

3136: PetscErrorCode MatDenseRestoreColumnVecWrite_SeqDense(Mat A, PetscInt col, Vec *v)
3137: {
3138:   Mat_SeqDense *a = (Mat_SeqDense *)A->data;

3142:   a->vecinuse = 0;
3143:   MatDenseRestoreArrayWrite(A, (PetscScalar **)&a->ptrinuse);
3144:   VecResetArray(a->cvec);
3145:   if (v) *v = NULL;
3146:   return 0;
3147: }

3149: PetscErrorCode MatDenseGetSubMatrix_SeqDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
3150: {
3151:   Mat_SeqDense *a = (Mat_SeqDense *)A->data;

3155:   if (a->cmat && (cend - cbegin != a->cmat->cmap->N || rend - rbegin != a->cmat->rmap->N)) MatDestroy(&a->cmat);
3156:   if (!a->cmat) {
3157:     MatCreateDense(PetscObjectComm((PetscObject)A), rend - rbegin, PETSC_DECIDE, rend - rbegin, cend - cbegin, a->v + rbegin + (size_t)cbegin * a->lda, &a->cmat);
3158:   } else {
3159:     MatDensePlaceArray(a->cmat, a->v + rbegin + (size_t)cbegin * a->lda);
3160:   }
3161:   MatDenseSetLDA(a->cmat, a->lda);
3162:   a->matinuse = cbegin + 1;
3163:   *v          = a->cmat;
3164: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
3165:   A->offloadmask = PETSC_OFFLOAD_CPU;
3166: #endif
3167:   return 0;
3168: }

3170: PetscErrorCode MatDenseRestoreSubMatrix_SeqDense(Mat A, Mat *v)
3171: {
3172:   Mat_SeqDense *a = (Mat_SeqDense *)A->data;

3177:   a->matinuse = 0;
3178:   MatDenseResetArray(a->cmat);
3179:   if (v) *v = NULL;
3180: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
3181:   A->offloadmask = PETSC_OFFLOAD_CPU;
3182: #endif
3183:   return 0;
3184: }

3186: /*MC
3187:    MATSEQDENSE - MATSEQDENSE = "seqdense" - A matrix type to be used for sequential dense matrices.

3189:    Options Database Keys:
3190: . -mat_type seqdense - sets the matrix type to `MATSEQDENSE` during a call to `MatSetFromOptions()`

3192:   Level: beginner

3194: .seealso: `MATSEQDENSE`, `MatCreateSeqDense()`
3195: M*/
3196: PetscErrorCode MatCreate_SeqDense(Mat B)
3197: {
3198:   Mat_SeqDense *b;
3199:   PetscMPIInt   size;

3201:   MPI_Comm_size(PetscObjectComm((PetscObject)B), &size);

3204:   PetscNew(&b);
3205:   PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps));
3206:   B->data = (void *)b;

3208:   b->roworiented = PETSC_TRUE;

3210:   PetscObjectComposeFunction((PetscObject)B, "MatQRFactor_C", MatQRFactor_SeqDense);
3211:   PetscObjectComposeFunction((PetscObject)B, "MatDenseGetLDA_C", MatDenseGetLDA_SeqDense);
3212:   PetscObjectComposeFunction((PetscObject)B, "MatDenseSetLDA_C", MatDenseSetLDA_SeqDense);
3213:   PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArray_C", MatDenseGetArray_SeqDense);
3214:   PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArray_C", MatDenseRestoreArray_SeqDense);
3215:   PetscObjectComposeFunction((PetscObject)B, "MatDensePlaceArray_C", MatDensePlaceArray_SeqDense);
3216:   PetscObjectComposeFunction((PetscObject)B, "MatDenseResetArray_C", MatDenseResetArray_SeqDense);
3217:   PetscObjectComposeFunction((PetscObject)B, "MatDenseReplaceArray_C", MatDenseReplaceArray_SeqDense);
3218:   PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArrayRead_C", MatDenseGetArray_SeqDense);
3219:   PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArrayRead_C", MatDenseRestoreArray_SeqDense);
3220:   PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArrayWrite_C", MatDenseGetArray_SeqDense);
3221:   PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArray_SeqDense);
3222:   PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqaij_C", MatConvert_SeqDense_SeqAIJ);
3223: #if defined(PETSC_HAVE_ELEMENTAL)
3224:   PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_elemental_C", MatConvert_SeqDense_Elemental);
3225: #endif
3226: #if defined(PETSC_HAVE_SCALAPACK)
3227:   PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_scalapack_C", MatConvert_Dense_ScaLAPACK);
3228: #endif
3229: #if defined(PETSC_HAVE_CUDA)
3230:   PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqdensecuda_C", MatConvert_SeqDense_SeqDenseCUDA);
3231:   PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensecuda_seqdensecuda_C", MatProductSetFromOptions_SeqDense);
3232:   PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensecuda_seqdense_C", MatProductSetFromOptions_SeqDense);
3233:   PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdensecuda_C", MatProductSetFromOptions_SeqDense);
3234: #endif
3235: #if defined(PETSC_HAVE_HIP)
3236:   PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqdensehip_C", MatConvert_SeqDense_SeqDenseHIP);
3237:   PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensehip_seqdensehip_C", MatProductSetFromOptions_SeqDense);
3238:   PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensehip_seqdense_C", MatProductSetFromOptions_SeqDense);
3239:   PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdensehip_C", MatProductSetFromOptions_SeqDense);
3240: #endif
3241:   PetscObjectComposeFunction((PetscObject)B, "MatSeqDenseSetPreallocation_C", MatSeqDenseSetPreallocation_SeqDense);
3242:   PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqdense_C", MatProductSetFromOptions_SeqAIJ_SeqDense);
3243:   PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdense_C", MatProductSetFromOptions_SeqDense);
3244:   PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqbaij_seqdense_C", MatProductSetFromOptions_SeqXBAIJ_SeqDense);
3245:   PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqsbaij_seqdense_C", MatProductSetFromOptions_SeqXBAIJ_SeqDense);

3247:   PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumn_C", MatDenseGetColumn_SeqDense);
3248:   PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_SeqDense);
3249:   PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_SeqDense);
3250:   PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_SeqDense);
3251:   PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_SeqDense);
3252:   PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_SeqDense);
3253:   PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_SeqDense);
3254:   PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_SeqDense);
3255:   PetscObjectComposeFunction((PetscObject)B, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_SeqDense);
3256:   PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_SeqDense);
3257:   PetscObjectChangeTypeName((PetscObject)B, MATSEQDENSE);
3258:   return 0;
3259: }

3261: /*@C
3262:    MatDenseGetColumn - gives access to a column of a dense matrix. This is only the local part of the column. You MUST call `MatDenseRestoreColumn()` to avoid memory bleeding.

3264:    Not Collective

3266:    Input Parameters:
3267: +  mat - a `MATSEQDENSE` or `MATMPIDENSE` matrix
3268: -  col - column index

3270:    Output Parameter:
3271: .  vals - pointer to the data

3273:    Level: intermediate

3275:    Note:
3276:    Use `MatDenseGetColumnVec()` to get access to a column of a `MATDENSE` treated as a `Vec`

3278: .seealso: `MATDENSE`, `MatDenseRestoreColumn()`, `MatDenseGetColumnVec()`
3279: @*/
3280: PetscErrorCode MatDenseGetColumn(Mat A, PetscInt col, PetscScalar **vals)
3281: {
3285:   PetscUseMethod(A, "MatDenseGetColumn_C", (Mat, PetscInt, PetscScalar **), (A, col, vals));
3286:   return 0;
3287: }

3289: /*@C
3290:    MatDenseRestoreColumn - returns access to a column of a `MATDENSE` matrix which is returned by `MatDenseGetColumn()`.

3292:    Not Collective

3294:    Input Parameters:
3295: +  mat - a `MATSEQDENSE` or `MATMPIDENSE` matrix
3296: -  vals - pointer to the data (may be NULL)

3298:    Level: intermediate

3300: .seealso: `MATDENSE`, `MatDenseGetColumn()`
3301: @*/
3302: PetscErrorCode MatDenseRestoreColumn(Mat A, PetscScalar **vals)
3303: {
3306:   PetscUseMethod(A, "MatDenseRestoreColumn_C", (Mat, PetscScalar **), (A, vals));
3307:   return 0;
3308: }

3310: /*@
3311:    MatDenseGetColumnVec - Gives read-write access to a column of a `MATDENSE` matrix, represented as a `Vec`.

3313:    Collective

3315:    Input Parameters:
3316: +  mat - the `Mat` object
3317: -  col - the column index

3319:    Output Parameter:
3320: .  v - the vector

3322:    Notes:
3323:      The vector is owned by PETSc. Users need to call `MatDenseRestoreColumnVec()` when the vector is no longer needed.

3325:      Use `MatDenseGetColumnVecRead()` to obtain read-only access or `MatDenseGetColumnVecWrite()` for write-only access.

3327:    Level: intermediate

3329: .seealso: `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()`, `MatDenseGetColumn()`
3330: @*/
3331: PetscErrorCode MatDenseGetColumnVec(Mat A, PetscInt col, Vec *v)
3332: {
3339:   PetscUseMethod(A, "MatDenseGetColumnVec_C", (Mat, PetscInt, Vec *), (A, col, v));
3340:   return 0;
3341: }

3343: /*@
3344:    MatDenseRestoreColumnVec - Returns access to a column of a dense matrix obtained from MatDenseGetColumnVec().

3346:    Collective

3348:    Input Parameters:
3349: +  mat - the Mat object
3350: .  col - the column index
3351: -  v - the Vec object (may be NULL)

3353:    Level: intermediate

3355: .seealso: `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()`
3356: @*/
3357: PetscErrorCode MatDenseRestoreColumnVec(Mat A, PetscInt col, Vec *v)
3358: {
3364:   PetscUseMethod(A, "MatDenseRestoreColumnVec_C", (Mat, PetscInt, Vec *), (A, col, v));
3365:   return 0;
3366: }

3368: /*@
3369:    MatDenseGetColumnVecRead - Gives read-only access to a column of a dense matrix, represented as a Vec.

3371:    Collective

3373:    Input Parameters:
3374: +  mat - the Mat object
3375: -  col - the column index

3377:    Output Parameter:
3378: .  v - the vector

3380:    Notes:
3381:      The vector is owned by PETSc and users cannot modify it.

3383:      Users need to call MatDenseRestoreColumnVecRead() when the vector is no longer needed.

3385:      Use MatDenseGetColumnVec() to obtain read-write access or MatDenseGetColumnVecWrite() for write-only access.

3387:    Level: intermediate

3389: .seealso: `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()`
3390: @*/
3391: PetscErrorCode MatDenseGetColumnVecRead(Mat A, PetscInt col, Vec *v)
3392: {
3399:   PetscUseMethod(A, "MatDenseGetColumnVecRead_C", (Mat, PetscInt, Vec *), (A, col, v));
3400:   return 0;
3401: }

3403: /*@
3404:    MatDenseRestoreColumnVecRead - Returns access to a column of a dense matrix obtained from MatDenseGetColumnVecRead().

3406:    Collective

3408:    Input Parameters:
3409: +  mat - the Mat object
3410: .  col - the column index
3411: -  v - the Vec object (may be NULL)

3413:    Level: intermediate

3415: .seealso: `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecWrite()`
3416: @*/
3417: PetscErrorCode MatDenseRestoreColumnVecRead(Mat A, PetscInt col, Vec *v)
3418: {
3424:   PetscUseMethod(A, "MatDenseRestoreColumnVecRead_C", (Mat, PetscInt, Vec *), (A, col, v));
3425:   return 0;
3426: }

3428: /*@
3429:    MatDenseGetColumnVecWrite - Gives write-only access to a column of a dense matrix, represented as a Vec.

3431:    Collective

3433:    Input Parameters:
3434: +  mat - the Mat object
3435: -  col - the column index

3437:    Output Parameter:
3438: .  v - the vector

3440:    Notes:
3441:      The vector is owned by PETSc. Users need to call MatDenseRestoreColumnVecWrite() when the vector is no longer needed.

3443:      Use MatDenseGetColumnVec() to obtain read-write access or MatDenseGetColumnVecRead() for read-only access.

3445:    Level: intermediate

3447: .seealso: `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()`
3448: @*/
3449: PetscErrorCode MatDenseGetColumnVecWrite(Mat A, PetscInt col, Vec *v)
3450: {
3457:   PetscUseMethod(A, "MatDenseGetColumnVecWrite_C", (Mat, PetscInt, Vec *), (A, col, v));
3458:   return 0;
3459: }

3461: /*@
3462:    MatDenseRestoreColumnVecWrite - Returns access to a column of a dense matrix obtained from MatDenseGetColumnVecWrite().

3464:    Collective

3466:    Input Parameters:
3467: +  mat - the Mat object
3468: .  col - the column index
3469: -  v - the Vec object (may be NULL)

3471:    Level: intermediate

3473: .seealso: `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`
3474: @*/
3475: PetscErrorCode MatDenseRestoreColumnVecWrite(Mat A, PetscInt col, Vec *v)
3476: {
3482:   PetscUseMethod(A, "MatDenseRestoreColumnVecWrite_C", (Mat, PetscInt, Vec *), (A, col, v));
3483:   return 0;
3484: }

3486: /*@
3487:    MatDenseGetSubMatrix - Gives access to a block of rows and columns of a dense matrix, represented as a Mat.

3489:    Collective

3491:    Input Parameters:
3492: +  mat - the Mat object
3493: .  rbegin - the first global row index in the block (if PETSC_DECIDE, is 0)
3494: .  rend - the global row index past the last one in the block (if PETSC_DECIDE, is M)
3495: .  cbegin - the first global column index in the block (if PETSC_DECIDE, is 0)
3496: -  cend - the global column index past the last one in the block (if PETSC_DECIDE, is N)

3498:    Output Parameter:
3499: .  v - the matrix

3501:    Notes:
3502:      The matrix is owned by PETSc. Users need to call MatDenseRestoreSubMatrix() when the matrix is no longer needed.

3504:      The output matrix is not redistributed by PETSc, so depending on the values of rbegin and rend, some processes may have no local rows.

3506:    Level: intermediate

3508: .seealso: `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreSubMatrix()`
3509: @*/
3510: PetscErrorCode MatDenseGetSubMatrix(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
3511: {
3519:   if (rbegin == PETSC_DECIDE) rbegin = 0;
3520:   if (rend == PETSC_DECIDE) rend = A->rmap->N;
3521:   if (cbegin == PETSC_DECIDE) cbegin = 0;
3522:   if (cend == PETSC_DECIDE) cend = A->cmap->N;
3528:   PetscUseMethod(A, "MatDenseGetSubMatrix_C", (Mat, PetscInt, PetscInt, PetscInt, PetscInt, Mat *), (A, rbegin, rend, cbegin, cend, v));
3529:   return 0;
3530: }

3532: /*@
3533:    MatDenseRestoreSubMatrix - Returns access to a block of columns of a dense matrix obtained from MatDenseGetSubMatrix().

3535:    Collective

3537:    Input Parameters:
3538: +  mat - the Mat object
3539: -  v - the Mat object (may be NULL)

3541:    Level: intermediate

3543: .seealso: `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseRestoreColumnVec()`, `MatDenseGetSubMatrix()`
3544: @*/
3545: PetscErrorCode MatDenseRestoreSubMatrix(Mat A, Mat *v)
3546: {
3550:   PetscUseMethod(A, "MatDenseRestoreSubMatrix_C", (Mat, Mat *), (A, v));
3551:   return 0;
3552: }

3554: #include <petscblaslapack.h>
3555: #include <petsc/private/kernels/blockinvert.h>

3557: PetscErrorCode MatSeqDenseInvert(Mat A)
3558: {
3559:   Mat_SeqDense   *a              = (Mat_SeqDense *)A->data;
3560:   PetscInt        bs             = A->rmap->n;
3561:   MatScalar      *values         = a->v;
3562:   const PetscReal shift          = 0.0;
3563:   PetscBool       allowzeropivot = PetscNot(A->erroriffailure), zeropivotdetected = PETSC_FALSE;

3565:   /* factor and invert each block */
3566:   switch (bs) {
3567:   case 1:
3568:     values[0] = (PetscScalar)1.0 / (values[0] + shift);
3569:     break;
3570:   case 2:
3571:     PetscKernel_A_gets_inverse_A_2(values, shift, allowzeropivot, &zeropivotdetected);
3572:     if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3573:     break;
3574:   case 3:
3575:     PetscKernel_A_gets_inverse_A_3(values, shift, allowzeropivot, &zeropivotdetected);
3576:     if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3577:     break;
3578:   case 4:
3579:     PetscKernel_A_gets_inverse_A_4(values, shift, allowzeropivot, &zeropivotdetected);
3580:     if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3581:     break;
3582:   case 5: {
3583:     PetscScalar work[25];
3584:     PetscInt    ipvt[5];

3586:     PetscKernel_A_gets_inverse_A_5(values, ipvt, work, shift, allowzeropivot, &zeropivotdetected);
3587:     if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3588:   } break;
3589:   case 6:
3590:     PetscKernel_A_gets_inverse_A_6(values, shift, allowzeropivot, &zeropivotdetected);
3591:     if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3592:     break;
3593:   case 7:
3594:     PetscKernel_A_gets_inverse_A_7(values, shift, allowzeropivot, &zeropivotdetected);
3595:     if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3596:     break;
3597:   default: {
3598:     PetscInt    *v_pivots, *IJ, j;
3599:     PetscScalar *v_work;

3601:     PetscMalloc3(bs, &v_work, bs, &v_pivots, bs, &IJ);
3602:     for (j = 0; j < bs; j++) IJ[j] = j;
3603:     PetscKernel_A_gets_inverse_A(bs, values, v_pivots, v_work, allowzeropivot, &zeropivotdetected);
3604:     if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3605:     PetscFree3(v_work, v_pivots, IJ);
3606:   }
3607:   }
3608:   return 0;
3609: }