Actual source code: matimpl.h
1: #pragma once
3: #include <petscmat.h>
4: #include <petscmatcoarsen.h>
5: #include <petsc/private/petscimpl.h>
7: PETSC_EXTERN PetscBool MatRegisterAllCalled;
8: PETSC_EXTERN PetscBool MatSeqAIJRegisterAllCalled;
9: PETSC_EXTERN PetscBool MatOrderingRegisterAllCalled;
10: PETSC_EXTERN PetscBool MatColoringRegisterAllCalled;
11: PETSC_EXTERN PetscBool MatPartitioningRegisterAllCalled;
12: PETSC_EXTERN PetscBool MatCoarsenRegisterAllCalled;
13: PETSC_EXTERN PetscErrorCode MatRegisterAll(void);
14: PETSC_EXTERN PetscErrorCode MatOrderingRegisterAll(void);
15: PETSC_EXTERN PetscErrorCode MatColoringRegisterAll(void);
16: PETSC_EXTERN PetscErrorCode MatPartitioningRegisterAll(void);
17: PETSC_EXTERN PetscErrorCode MatCoarsenRegisterAll(void);
18: PETSC_EXTERN PetscErrorCode MatSeqAIJRegisterAll(void);
20: /* Gets the root type of the input matrix's type (e.g., MATAIJ for MATSEQAIJ) */
21: PETSC_EXTERN PetscErrorCode MatGetRootType_Private(Mat, MatType *);
23: /* Gets the MPI type corresponding to the input matrix's type (e.g., MATMPIAIJ for MATSEQAIJ) */
24: PETSC_INTERN PetscErrorCode MatGetMPIMatType_Private(Mat, MatType *);
26: /*
27: This file defines the parts of the matrix data structure that are
28: shared by all matrix types.
29: */
31: /*
32: If you add entries here also add them to the MATOP enum
33: in include/petscmat.h
34: */
35: typedef struct _MatOps *MatOps;
36: struct _MatOps {
37: /* 0*/
38: PetscErrorCode (*setvalues)(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
39: PetscErrorCode (*getrow)(Mat, PetscInt, PetscInt *, PetscInt *[], PetscScalar *[]);
40: PetscErrorCode (*restorerow)(Mat, PetscInt, PetscInt *, PetscInt *[], PetscScalar *[]);
41: PetscErrorCode (*mult)(Mat, Vec, Vec);
42: PetscErrorCode (*multadd)(Mat, Vec, Vec, Vec);
43: /* 5*/
44: PetscErrorCode (*multtranspose)(Mat, Vec, Vec);
45: PetscErrorCode (*multtransposeadd)(Mat, Vec, Vec, Vec);
46: PetscErrorCode (*solve)(Mat, Vec, Vec);
47: PetscErrorCode (*solveadd)(Mat, Vec, Vec, Vec);
48: PetscErrorCode (*solvetranspose)(Mat, Vec, Vec);
49: /*10*/
50: PetscErrorCode (*solvetransposeadd)(Mat, Vec, Vec, Vec);
51: PetscErrorCode (*lufactor)(Mat, IS, IS, const MatFactorInfo *);
52: PetscErrorCode (*choleskyfactor)(Mat, IS, const MatFactorInfo *);
53: PetscErrorCode (*sor)(Mat, Vec, PetscReal, MatSORType, PetscReal, PetscInt, PetscInt, Vec);
54: PetscErrorCode (*transpose)(Mat, MatReuse, Mat *);
55: /*15*/
56: PetscErrorCode (*getinfo)(Mat, MatInfoType, MatInfo *);
57: PetscErrorCode (*equal)(Mat, Mat, PetscBool *);
58: PetscErrorCode (*getdiagonal)(Mat, Vec);
59: PetscErrorCode (*diagonalscale)(Mat, Vec, Vec);
60: PetscErrorCode (*norm)(Mat, NormType, PetscReal *);
61: /*20*/
62: PetscErrorCode (*assemblybegin)(Mat, MatAssemblyType);
63: PetscErrorCode (*assemblyend)(Mat, MatAssemblyType);
64: PetscErrorCode (*setoption)(Mat, MatOption, PetscBool);
65: PetscErrorCode (*zeroentries)(Mat);
66: /*24*/
67: PetscErrorCode (*zerorows)(Mat, PetscInt, const PetscInt[], PetscScalar, Vec, Vec);
68: PetscErrorCode (*lufactorsymbolic)(Mat, Mat, IS, IS, const MatFactorInfo *);
69: PetscErrorCode (*lufactornumeric)(Mat, Mat, const MatFactorInfo *);
70: PetscErrorCode (*choleskyfactorsymbolic)(Mat, Mat, IS, const MatFactorInfo *);
71: PetscErrorCode (*choleskyfactornumeric)(Mat, Mat, const MatFactorInfo *);
72: /*29*/
73: PetscErrorCode (*setup)(Mat);
74: PetscErrorCode (*ilufactorsymbolic)(Mat, Mat, IS, IS, const MatFactorInfo *);
75: PetscErrorCode (*iccfactorsymbolic)(Mat, Mat, IS, const MatFactorInfo *);
76: PetscErrorCode (*getdiagonalblock)(Mat, Mat *);
77: PetscErrorCode (*setinf)(Mat);
78: /*34*/
79: PetscErrorCode (*duplicate)(Mat, MatDuplicateOption, Mat *);
80: PetscErrorCode (*forwardsolve)(Mat, Vec, Vec);
81: PetscErrorCode (*backwardsolve)(Mat, Vec, Vec);
82: PetscErrorCode (*ilufactor)(Mat, IS, IS, const MatFactorInfo *);
83: PetscErrorCode (*iccfactor)(Mat, IS, const MatFactorInfo *);
84: /*39*/
85: PetscErrorCode (*axpy)(Mat, PetscScalar, Mat, MatStructure);
86: PetscErrorCode (*createsubmatrices)(Mat, PetscInt, const IS[], const IS[], MatReuse, Mat *[]);
87: PetscErrorCode (*increaseoverlap)(Mat, PetscInt, IS[], PetscInt);
88: PetscErrorCode (*getvalues)(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], PetscScalar[]);
89: PetscErrorCode (*copy)(Mat, Mat, MatStructure);
90: /*44*/
91: PetscErrorCode (*getrowmax)(Mat, Vec, PetscInt[]);
92: PetscErrorCode (*scale)(Mat, PetscScalar);
93: PetscErrorCode (*shift)(Mat, PetscScalar);
94: PetscErrorCode (*diagonalset)(Mat, Vec, InsertMode);
95: PetscErrorCode (*zerorowscolumns)(Mat, PetscInt, const PetscInt[], PetscScalar, Vec, Vec);
96: /*49*/
97: PetscErrorCode (*setrandom)(Mat, PetscRandom);
98: PetscErrorCode (*getrowij)(Mat, PetscInt, PetscBool, PetscBool, PetscInt *, const PetscInt *[], const PetscInt *[], PetscBool *);
99: PetscErrorCode (*restorerowij)(Mat, PetscInt, PetscBool, PetscBool, PetscInt *, const PetscInt *[], const PetscInt *[], PetscBool *);
100: PetscErrorCode (*getcolumnij)(Mat, PetscInt, PetscBool, PetscBool, PetscInt *, const PetscInt *[], const PetscInt *[], PetscBool *);
101: PetscErrorCode (*restorecolumnij)(Mat, PetscInt, PetscBool, PetscBool, PetscInt *, const PetscInt *[], const PetscInt *[], PetscBool *);
102: /*54*/
103: PetscErrorCode (*fdcoloringcreate)(Mat, ISColoring, MatFDColoring);
104: PetscErrorCode (*coloringpatch)(Mat, PetscInt, PetscInt, ISColoringValue[], ISColoring *);
105: PetscErrorCode (*setunfactored)(Mat);
106: PetscErrorCode (*permute)(Mat, IS, IS, Mat *);
107: PetscErrorCode (*setvaluesblocked)(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
108: /*59*/
109: PetscErrorCode (*createsubmatrix)(Mat, IS, IS, MatReuse, Mat *);
110: PetscErrorCode (*destroy)(Mat);
111: PetscErrorCode (*view)(Mat, PetscViewer);
112: PetscErrorCode (*convertfrom)(Mat, MatType, MatReuse, Mat *);
113: PetscErrorCode (*matmatmultsymbolic)(Mat, Mat, Mat, PetscReal, Mat);
114: /*64*/
115: PetscErrorCode (*matmatmultnumeric)(Mat, Mat, Mat, Mat);
116: PetscErrorCode (*setlocaltoglobalmapping)(Mat, ISLocalToGlobalMapping, ISLocalToGlobalMapping);
117: PetscErrorCode (*setvalueslocal)(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
118: PetscErrorCode (*zerorowslocal)(Mat, PetscInt, const PetscInt[], PetscScalar, Vec, Vec);
119: PetscErrorCode (*getrowmaxabs)(Mat, Vec, PetscInt[]);
120: /*69*/
121: PetscErrorCode (*getrowminabs)(Mat, Vec, PetscInt[]);
122: PetscErrorCode (*convert)(Mat, MatType, MatReuse, Mat *);
123: PetscErrorCode (*hasoperation)(Mat, MatOperation, PetscBool *);
124: PetscErrorCode (*fdcoloringapply)(Mat, MatFDColoring, Vec, void *);
125: PetscErrorCode (*setfromoptions)(Mat, PetscOptionItems);
126: /*74*/
127: PetscErrorCode (*findzerodiagonals)(Mat, IS *);
128: PetscErrorCode (*mults)(Mat, Vecs, Vecs);
129: PetscErrorCode (*solves)(Mat, Vecs, Vecs);
130: PetscErrorCode (*getinertia)(Mat, PetscInt *, PetscInt *, PetscInt *);
131: PetscErrorCode (*load)(Mat, PetscViewer);
132: /*79*/
133: PetscErrorCode (*issymmetric)(Mat, PetscReal, PetscBool *);
134: PetscErrorCode (*ishermitian)(Mat, PetscReal, PetscBool *);
135: PetscErrorCode (*isstructurallysymmetric)(Mat, PetscBool *);
136: PetscErrorCode (*setvaluesblockedlocal)(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
137: PetscErrorCode (*getvecs)(Mat, Vec *, Vec *);
138: /*84*/
139: PetscErrorCode (*matmultsymbolic)(Mat, Mat, PetscReal, Mat);
140: PetscErrorCode (*matmultnumeric)(Mat, Mat, Mat);
141: PetscErrorCode (*ptapnumeric)(Mat, Mat, Mat); /* double dispatch wrapper routine */
142: PetscErrorCode (*mattransposemultsymbolic)(Mat, Mat, PetscReal, Mat);
143: PetscErrorCode (*mattransposemultnumeric)(Mat, Mat, Mat);
144: /*89*/
145: PetscErrorCode (*bindtocpu)(Mat, PetscBool);
146: PetscErrorCode (*productsetfromoptions)(Mat);
147: PetscErrorCode (*productsymbolic)(Mat);
148: PetscErrorCode (*productnumeric)(Mat);
149: PetscErrorCode (*conjugate)(Mat); /* complex conjugate */
150: /*94*/
151: PetscErrorCode (*viewnative)(Mat, PetscViewer);
152: PetscErrorCode (*setvaluesrow)(Mat, PetscInt, const PetscScalar[]);
153: PetscErrorCode (*realpart)(Mat);
154: PetscErrorCode (*imaginarypart)(Mat);
155: PetscErrorCode (*getrowuppertriangular)(Mat);
156: /*99*/
157: PetscErrorCode (*restorerowuppertriangular)(Mat);
158: PetscErrorCode (*matsolve)(Mat, Mat, Mat);
159: PetscErrorCode (*matsolvetranspose)(Mat, Mat, Mat);
160: PetscErrorCode (*getrowmin)(Mat, Vec, PetscInt[]);
161: PetscErrorCode (*getcolumnvector)(Mat, Vec, PetscInt);
162: /*104*/
163: PetscErrorCode (*getseqnonzerostructure)(Mat, Mat *);
164: PetscErrorCode (*create)(Mat);
165: PetscErrorCode (*getghosts)(Mat, PetscInt *, const PetscInt *[]);
166: PetscErrorCode (*getlocalsubmatrix)(Mat, IS, IS, Mat *);
167: PetscErrorCode (*restorelocalsubmatrix)(Mat, IS, IS, Mat *);
168: /*109*/
169: PetscErrorCode (*multdiagonalblock)(Mat, Vec, Vec);
170: PetscErrorCode (*hermitiantranspose)(Mat, MatReuse, Mat *);
171: PetscErrorCode (*multhermitiantranspose)(Mat, Vec, Vec);
172: PetscErrorCode (*multhermitiantransposeadd)(Mat, Vec, Vec, Vec);
173: PetscErrorCode (*getmultiprocblock)(Mat, MPI_Comm, MatReuse, Mat *);
174: /*114*/
175: PetscErrorCode (*findnonzerorows)(Mat, IS *);
176: PetscErrorCode (*getcolumnreductions)(Mat, PetscInt, PetscReal *);
177: PetscErrorCode (*invertblockdiagonal)(Mat, const PetscScalar **);
178: PetscErrorCode (*invertvariableblockdiagonal)(Mat, PetscInt, const PetscInt *, PetscScalar *);
179: PetscErrorCode (*createsubmatricesmpi)(Mat, PetscInt, const IS[], const IS[], MatReuse, Mat **);
180: /*119*/
181: PetscErrorCode (*setvaluesbatch)(Mat, PetscInt, PetscInt, PetscInt *, const PetscScalar *);
182: PetscErrorCode (*transposematmultsymbolic)(Mat, Mat, PetscReal, Mat);
183: PetscErrorCode (*transposematmultnumeric)(Mat, Mat, Mat);
184: PetscErrorCode (*transposecoloringcreate)(Mat, ISColoring, MatTransposeColoring);
185: PetscErrorCode (*transcoloringapplysptoden)(MatTransposeColoring, Mat, Mat);
186: /*124*/
187: PetscErrorCode (*transcoloringapplydentosp)(MatTransposeColoring, Mat, Mat);
188: PetscErrorCode (*rartnumeric)(Mat, Mat, Mat); /* double dispatch wrapper routine */
189: PetscErrorCode (*setblocksizes)(Mat, PetscInt, PetscInt);
190: PetscErrorCode (*residual)(Mat, Vec, Vec, Vec);
191: PetscErrorCode (*fdcoloringsetup)(Mat, ISColoring, MatFDColoring);
192: /*129*/
193: PetscErrorCode (*findoffblockdiagonalentries)(Mat, IS *);
194: PetscErrorCode (*creatempimatconcatenateseqmat)(MPI_Comm, Mat, PetscInt, MatReuse, Mat *);
195: PetscErrorCode (*destroysubmatrices)(PetscInt, Mat *[]);
196: PetscErrorCode (*mattransposesolve)(Mat, Mat, Mat);
197: PetscErrorCode (*getvalueslocal)(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], PetscScalar[]);
198: /*134*/
199: PetscErrorCode (*creategraph)(Mat, PetscBool, PetscBool, PetscReal, PetscInt, PetscInt[], Mat *);
200: PetscErrorCode (*transposesymbolic)(Mat, Mat *);
201: PetscErrorCode (*eliminatezeros)(Mat, PetscBool);
202: PetscErrorCode (*getrowsumabs)(Mat, Vec);
203: PetscErrorCode (*getfactor)(Mat, MatSolverType, MatFactorType, Mat *);
204: /*139*/
205: PetscErrorCode (*getblockdiagonal)(Mat, Mat *); // NOTE: the caller of get{block, vblock}diagonal owns the returned matrix;
206: PetscErrorCode (*getvblockdiagonal)(Mat, Mat *); // they must destroy it after use
207: PetscErrorCode (*copyhashtoxaij)(Mat, Mat);
208: PetscErrorCode (*getcurrentmemtype)(Mat, PetscMemType *);
209: PetscErrorCode (*zerorowscolumnslocal)(Mat, PetscInt, const PetscInt[], PetscScalar, Vec, Vec);
210: /*144*/
211: PetscErrorCode (*adot)(Mat, Vec, Vec, PetscScalar *); /* induced vector inner product */
212: PetscErrorCode (*anorm)(Mat, Vec, PetscReal *); /* induced vector norm */
213: PetscErrorCode (*adot_local)(Mat, Vec, Vec, PetscScalar *);
214: PetscErrorCode (*anorm_local)(Mat, Vec, PetscReal *);
215: };
216: /*
217: If you add MatOps entries above also add them to the MATOP enum
218: in include/petscmat.h
219: */
221: #include <petscsys.h>
223: typedef struct _p_MatRootName *MatRootName;
224: struct _p_MatRootName {
225: char *rname, *sname, *mname;
226: MatRootName next;
227: };
229: PETSC_EXTERN MatRootName MatRootNameList;
231: /*
232: Utility private matrix routines used outside Mat
233: */
234: PETSC_SINGLE_LIBRARY_INTERN PetscErrorCode MatFindNonzeroRowsOrCols_Basic(Mat, PetscBool, PetscReal, IS *);
235: PETSC_EXTERN PetscErrorCode MatShellGetScalingShifts(Mat, PetscScalar *, PetscScalar *, Vec *, Vec *, Vec *, Mat *, IS *, IS *);
237: #define MAT_SHELL_NOT_ALLOWED (void *)-1
239: /*
240: Utility private matrix routines
241: */
242: PETSC_INTERN PetscErrorCode MatConvert_Basic(Mat, MatType, MatReuse, Mat *);
243: PETSC_INTERN PetscErrorCode MatConvert_Shell(Mat, MatType, MatReuse, Mat *);
244: PETSC_INTERN PetscErrorCode MatConvertFrom_Shell(Mat, MatType, MatReuse, Mat *);
245: PETSC_INTERN PetscErrorCode MatShellSetContext_Immutable(Mat, void *);
246: PETSC_INTERN PetscErrorCode MatShellSetContextDestroy_Immutable(Mat, PetscCtxDestroyFn *);
247: PETSC_INTERN PetscErrorCode MatShellSetManageScalingShifts_Immutable(Mat);
248: PETSC_INTERN PetscErrorCode MatCopy_Basic(Mat, Mat, MatStructure);
249: PETSC_INTERN PetscErrorCode MatDiagonalSet_Default(Mat, Vec, InsertMode);
250: #if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
251: PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
252: #endif
253: PETSC_INTERN PetscErrorCode MatSetPreallocationCOO_Basic(Mat, PetscCount, PetscInt[], PetscInt[]);
254: PETSC_INTERN PetscErrorCode MatSetValuesCOO_Basic(Mat, const PetscScalar[], InsertMode);
256: /* Scattering of dense matrices with strided PetscSF */
257: PETSC_EXTERN PetscErrorCode MatDenseScatter_Private(PetscSF, Mat, Mat, InsertMode, ScatterMode);
259: /* This can be moved to the public header after implementing some missing MatProducts */
260: PETSC_INTERN PetscErrorCode MatCreateFromISLocalToGlobalMapping(ISLocalToGlobalMapping, Mat, PetscBool, PetscBool, MatType, Mat *);
262: /* these callbacks rely on the old matrix function pointers for
263: matmat operations. They are unsafe, and should be removed.
264: However, the amount of work needed to clean up all the
265: implementations is not negligible */
266: PETSC_INTERN PetscErrorCode MatProductSymbolic_AB(Mat);
267: PETSC_INTERN PetscErrorCode MatProductNumeric_AB(Mat);
268: PETSC_INTERN PetscErrorCode MatProductSymbolic_AtB(Mat);
269: PETSC_INTERN PetscErrorCode MatProductNumeric_AtB(Mat);
270: PETSC_INTERN PetscErrorCode MatProductSymbolic_ABt(Mat);
271: PETSC_INTERN PetscErrorCode MatProductNumeric_ABt(Mat);
272: PETSC_INTERN PetscErrorCode MatProductNumeric_PtAP(Mat);
273: PETSC_INTERN PetscErrorCode MatProductNumeric_RARt(Mat);
274: PETSC_INTERN PetscErrorCode MatProductSymbolic_ABC(Mat);
275: PETSC_INTERN PetscErrorCode MatProductNumeric_ABC(Mat);
277: PETSC_INTERN PetscErrorCode MatProductCreate_Private(Mat, Mat, Mat, Mat);
278: /* this callback handles all the different triple products and
279: does not rely on the function pointers; used by cuSPARSE/hipSPARSE and KOKKOS-KERNELS */
280: PETSC_INTERN PetscErrorCode MatProductSymbolic_ABC_Basic(Mat);
282: /* CreateGraph is common to AIJ seq and mpi */
283: PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat, PetscBool, PetscBool, PetscReal, PetscInt, PetscInt[], Mat *);
285: #if defined(PETSC_CLANG_STATIC_ANALYZER)
286: template <typename Tm>
287: extern void MatCheckPreallocated(Tm, int);
288: template <typename Tm>
289: extern void MatCheckProduct(Tm, int);
290: #else /* PETSC_CLANG_STATIC_ANALYZER */
291: #define MatCheckPreallocated(A, arg) \
292: do { \
293: if (!(A)->preallocated) PetscCall(MatSetUp(A)); \
294: } while (0)
296: #if defined(PETSC_USE_DEBUG)
297: #define MatCheckProduct(A, arg) \
298: do { \
299: PetscCheck((A)->product, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Argument %d \"%s\" is not a matrix obtained from MatProductCreate()", (arg), #A); \
300: } while (0)
301: #else
302: #define MatCheckProduct(A, arg) \
303: do { \
304: } while (0)
305: #endif
306: #endif /* PETSC_CLANG_STATIC_ANALYZER */
308: /*
309: The stash is used to temporarily store inserted matrix values that
310: belong to another processor. During the assembly phase the stashed
311: values are moved to the correct processor and
312: */
314: typedef struct _MatStashSpace *PetscMatStashSpace;
316: struct _MatStashSpace {
317: PetscMatStashSpace next;
318: PetscScalar *space_head, *val;
319: PetscInt *idx, *idy;
320: PetscInt total_space_size;
321: PetscInt local_used;
322: PetscInt local_remaining;
323: };
325: PETSC_EXTERN PetscErrorCode PetscMatStashSpaceGet(PetscInt, PetscInt, PetscMatStashSpace *);
326: PETSC_EXTERN PetscErrorCode PetscMatStashSpaceContiguous(PetscInt, PetscMatStashSpace *, PetscScalar *, PetscInt *, PetscInt *);
327: PETSC_EXTERN PetscErrorCode PetscMatStashSpaceDestroy(PetscMatStashSpace *);
329: typedef struct {
330: PetscInt count;
331: } MatStashHeader;
333: typedef struct {
334: void *buffer; /* Of type blocktype, dynamically constructed */
335: PetscInt count;
336: char pending;
337: } MatStashFrame;
339: typedef struct _MatStash MatStash;
340: struct _MatStash {
341: PetscInt nmax; /* maximum stash size */
342: PetscInt umax; /* user specified max-size */
343: PetscInt oldnmax; /* the nmax value used previously */
344: PetscInt n; /* stash size */
345: PetscInt bs; /* block size of the stash */
346: PetscInt reallocs; /* preserve the no of mallocs invoked */
347: PetscMatStashSpace space_head, space; /* linked list to hold stashed global row/column numbers and matrix values */
349: PetscErrorCode (*ScatterBegin)(Mat, MatStash *, PetscInt *);
350: PetscErrorCode (*ScatterGetMesg)(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
351: PetscErrorCode (*ScatterEnd)(MatStash *);
352: PetscErrorCode (*ScatterDestroy)(MatStash *);
354: /* The following variables are used for communication */
355: MPI_Comm comm;
356: PetscMPIInt size, rank;
357: PetscMPIInt tag1, tag2;
358: MPI_Request *send_waits; /* array of send requests */
359: MPI_Request *recv_waits; /* array of receive requests */
360: MPI_Status *send_status; /* array of send status */
361: PetscMPIInt nsends, nrecvs; /* numbers of sends and receives */
362: PetscScalar *svalues; /* sending data */
363: PetscInt *sindices;
364: PetscScalar **rvalues; /* receiving data (values) */
365: PetscInt **rindices; /* receiving data (indices) */
366: PetscMPIInt nprocessed; /* number of messages already processed */
367: PetscMPIInt *flg_v; /* indicates what messages have arrived so far and from whom */
368: PetscBool reproduce;
369: PetscMPIInt reproduce_count;
371: /* The following variables are used for BTS communication */
372: PetscBool first_assembly_done; /* Is the first time matrix assembly done? */
373: PetscBool use_status; /* Use MPI_Status to determine number of items in each message */
374: PetscMPIInt nsendranks;
375: PetscMPIInt nrecvranks;
376: PetscMPIInt *sendranks;
377: PetscMPIInt *recvranks;
378: MatStashHeader *sendhdr, *recvhdr;
379: MatStashFrame *sendframes; /* pointers to the main messages */
380: MatStashFrame *recvframes;
381: MatStashFrame *recvframe_active;
382: PetscInt recvframe_i; /* index of block within active frame */
383: PetscInt recvframe_count; /* Count actually sent for current frame */
384: PetscMPIInt recvcount; /* Number of receives processed so far */
385: PetscMPIInt *some_indices; /* From last call to MPI_Waitsome */
386: MPI_Status *some_statuses; /* Statuses from last call to MPI_Waitsome */
387: PetscMPIInt some_count; /* Number of requests completed in last call to MPI_Waitsome */
388: PetscMPIInt some_i; /* Index of request currently being processed */
389: MPI_Request *sendreqs;
390: MPI_Request *recvreqs;
391: PetscSegBuffer segsendblocks;
392: PetscSegBuffer segrecvframe;
393: PetscSegBuffer segrecvblocks;
394: MPI_Datatype blocktype;
395: size_t blocktype_size;
396: InsertMode *insertmode; /* Pointer to check mat->insertmode and set upon message arrival in case no local values have been set. */
397: };
399: #if !defined(PETSC_HAVE_MPIUNI)
400: PETSC_INTERN PetscErrorCode MatStashScatterDestroy_BTS(MatStash *);
401: #endif
402: PETSC_INTERN PetscErrorCode MatStashCreate_Private(MPI_Comm, PetscInt, MatStash *);
403: PETSC_INTERN PetscErrorCode MatStashDestroy_Private(MatStash *);
404: PETSC_INTERN PetscErrorCode MatStashScatterEnd_Private(MatStash *);
405: PETSC_INTERN PetscErrorCode MatStashSetInitialSize_Private(MatStash *, PetscInt);
406: PETSC_INTERN PetscErrorCode MatStashGetInfo_Private(MatStash *, PetscInt *, PetscInt *);
407: PETSC_INTERN PetscErrorCode MatStashValuesRow_Private(MatStash *, PetscInt, PetscInt, const PetscInt[], const PetscScalar[], PetscBool);
408: PETSC_INTERN PetscErrorCode MatStashValuesCol_Private(MatStash *, PetscInt, PetscInt, const PetscInt[], const PetscScalar[], PetscInt, PetscBool);
409: PETSC_INTERN PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *, PetscInt, PetscInt, const PetscInt[], const PetscScalar[], PetscInt, PetscInt, PetscInt);
410: PETSC_INTERN PetscErrorCode MatStashValuesColBlocked_Private(MatStash *, PetscInt, PetscInt, const PetscInt[], const PetscScalar[], PetscInt, PetscInt, PetscInt);
411: PETSC_INTERN PetscErrorCode MatStashScatterBegin_Private(Mat, MatStash *, PetscInt *);
412: PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Private(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
413: PETSC_INTERN PetscErrorCode MatGetInfo_External(Mat, MatInfoType, MatInfo *);
415: typedef struct {
416: PetscInt dim;
417: PetscInt dims[4];
418: PetscInt starts[4];
419: PetscBool noc; /* this is a single component problem, hence user will not set MatStencil.c */
420: } MatStencilInfo;
422: /* Info about using compressed row format */
423: typedef struct {
424: PetscBool use; /* indicates compressed rows have been checked and will be used */
425: PetscInt nrows; /* number of non-zero rows */
426: PetscInt *i; /* compressed row pointer */
427: PetscInt *rindex; /* compressed row index */
428: } Mat_CompressedRow;
429: PETSC_EXTERN PetscErrorCode MatCheckCompressedRow(Mat, PetscInt, Mat_CompressedRow *, PetscInt *, PetscInt, PetscReal);
431: typedef struct { /* used by MatCreateRedundantMatrix() for reusing matredundant */
432: PetscInt nzlocal, nsends, nrecvs;
433: PetscMPIInt *send_rank, *recv_rank;
434: PetscInt *sbuf_nz, *rbuf_nz, *sbuf_j, **rbuf_j;
435: PetscScalar *sbuf_a, **rbuf_a;
436: MPI_Comm subcomm; /* when user does not provide a subcomm */
437: IS isrow, iscol;
438: Mat *matseq;
439: } Mat_Redundant;
441: typedef struct { /* used by MatProduct() */
442: MatProductType type;
443: char *alg;
444: Mat A, B, C, Dwork;
445: PetscBool symbolic_used_the_fact_A_is_symmetric; /* Symbolic phase took advantage of the fact that A is symmetric, and optimized e.g. AtB as AB. Then, .. */
446: PetscBool symbolic_used_the_fact_B_is_symmetric; /* .. in the numeric phase, if a new A is not symmetric (but has the same sparsity as the old A therefore .. */
447: PetscBool symbolic_used_the_fact_C_is_symmetric; /* MatMatMult(A,B,MAT_REUSE_MATRIX,..&C) is still legitimate), we need to redo symbolic! */
448: PetscObjectParameterDeclare(PetscReal, fill);
449: PetscBool api_user; /* used to distinguish command line options and to indicate the matrix values are ready to be consumed at symbolic phase if needed */
450: PetscBool setfromoptionscalled;
452: /* Some products may display the information on the algorithm used */
453: PetscErrorCode (*view)(Mat, PetscViewer);
455: /* many products have intermediate data structures, each specific to Mat types and product type */
456: PetscBool clear; /* whether or not to clear the data structures after MatProductNumeric has been called */
457: void *data; /* where to stash those structures */
458: PetscCtxDestroyFn *destroy; /* freeing data */
459: } Mat_Product;
461: struct _p_Mat {
462: PETSCHEADER(struct _MatOps);
463: PetscLayout rmap, cmap;
464: void *data; /* implementation-specific data */
465: MatFactorType factortype; /* MAT_FACTOR_LU, ILU, CHOLESKY or ICC */
466: PetscBool trivialsymbolic; /* indicates the symbolic factorization doesn't actually do a symbolic factorization, it is delayed to the numeric factorization */
467: PetscBool canuseordering; /* factorization can use ordering provide to routine (most PETSc implementations) */
468: MatOrderingType preferredordering[MAT_FACTOR_NUM_TYPES]; /* what is the preferred (or default) ordering for the matrix solver type */
469: PetscBool assembled; /* is the matrix assembled? */
470: PetscBool was_assembled; /* new values inserted into assembled mat */
471: PetscInt num_ass; /* number of times matrix has been assembled */
472: PetscObjectState nonzerostate; /* each time new nonzeros locations are introduced into the matrix this is updated */
473: PetscObjectState ass_nonzerostate; /* nonzero state at last assembly */
474: MatInfo info; /* matrix information */
475: InsertMode insertmode; /* have values been inserted in matrix or added? */
476: MatStash stash, bstash; /* used for assembling off-proc mat emements */
477: MatNullSpace nullsp; /* null space (operator is singular) */
478: MatNullSpace transnullsp; /* null space of transpose of operator */
479: MatNullSpace nearnullsp; /* near null space to be used by multigrid methods */
480: PetscInt congruentlayouts; /* are the rows and columns layouts congruent? */
481: PetscBool preallocated;
482: MatStencilInfo stencil; /* information for structured grid */
483: PetscBool3 symmetric, hermitian, structurally_symmetric, spd;
484: PetscBool symmetry_eternal, structural_symmetry_eternal, spd_eternal;
485: PetscBool nooffprocentries, nooffproczerorows;
486: PetscBool assembly_subset; /* set by MAT_SUBSET_OFF_PROC_ENTRIES */
487: PetscBool submat_singleis; /* for efficient PCSetUp_ASM() */
488: PetscBool structure_only;
489: PetscBool sortedfull; /* full, sorted rows are inserted */
490: PetscBool force_diagonals; /* set by MAT_FORCE_DIAGONAL_ENTRIES */
491: #if defined(PETSC_HAVE_DEVICE)
492: PetscOffloadMask offloadmask; /* a mask which indicates where the valid matrix data is (GPU, CPU or both) */
493: PetscBool boundtocpu;
494: PetscBool bindingpropagates;
495: #endif
496: char *defaultrandtype;
497: void *spptr; /* pointer for special library like SuperLU */
498: char *solvertype;
499: PetscBool checksymmetryonassembly, checknullspaceonassembly;
500: PetscReal checksymmetrytol;
501: Mat schur; /* Schur complement matrix */
502: MatFactorSchurStatus schur_status; /* status of the Schur complement matrix */
503: Mat_Redundant *redundant; /* used by MatCreateRedundantMatrix() */
504: PetscBool erroriffailure; /* Generate an error if detected (for example a zero pivot) instead of returning */
505: MatFactorError factorerrortype; /* type of error in factorization */
506: PetscReal factorerror_zeropivot_value; /* If numerical zero pivot was detected this is the computed value */
507: PetscInt factorerror_zeropivot_row; /* Row where zero pivot was detected */
508: PetscInt nblocks, *bsizes; /* support for MatSetVariableBlockSizes() */
509: PetscInt p_cstart, p_rank, p_cend, n_rank; /* Information from parallel MatComputeVariableBlockEnvelope() */
510: PetscBool p_parallel;
511: char *defaultvectype;
512: Mat_Product *product;
513: PetscBool form_explicit_transpose; /* hint to generate an explicit mat tranpsose for operations like MatMultTranspose() */
514: PetscBool transupdated; /* whether or not the explicitly generated transpose is up-to-date */
515: char *factorprefix; /* the prefix to use with factored matrix that is created */
516: PetscBool hash_active; /* indicates MatSetValues() is being handled by hashing */
517: Vec dot_vec; /* work vector used by MatADot_Default() */
518: };
520: PETSC_INTERN PetscErrorCode MatAXPY_Basic(Mat, PetscScalar, Mat, MatStructure);
521: PETSC_INTERN PetscErrorCode MatAXPY_BasicWithPreallocation(Mat, Mat, PetscScalar, Mat, MatStructure);
522: PETSC_INTERN PetscErrorCode MatAXPY_Basic_Preallocate(Mat, Mat, Mat *);
523: PETSC_INTERN PetscErrorCode MatAXPY_Dense_Nest(Mat, PetscScalar, Mat);
525: PETSC_INTERN PetscErrorCode MatSetUp_Default(Mat);
527: /*
528: Utility for MatZeroRows
529: */
530: PETSC_INTERN PetscErrorCode MatZeroRowsMapLocal_Private(Mat, PetscInt, const PetscInt *, PetscInt *, PetscInt **);
532: /*
533: Utility for MatView/MatLoad
534: */
535: PETSC_INTERN PetscErrorCode MatView_Binary_BlockSizes(Mat, PetscViewer);
536: PETSC_INTERN PetscErrorCode MatLoad_Binary_BlockSizes(Mat, PetscViewer);
538: /*
539: Object for partitioning graphs
540: */
542: typedef struct _MatPartitioningOps *MatPartitioningOps;
543: struct _MatPartitioningOps {
544: PetscErrorCode (*apply)(MatPartitioning, IS *);
545: PetscErrorCode (*applynd)(MatPartitioning, IS *);
546: PetscErrorCode (*setfromoptions)(MatPartitioning, PetscOptionItems);
547: PetscErrorCode (*destroy)(MatPartitioning);
548: PetscErrorCode (*view)(MatPartitioning, PetscViewer);
549: PetscErrorCode (*improve)(MatPartitioning, IS *);
550: };
552: struct _p_MatPartitioning {
553: PETSCHEADER(struct _MatPartitioningOps);
554: Mat adj;
555: PetscInt *vertex_weights;
556: PetscReal *part_weights;
557: PetscInt n; /* number of partitions */
558: PetscInt ncon; /* number of vertex weights per vertex */
559: void *data;
560: PetscBool use_edge_weights; /* A flag indicates whether or not to use edge weights */
561: };
563: /* needed for parallel nested dissection by ParMetis and PTSCOTCH */
564: PETSC_INTERN PetscErrorCode MatPartitioningSizesToSep_Private(PetscInt, PetscInt[], PetscInt[], PetscInt[]);
566: /*
567: Object for coarsen graphs
568: */
569: typedef struct _MatCoarsenOps *MatCoarsenOps;
570: struct _MatCoarsenOps {
571: PetscErrorCode (*apply)(MatCoarsen);
572: PetscErrorCode (*setfromoptions)(MatCoarsen, PetscOptionItems);
573: PetscErrorCode (*destroy)(MatCoarsen);
574: PetscErrorCode (*view)(MatCoarsen, PetscViewer);
575: };
577: #define MAT_COARSEN_STRENGTH_INDEX_SIZE 3
578: struct _p_MatCoarsen {
579: PETSCHEADER(struct _MatCoarsenOps);
580: Mat graph;
581: void *subctx;
582: /* */
583: PetscBool strict_aggs;
584: IS perm;
585: PetscCoarsenData *agg_lists;
586: PetscInt max_it; /* number of iterations in HEM */
587: PetscReal threshold; /* HEM can filter interim graphs */
588: PetscInt strength_index_size;
589: PetscInt strength_index[MAT_COARSEN_STRENGTH_INDEX_SIZE];
590: };
592: PETSC_EXTERN PetscErrorCode MatCoarsenMISKSetDistance(MatCoarsen, PetscInt);
593: PETSC_EXTERN PetscErrorCode MatCoarsenMISKGetDistance(MatCoarsen, PetscInt *);
595: /*
596: Used in aijdevice.h
597: */
598: typedef struct {
599: PetscInt *i;
600: PetscInt *j;
601: PetscScalar *a;
602: PetscInt n;
603: PetscInt ignorezeroentries;
604: } PetscCSRDataStructure;
606: /*
607: MatFDColoring is used to compute Jacobian matrices efficiently
608: via coloring. The data structure is explained below in an example.
610: Color = 0 1 0 2 | 2 3 0
611: ---------------------------------------------------
612: 00 01 | 05
613: 10 11 | 14 15 Processor 0
614: 22 23 | 25
615: 32 33 |
616: ===================================================
617: | 44 45 46
618: 50 | 55 Processor 1
619: | 64 66
620: ---------------------------------------------------
622: ncolors = 4;
624: ncolumns = {2,1,1,0}
625: columns = {{0,2},{1},{3},{}}
626: nrows = {4,2,3,3}
627: rows = {{0,1,2,3},{0,1},{1,2,3},{0,1,2}}
628: vwscale = {dx(0),dx(1),dx(2),dx(3)} MPI Vec
629: vscale = {dx(0),dx(1),dx(2),dx(3),dx(4),dx(5)} Seq Vec
631: ncolumns = {1,0,1,1}
632: columns = {{6},{},{4},{5}}
633: nrows = {3,0,2,2}
634: rows = {{0,1,2},{},{1,2},{1,2}}
635: vwscale = {dx(4),dx(5),dx(6)} MPI Vec
636: vscale = {dx(0),dx(4),dx(5),dx(6)} Seq Vec
638: See the routine MatFDColoringApply() for how this data is used
639: to compute the Jacobian.
641: */
642: typedef struct {
643: PetscInt row;
644: PetscInt col;
645: PetscScalar *valaddr; /* address of value */
646: } MatEntry;
648: typedef struct {
649: PetscInt row;
650: PetscScalar *valaddr; /* address of value */
651: } MatEntry2;
653: struct _p_MatFDColoring {
654: PETSCHEADER(int);
655: PetscInt M, N, m; /* total rows, columns; local rows */
656: PetscInt rstart; /* first row owned by local processor */
657: PetscInt ncolors; /* number of colors */
658: PetscInt *ncolumns; /* number of local columns for a color */
659: PetscInt **columns; /* lists the local columns of each color (using global column numbering) */
660: IS *isa; /* these are the IS that contain the column values given in columns */
661: PetscInt *nrows; /* number of local rows for each color */
662: MatEntry *matentry; /* holds (row, column, address of value) for Jacobian matrix entry */
663: MatEntry2 *matentry2; /* holds (row, address of value) for Jacobian matrix entry */
664: PetscScalar *dy; /* store a block of F(x+dx)-F(x) when J is in BAIJ format */
665: PetscReal error_rel; /* square root of relative error in computing function */
666: PetscReal umin; /* minimum allowable u'dx value */
667: Vec w1, w2, w3; /* work vectors used in computing Jacobian */
668: PetscBool fset; /* indicates that the initial function value F(X) is set */
669: MatFDColoringFn *f; /* function that defines Jacobian */
670: void *fctx; /* optional user-defined context for use by the function f */
671: Vec vscale; /* holds FD scaling, i.e. 1/dx for each perturbed column */
672: PetscInt currentcolor; /* color for which function evaluation is being done now */
673: const char *htype; /* "wp" or "ds" */
674: ISColoringType ctype; /* IS_COLORING_GLOBAL or IS_COLORING_LOCAL */
675: PetscInt brows, bcols; /* number of block rows or columns for speedup inserting the dense matrix into sparse Jacobian */
676: PetscBool setupcalled; /* true if setup has been called */
677: PetscBool viewed; /* true if the -mat_fd_coloring_view has been triggered already */
678: PetscFortranCallbackFn *ftn_func_pointer; /* serve the same purpose as *fortran_func_pointers in PETSc objects */
679: void *ftn_func_cntx;
680: PetscObjectId matid; /* matrix this object was created with, must always be the same */
681: };
683: typedef struct _MatColoringOps *MatColoringOps;
684: struct _MatColoringOps {
685: PetscErrorCode (*destroy)(MatColoring);
686: PetscErrorCode (*setfromoptions)(MatColoring, PetscOptionItems);
687: PetscErrorCode (*view)(MatColoring, PetscViewer);
688: PetscErrorCode (*apply)(MatColoring, ISColoring *);
689: PetscErrorCode (*weights)(MatColoring, PetscReal **, PetscInt **);
690: };
692: struct _p_MatColoring {
693: PETSCHEADER(struct _MatColoringOps);
694: Mat mat;
695: PetscInt dist; /* distance of the coloring */
696: PetscInt maxcolors; /* the maximum number of colors returned, maxcolors=1 for MIS */
697: void *data; /* inner context */
698: PetscBool valid; /* check to see if what is produced is a valid coloring */
699: MatColoringWeightType weight_type; /* type of weight computation to be performed */
700: PetscReal *user_weights; /* custom weights and permutation */
701: PetscInt *user_lperm;
702: PetscBool valid_iscoloring; /* check to see if matcoloring is produced a valid iscoloring */
703: };
705: struct _p_MatTransposeColoring {
706: PETSCHEADER(int);
707: PetscInt M, N, m; /* total rows, columns; local rows */
708: PetscInt rstart; /* first row owned by local processor */
709: PetscInt ncolors; /* number of colors */
710: PetscInt *ncolumns; /* number of local columns for a color */
711: PetscInt *nrows; /* number of local rows for each color */
712: PetscInt currentcolor; /* color for which function evaluation is being done now */
713: ISColoringType ctype; /* IS_COLORING_GLOBAL or IS_COLORING_LOCAL */
715: PetscInt *colorforrow, *colorforcol; /* pointer to rows and columns */
716: PetscInt *rows; /* lists the local rows for each color (using the local row numbering) */
717: PetscInt *den2sp; /* maps (row,color) in the dense matrix to index of sparse matrix array a->a */
718: PetscInt *columns; /* lists the local columns of each color (using global column numbering) */
719: PetscInt brows; /* number of rows for efficient implementation of MatTransColoringApplyDenToSp() */
720: PetscInt *lstart; /* array used for loop over row blocks of Csparse */
721: };
723: /*
724: Null space context for preconditioner/operators
725: */
726: struct _p_MatNullSpace {
727: PETSCHEADER(int);
728: PetscBool has_cnst;
729: PetscInt n;
730: Vec *vecs;
731: PetscScalar *alpha; /* for projections */
732: MatNullSpaceRemoveFn *remove; /* for user provided removal function */
733: void *rmctx; /* context for remove() function */
734: };
736: /*
737: Internal data structure for MATMPIDENSE
738: */
739: typedef struct {
740: Mat A; /* local submatrix */
742: /* The following variables are used for matrix assembly */
743: PetscBool donotstash; /* Flag indicating if values should be stashed */
744: MPI_Request *send_waits; /* array of send requests */
745: MPI_Request *recv_waits; /* array of receive requests */
746: PetscInt nsends, nrecvs; /* numbers of sends and receives */
747: PetscScalar *svalues, *rvalues; /* sending and receiving data */
748: PetscInt rmax; /* maximum message length */
750: /* The following variables are used for matrix-vector products */
751: Vec lvec; /* local vector */
752: PetscSF Mvctx; /* for mat-mult communications */
753: PetscBool roworiented; /* if true, row-oriented input (default) */
755: /* Support for MatDenseGetColumnVec and MatDenseGetSubMatrix */
756: Mat cmat; /* matrix representation of a given subset of columns */
757: Vec cvec; /* vector representation of a given column */
758: const PetscScalar *ptrinuse; /* holds array to be restored (just a placeholder) */
759: PetscInt vecinuse; /* if cvec is in use (col = vecinuse-1) */
760: PetscInt matinuse; /* if cmat is in use (cbegin = matinuse-1) */
761: /* if this is from MatDenseGetSubMatrix, which columns and rows does it correspond to? */
762: PetscInt sub_rbegin;
763: PetscInt sub_rend;
764: PetscInt sub_cbegin;
765: PetscInt sub_cend;
766: } Mat_MPIDense;
768: /*
769: Checking zero pivot for LU, ILU preconditioners.
770: */
771: typedef struct {
772: PetscInt nshift, nshift_max;
773: PetscReal shift_amount, shift_lo, shift_hi, shift_top, shift_fraction;
774: PetscBool newshift;
775: PetscReal rs; /* active row sum of abs(off-diagonals) */
776: PetscScalar pv; /* pivot of the active row */
777: } FactorShiftCtx;
779: PETSC_SINGLE_LIBRARY_INTERN PetscErrorCode MatTransposeCheckNonzeroState_Private(Mat, Mat);
781: /*
782: Used by MatTranspose() and potentially other functions to track the matrix used in the generation of another matrix
783: */
784: typedef struct {
785: PetscObjectId id;
786: PetscObjectState state;
787: PetscObjectState nonzerostate;
788: } MatParentState;
790: PETSC_EXTERN PetscErrorCode MatFactorDumpMatrix(Mat);
791: PETSC_INTERN PetscErrorCode MatSetBlockSizes_Default(Mat, PetscInt, PetscInt);
793: PETSC_SINGLE_LIBRARY_INTERN PetscErrorCode MatShift_Basic(Mat, PetscScalar);
795: static inline PetscErrorCode MatPivotCheck_nz(PETSC_UNUSED Mat mat, const MatFactorInfo *info, FactorShiftCtx *sctx, PETSC_UNUSED PetscInt row)
796: {
797: PetscReal _rs = sctx->rs;
798: PetscReal _zero = info->zeropivot * _rs;
800: PetscFunctionBegin;
801: if (PetscAbsScalar(sctx->pv) <= _zero && !PetscIsNanScalar(sctx->pv)) {
802: /* force |diag| > zeropivot*rs */
803: if (!sctx->nshift) sctx->shift_amount = info->shiftamount;
804: else sctx->shift_amount *= 2.0;
805: sctx->newshift = PETSC_TRUE;
806: (sctx->nshift)++;
807: } else {
808: sctx->newshift = PETSC_FALSE;
809: }
810: PetscFunctionReturn(PETSC_SUCCESS);
811: }
813: static inline PetscErrorCode MatPivotCheck_pd(PETSC_UNUSED Mat mat, const MatFactorInfo *info, FactorShiftCtx *sctx, PETSC_UNUSED PetscInt row)
814: {
815: PetscReal _rs = sctx->rs;
816: PetscReal _zero = info->zeropivot * _rs;
818: PetscFunctionBegin;
819: if (PetscRealPart(sctx->pv) <= _zero && !PetscIsNanScalar(sctx->pv)) {
820: /* force matfactor to be diagonally dominant */
821: if (sctx->nshift == sctx->nshift_max) {
822: sctx->shift_fraction = sctx->shift_hi;
823: } else {
824: sctx->shift_lo = sctx->shift_fraction;
825: sctx->shift_fraction = (sctx->shift_hi + sctx->shift_lo) / (PetscReal)2.;
826: }
827: sctx->shift_amount = sctx->shift_fraction * sctx->shift_top;
828: sctx->nshift++;
829: sctx->newshift = PETSC_TRUE;
830: } else {
831: sctx->newshift = PETSC_FALSE;
832: }
833: PetscFunctionReturn(PETSC_SUCCESS);
834: }
836: static inline PetscErrorCode MatPivotCheck_inblocks(PETSC_UNUSED Mat mat, const MatFactorInfo *info, FactorShiftCtx *sctx, PETSC_UNUSED PetscInt row)
837: {
838: PetscReal _zero = info->zeropivot;
840: PetscFunctionBegin;
841: if (PetscAbsScalar(sctx->pv) <= _zero && !PetscIsNanScalar(sctx->pv)) {
842: sctx->pv += info->shiftamount;
843: sctx->shift_amount = 0.0;
844: sctx->nshift++;
845: }
846: sctx->newshift = PETSC_FALSE;
847: PetscFunctionReturn(PETSC_SUCCESS);
848: }
850: static inline PetscErrorCode MatPivotCheck_none(Mat fact, Mat mat, const MatFactorInfo *info, FactorShiftCtx *sctx, PetscInt row)
851: {
852: PetscReal _zero = info->zeropivot;
854: PetscFunctionBegin;
855: sctx->newshift = PETSC_FALSE;
856: if (PetscAbsScalar(sctx->pv) <= _zero && !PetscIsNanScalar(sctx->pv)) {
857: PetscCheck(!mat->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot row %" PetscInt_FMT " value %g tolerance %g", row, (double)PetscAbsScalar(sctx->pv), (double)_zero);
858: PetscCall(PetscInfo(mat, "Detected zero pivot in factorization in row %" PetscInt_FMT " value %g tolerance %g\n", row, (double)PetscAbsScalar(sctx->pv), (double)_zero));
859: fact->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
860: fact->factorerror_zeropivot_value = PetscAbsScalar(sctx->pv);
861: fact->factorerror_zeropivot_row = row;
862: }
863: PetscFunctionReturn(PETSC_SUCCESS);
864: }
866: static inline PetscErrorCode MatPivotCheck(Mat fact, Mat mat, const MatFactorInfo *info, FactorShiftCtx *sctx, PetscInt row)
867: {
868: PetscFunctionBegin;
869: if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) PetscCall(MatPivotCheck_nz(mat, info, sctx, row));
870: else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) PetscCall(MatPivotCheck_pd(mat, info, sctx, row));
871: else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) PetscCall(MatPivotCheck_inblocks(mat, info, sctx, row));
872: else PetscCall(MatPivotCheck_none(fact, mat, info, sctx, row));
873: PetscFunctionReturn(PETSC_SUCCESS);
874: }
876: PETSC_INTERN PetscErrorCode MatADot_Default(Mat, Vec, Vec, PetscScalar *);
877: PETSC_INTERN PetscErrorCode MatANorm_Default(Mat, Vec, PetscReal *);
879: #include <petscbt.h>
880: /*
881: Create and initialize a linked list
882: Input Parameters:
883: idx_start - starting index of the list
884: lnk_max - max value of lnk indicating the end of the list
885: nlnk - max length of the list
886: Output Parameters:
887: lnk - list initialized
888: bt - PetscBT (bitarray) with all bits set to false
889: lnk_empty - flg indicating the list is empty
890: */
891: #define PetscLLCreate(idx_start, lnk_max, nlnk, lnk, bt) ((PetscErrorCode)(PetscMalloc1(nlnk, &lnk) || PetscBTCreate(nlnk, &(bt)) || (lnk[idx_start] = lnk_max, PETSC_SUCCESS)))
893: #define PetscLLCreate_new(idx_start, lnk_max, nlnk, lnk, bt, lnk_empty) ((PetscErrorCode)(PetscMalloc1(nlnk, &lnk) || PetscBTCreate(nlnk, &(bt)) || (lnk_empty = PETSC_TRUE, 0) || (lnk[idx_start] = lnk_max, PETSC_SUCCESS)))
895: static inline PetscErrorCode PetscLLInsertLocation_Private(PetscBool assume_sorted, PetscInt k, PetscInt idx_start, PetscInt entry, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnkdata, PetscInt *PETSC_RESTRICT lnk)
896: {
897: PetscInt location;
899: PetscFunctionBegin;
900: /* start from the beginning if entry < previous entry */
901: if (!assume_sorted && k && entry < *lnkdata) *lnkdata = idx_start;
902: /* search for insertion location */
903: do {
904: location = *lnkdata;
905: *lnkdata = lnk[location];
906: } while (entry > *lnkdata);
907: /* insertion location is found, add entry into lnk */
908: lnk[location] = entry;
909: lnk[entry] = *lnkdata;
910: ++(*nlnk);
911: *lnkdata = entry; /* next search starts from here if next_entry > entry */
912: PetscFunctionReturn(PETSC_SUCCESS);
913: }
915: static inline PetscErrorCode PetscLLAdd_Private(PetscInt nidx, const PetscInt *PETSC_RESTRICT indices, PetscInt idx_start, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnk, PetscBT bt, PetscBool assume_sorted)
916: {
917: PetscFunctionBegin;
918: *nlnk = 0;
919: for (PetscInt k = 0, lnkdata = idx_start; k < nidx; ++k) {
920: const PetscInt entry = indices[k];
922: if (!PetscBTLookupSet(bt, entry)) PetscCall(PetscLLInsertLocation_Private(assume_sorted, k, idx_start, entry, nlnk, &lnkdata, lnk));
923: }
924: PetscFunctionReturn(PETSC_SUCCESS);
925: }
927: /*
928: Add an index set into a sorted linked list
929: Input Parameters:
930: nidx - number of input indices
931: indices - integer array
932: idx_start - starting index of the list
933: lnk - linked list(an integer array) that is created
934: bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
935: output Parameters:
936: nlnk - number of newly added indices
937: lnk - the sorted(increasing order) linked list containing new and non-redundate entries from indices
938: bt - updated PetscBT (bitarray)
939: */
940: static inline PetscErrorCode PetscLLAdd(PetscInt nidx, const PetscInt *PETSC_RESTRICT indices, PetscInt idx_start, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnk, PetscBT bt)
941: {
942: PetscFunctionBegin;
943: PetscCall(PetscLLAdd_Private(nidx, indices, idx_start, nlnk, lnk, bt, PETSC_FALSE));
944: PetscFunctionReturn(PETSC_SUCCESS);
945: }
947: /*
948: Add a SORTED ascending index set into a sorted linked list - same as PetscLLAdd() bus skip 'if (_k && _entry < _lnkdata) _lnkdata = idx_start;'
949: Input Parameters:
950: nidx - number of input indices
951: indices - sorted integer array
952: idx_start - starting index of the list
953: lnk - linked list(an integer array) that is created
954: bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
955: output Parameters:
956: nlnk - number of newly added indices
957: lnk - the sorted(increasing order) linked list containing new and non-redundate entries from indices
958: bt - updated PetscBT (bitarray)
959: */
960: static inline PetscErrorCode PetscLLAddSorted(PetscInt nidx, const PetscInt *PETSC_RESTRICT indices, PetscInt idx_start, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnk, PetscBT bt)
961: {
962: PetscFunctionBegin;
963: PetscCall(PetscLLAdd_Private(nidx, indices, idx_start, nlnk, lnk, bt, PETSC_TRUE));
964: PetscFunctionReturn(PETSC_SUCCESS);
965: }
967: /*
968: Add a permuted index set into a sorted linked list
969: Input Parameters:
970: nidx - number of input indices
971: indices - integer array
972: perm - permutation of indices
973: idx_start - starting index of the list
974: lnk - linked list(an integer array) that is created
975: bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
976: output Parameters:
977: nlnk - number of newly added indices
978: lnk - the sorted(increasing order) linked list containing new and non-redundate entries from indices
979: bt - updated PetscBT (bitarray)
980: */
981: static inline PetscErrorCode PetscLLAddPerm(PetscInt nidx, const PetscInt *PETSC_RESTRICT indices, const PetscInt *PETSC_RESTRICT perm, PetscInt idx_start, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnk, PetscBT bt)
982: {
983: PetscFunctionBegin;
984: *nlnk = 0;
985: for (PetscInt k = 0, lnkdata = idx_start; k < nidx; ++k) {
986: const PetscInt entry = perm[indices[k]];
988: if (!PetscBTLookupSet(bt, entry)) PetscCall(PetscLLInsertLocation_Private(PETSC_FALSE, k, idx_start, entry, nlnk, &lnkdata, lnk));
989: }
990: PetscFunctionReturn(PETSC_SUCCESS);
991: }
993: #if 0
994: /* this appears to be unused? */
995: static inline PetscErrorCode PetscLLAddSorted_new(PetscInt nidx, PetscInt *indices, PetscInt idx_start, PetscBool *lnk_empty, PetscInt *nlnk, PetscInt *lnk, PetscBT bt)
996: {
997: PetscInt lnkdata = idx_start;
999: PetscFunctionBegin;
1000: if (*lnk_empty) {
1001: for (PetscInt k = 0; k < nidx; ++k) {
1002: const PetscInt entry = indices[k], location = lnkdata;
1004: PetscCall(PetscBTSet(bt,entry)); /* mark the new entry */
1005: lnkdata = lnk[location];
1006: /* insertion location is found, add entry into lnk */
1007: lnk[location] = entry;
1008: lnk[entry] = lnkdata;
1009: lnkdata = entry; /* next search starts from here */
1010: }
1011: /* lnk[indices[nidx-1]] = lnk[idx_start];
1012: lnk[idx_start] = indices[0];
1013: PetscCall(PetscBTSet(bt,indices[0]));
1014: for (_k=1; _k<nidx; _k++) {
1015: PetscCall(PetscBTSet(bt,indices[_k]));
1016: lnk[indices[_k-1]] = indices[_k];
1017: }
1018: */
1019: *nlnk = nidx;
1020: *lnk_empty = PETSC_FALSE;
1021: } else {
1022: *nlnk = 0;
1023: for (PetscInt k = 0; k < nidx; ++k) {
1024: const PetscInt entry = indices[k];
1026: if (!PetscBTLookupSet(bt,entry)) PetscCall(PetscLLInsertLocation_Private(PETSC_TRUE,k,idx_start,entry,nlnk,&lnkdata,lnk));
1027: }
1028: }
1029: PetscFunctionReturn(PETSC_SUCCESS);
1030: }
1031: #endif
1033: /*
1034: Add a SORTED index set into a sorted linked list used for LUFactorSymbolic()
1035: Same as PetscLLAddSorted() with an additional operation:
1036: count the number of input indices that are no larger than 'diag'
1037: Input Parameters:
1038: indices - sorted integer array
1039: idx_start - starting index of the list, index of pivot row
1040: lnk - linked list(an integer array) that is created
1041: bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1042: diag - index of the active row in LUFactorSymbolic
1043: nzbd - number of input indices with indices <= idx_start
1044: im - im[idx_start] is initialized as num of nonzero entries in row=idx_start
1045: output Parameters:
1046: nlnk - number of newly added indices
1047: lnk - the sorted(increasing order) linked list containing new and non-redundate entries from indices
1048: bt - updated PetscBT (bitarray)
1049: im - im[idx_start]: unchanged if diag is not an entry
1050: : num of entries with indices <= diag if diag is an entry
1051: */
1052: static inline PetscErrorCode PetscLLAddSortedLU(const PetscInt *PETSC_RESTRICT indices, PetscInt idx_start, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnk, PetscBT bt, PetscInt diag, PetscInt nzbd, PetscInt *PETSC_RESTRICT im)
1053: {
1054: const PetscInt nidx = im[idx_start] - nzbd; /* num of entries with idx_start < index <= diag */
1056: PetscFunctionBegin;
1057: *nlnk = 0;
1058: for (PetscInt k = 0, lnkdata = idx_start; k < nidx; ++k) {
1059: const PetscInt entry = indices[k];
1061: ++nzbd;
1062: if (entry == diag) im[idx_start] = nzbd;
1063: if (!PetscBTLookupSet(bt, entry)) PetscCall(PetscLLInsertLocation_Private(PETSC_TRUE, k, idx_start, entry, nlnk, &lnkdata, lnk));
1064: }
1065: PetscFunctionReturn(PETSC_SUCCESS);
1066: }
1068: /*
1069: Copy data on the list into an array, then initialize the list
1070: Input Parameters:
1071: idx_start - starting index of the list
1072: lnk_max - max value of lnk indicating the end of the list
1073: nlnk - number of data on the list to be copied
1074: lnk - linked list
1075: bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1076: output Parameters:
1077: indices - array that contains the copied data
1078: lnk - linked list that is cleaned and initialize
1079: bt - PetscBT (bitarray) with all bits set to false
1080: */
1081: static inline PetscErrorCode PetscLLClean(PetscInt idx_start, PetscInt lnk_max, PetscInt nlnk, PetscInt *PETSC_RESTRICT lnk, PetscInt *PETSC_RESTRICT indices, PetscBT bt)
1082: {
1083: PetscFunctionBegin;
1084: for (PetscInt j = 0, idx = idx_start; j < nlnk; ++j) {
1085: idx = lnk[idx];
1086: indices[j] = idx;
1087: PetscCall(PetscBTClear(bt, idx));
1088: }
1089: lnk[idx_start] = lnk_max;
1090: PetscFunctionReturn(PETSC_SUCCESS);
1091: }
1093: /*
1094: Free memories used by the list
1095: */
1096: #define PetscLLDestroy(lnk, bt) ((PetscErrorCode)(PetscFree(lnk) || PetscBTDestroy(&(bt))))
1098: /* Routines below are used for incomplete matrix factorization */
1099: /*
1100: Create and initialize a linked list and its levels
1101: Input Parameters:
1102: idx_start - starting index of the list
1103: lnk_max - max value of lnk indicating the end of the list
1104: nlnk - max length of the list
1105: Output Parameters:
1106: lnk - list initialized
1107: lnk_lvl - array of size nlnk for storing levels of lnk
1108: bt - PetscBT (bitarray) with all bits set to false
1109: */
1110: #define PetscIncompleteLLCreate(idx_start, lnk_max, nlnk, lnk, lnk_lvl, bt) \
1111: ((PetscErrorCode)(PetscIntMultError(2, nlnk, NULL) || PetscMalloc1(2 * nlnk, &lnk) || PetscBTCreate(nlnk, &(bt)) || (lnk[idx_start] = lnk_max, lnk_lvl = lnk + nlnk, PETSC_SUCCESS)))
1113: static inline PetscErrorCode PetscIncompleteLLInsertLocation_Private(PetscBool assume_sorted, PetscInt k, PetscInt idx_start, PetscInt entry, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnkdata, PetscInt *PETSC_RESTRICT lnk, PetscInt *PETSC_RESTRICT lnklvl, PetscInt newval)
1114: {
1115: PetscFunctionBegin;
1116: PetscCall(PetscLLInsertLocation_Private(assume_sorted, k, idx_start, entry, nlnk, lnkdata, lnk));
1117: lnklvl[entry] = newval;
1118: PetscFunctionReturn(PETSC_SUCCESS);
1119: }
1121: /*
1122: Initialize a sorted linked list used for ILU and ICC
1123: Input Parameters:
1124: nidx - number of input idx
1125: idx - integer array used for storing column indices
1126: idx_start - starting index of the list
1127: perm - indices of an IS
1128: lnk - linked list(an integer array) that is created
1129: lnklvl - levels of lnk
1130: bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1131: output Parameters:
1132: nlnk - number of newly added idx
1133: lnk - the sorted(increasing order) linked list containing new and non-redundate entries from idx
1134: lnklvl - levels of lnk
1135: bt - updated PetscBT (bitarray)
1136: */
1137: static inline PetscErrorCode PetscIncompleteLLInit(PetscInt nidx, const PetscInt *PETSC_RESTRICT idx, PetscInt idx_start, const PetscInt *PETSC_RESTRICT perm, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnk, PetscInt *PETSC_RESTRICT lnklvl, PetscBT bt)
1138: {
1139: PetscFunctionBegin;
1140: *nlnk = 0;
1141: for (PetscInt k = 0, lnkdata = idx_start; k < nidx; ++k) {
1142: const PetscInt entry = perm[idx[k]];
1144: if (!PetscBTLookupSet(bt, entry)) PetscCall(PetscIncompleteLLInsertLocation_Private(PETSC_FALSE, k, idx_start, entry, nlnk, &lnkdata, lnk, lnklvl, 0));
1145: }
1146: PetscFunctionReturn(PETSC_SUCCESS);
1147: }
1149: static inline PetscErrorCode PetscIncompleteLLAdd_Private(PetscInt nidx, const PetscInt *PETSC_RESTRICT idx, PetscReal level, const PetscInt *PETSC_RESTRICT idxlvl, PetscInt idx_start, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnk, PetscInt *PETSC_RESTRICT lnklvl, PetscBT bt, PetscInt prow_offset, PetscBool assume_sorted)
1150: {
1151: PetscFunctionBegin;
1152: *nlnk = 0;
1153: for (PetscInt k = 0, lnkdata = idx_start; k < nidx; ++k) {
1154: const PetscInt incrlev = idxlvl[k] + prow_offset + 1;
1156: if (incrlev <= level) {
1157: const PetscInt entry = idx[k];
1159: if (!PetscBTLookupSet(bt, entry)) PetscCall(PetscIncompleteLLInsertLocation_Private(assume_sorted, k, idx_start, entry, nlnk, &lnkdata, lnk, lnklvl, incrlev));
1160: else if (lnklvl[entry] > incrlev) lnklvl[entry] = incrlev; /* existing entry */
1161: }
1162: }
1163: PetscFunctionReturn(PETSC_SUCCESS);
1164: }
1166: /*
1167: Add a SORTED index set into a sorted linked list for ICC
1168: Input Parameters:
1169: nidx - number of input indices
1170: idx - sorted integer array used for storing column indices
1171: level - level of fill, e.g., ICC(level)
1172: idxlvl - level of idx
1173: idx_start - starting index of the list
1174: lnk - linked list(an integer array) that is created
1175: lnklvl - levels of lnk
1176: bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1177: idxlvl_prow - idxlvl[prow], where prow is the row number of the idx
1178: output Parameters:
1179: nlnk - number of newly added indices
1180: lnk - the sorted(increasing order) linked list containing new and non-redundate entries from idx
1181: lnklvl - levels of lnk
1182: bt - updated PetscBT (bitarray)
1183: Note: the level of U(i,j) is set as lvl(i,j) = min{ lvl(i,j), lvl(prow,i)+lvl(prow,j)+1)
1184: where idx = non-zero columns of U(prow,prow+1:n-1), prow<i
1185: */
1186: static inline PetscErrorCode PetscICCLLAddSorted(PetscInt nidx, const PetscInt *PETSC_RESTRICT idx, PetscReal level, const PetscInt *PETSC_RESTRICT idxlvl, PetscInt idx_start, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnk, PetscInt *PETSC_RESTRICT lnklvl, PetscBT bt, PetscInt idxlvl_prow)
1187: {
1188: PetscFunctionBegin;
1189: PetscCall(PetscIncompleteLLAdd_Private(nidx, idx, level, idxlvl, idx_start, nlnk, lnk, lnklvl, bt, idxlvl_prow, PETSC_TRUE));
1190: PetscFunctionReturn(PETSC_SUCCESS);
1191: }
1193: /*
1194: Add a SORTED index set into a sorted linked list for ILU
1195: Input Parameters:
1196: nidx - number of input indices
1197: idx - sorted integer array used for storing column indices
1198: level - level of fill, e.g., ICC(level)
1199: idxlvl - level of idx
1200: idx_start - starting index of the list
1201: lnk - linked list(an integer array) that is created
1202: lnklvl - levels of lnk
1203: bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1204: prow - the row number of idx
1205: output Parameters:
1206: nlnk - number of newly added idx
1207: lnk - the sorted(increasing order) linked list containing new and non-redundate entries from idx
1208: lnklvl - levels of lnk
1209: bt - updated PetscBT (bitarray)
1211: Note: the level of factor(i,j) is set as lvl(i,j) = min{ lvl(i,j), lvl(i,prow)+lvl(prow,j)+1)
1212: where idx = non-zero columns of U(prow,prow+1:n-1), prow<i
1213: */
1214: static inline PetscErrorCode PetscILULLAddSorted(PetscInt nidx, const PetscInt *PETSC_RESTRICT idx, PetscInt level, const PetscInt *PETSC_RESTRICT idxlvl, PetscInt idx_start, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnk, PetscInt *PETSC_RESTRICT lnklvl, PetscBT bt, PetscInt prow)
1215: {
1216: PetscFunctionBegin;
1217: PetscCall(PetscIncompleteLLAdd_Private(nidx, idx, level, idxlvl, idx_start, nlnk, lnk, lnklvl, bt, lnklvl[prow], PETSC_TRUE));
1218: PetscFunctionReturn(PETSC_SUCCESS);
1219: }
1221: /*
1222: Add a index set into a sorted linked list
1223: Input Parameters:
1224: nidx - number of input idx
1225: idx - integer array used for storing column indices
1226: level - level of fill, e.g., ICC(level)
1227: idxlvl - level of idx
1228: idx_start - starting index of the list
1229: lnk - linked list(an integer array) that is created
1230: lnklvl - levels of lnk
1231: bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1232: output Parameters:
1233: nlnk - number of newly added idx
1234: lnk - the sorted(increasing order) linked list containing new and non-redundate entries from idx
1235: lnklvl - levels of lnk
1236: bt - updated PetscBT (bitarray)
1237: */
1238: static inline PetscErrorCode PetscIncompleteLLAdd(PetscInt nidx, const PetscInt *PETSC_RESTRICT idx, PetscReal level, const PetscInt *PETSC_RESTRICT idxlvl, PetscInt idx_start, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnk, PetscInt *PETSC_RESTRICT lnklvl, PetscBT bt)
1239: {
1240: PetscFunctionBegin;
1241: PetscCall(PetscIncompleteLLAdd_Private(nidx, idx, level, idxlvl, idx_start, nlnk, lnk, lnklvl, bt, 0, PETSC_FALSE));
1242: PetscFunctionReturn(PETSC_SUCCESS);
1243: }
1245: /*
1246: Add a SORTED index set into a sorted linked list
1247: Input Parameters:
1248: nidx - number of input indices
1249: idx - sorted integer array used for storing column indices
1250: level - level of fill, e.g., ICC(level)
1251: idxlvl - level of idx
1252: idx_start - starting index of the list
1253: lnk - linked list(an integer array) that is created
1254: lnklvl - levels of lnk
1255: bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1256: output Parameters:
1257: nlnk - number of newly added idx
1258: lnk - the sorted(increasing order) linked list containing new and non-redundate entries from idx
1259: lnklvl - levels of lnk
1260: bt - updated PetscBT (bitarray)
1261: */
1262: static inline PetscErrorCode PetscIncompleteLLAddSorted(PetscInt nidx, const PetscInt *PETSC_RESTRICT idx, PetscReal level, const PetscInt *PETSC_RESTRICT idxlvl, PetscInt idx_start, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnk, PetscInt *PETSC_RESTRICT lnklvl, PetscBT bt)
1263: {
1264: PetscFunctionBegin;
1265: PetscCall(PetscIncompleteLLAdd_Private(nidx, idx, level, idxlvl, idx_start, nlnk, lnk, lnklvl, bt, 0, PETSC_TRUE));
1266: PetscFunctionReturn(PETSC_SUCCESS);
1267: }
1269: /*
1270: Copy data on the list into an array, then initialize the list
1271: Input Parameters:
1272: idx_start - starting index of the list
1273: lnk_max - max value of lnk indicating the end of the list
1274: nlnk - number of data on the list to be copied
1275: lnk - linked list
1276: lnklvl - level of lnk
1277: bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1278: output Parameters:
1279: indices - array that contains the copied data
1280: lnk - linked list that is cleaned and initialize
1281: lnklvl - level of lnk that is reinitialized
1282: bt - PetscBT (bitarray) with all bits set to false
1283: */
1284: static inline PetscErrorCode PetscIncompleteLLClean(PetscInt idx_start, PetscInt lnk_max, PetscInt nlnk, PetscInt *PETSC_RESTRICT lnk, PetscInt *PETSC_RESTRICT lnklvl, PetscInt *PETSC_RESTRICT indices, PetscInt *PETSC_RESTRICT indiceslvl, PetscBT bt)
1285: {
1286: PetscFunctionBegin;
1287: for (PetscInt j = 0, idx = idx_start; j < nlnk; ++j) {
1288: idx = lnk[idx];
1289: indices[j] = idx;
1290: indiceslvl[j] = lnklvl[idx];
1291: lnklvl[idx] = -1;
1292: PetscCall(PetscBTClear(bt, idx));
1293: }
1294: lnk[idx_start] = lnk_max;
1295: PetscFunctionReturn(PETSC_SUCCESS);
1296: }
1298: /*
1299: Free memories used by the list
1300: */
1301: #define PetscIncompleteLLDestroy(lnk, bt) ((PetscErrorCode)(PetscFree(lnk) || PetscBTDestroy(&(bt))))
1303: #if !defined(PETSC_CLANG_STATIC_ANALYZER)
1304: #define MatCheckSameLocalSize(A, ar1, B, ar2) \
1305: do { \
1306: PetscCheckSameComm(A, ar1, B, ar2); \
1307: PetscCheck(((A)->rmap->n == (B)->rmap->n) && ((A)->cmap->n == (B)->cmap->n), PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Incompatible matrix local sizes: parameter # %d (%" PetscInt_FMT " x %" PetscInt_FMT ") != parameter # %d (%" PetscInt_FMT " x %" PetscInt_FMT ")", ar1, \
1308: (A)->rmap->n, (A)->cmap->n, ar2, (B)->rmap->n, (B)->cmap->n); \
1309: } while (0)
1310: #define MatCheckSameSize(A, ar1, B, ar2) \
1311: do { \
1312: PetscCheck(((A)->rmap->N == (B)->rmap->N) && ((A)->cmap->N == (B)->cmap->N), PetscObjectComm((PetscObject)(A)), PETSC_ERR_ARG_INCOMP, "Incompatible matrix global sizes: parameter # %d (%" PetscInt_FMT " x %" PetscInt_FMT ") != parameter # %d (%" PetscInt_FMT " x %" PetscInt_FMT ")", ar1, \
1313: (A)->rmap->N, (A)->cmap->N, ar2, (B)->rmap->N, (B)->cmap->N); \
1314: MatCheckSameLocalSize(A, ar1, B, ar2); \
1315: } while (0)
1316: #else
1317: template <typename Tm>
1318: extern void MatCheckSameLocalSize(Tm, int, Tm, int);
1319: template <typename Tm>
1320: extern void MatCheckSameSize(Tm, int, Tm, int);
1321: #endif
1323: #define VecCheckMatCompatible(M, x, ar1, b, ar2) \
1324: do { \
1325: PetscCheck((M)->cmap->N == (x)->map->N, PetscObjectComm((PetscObject)(M)), PETSC_ERR_ARG_SIZ, "Vector global length incompatible with matrix: parameter # %d global size %" PetscInt_FMT " != matrix column global size %" PetscInt_FMT, ar1, (x)->map->N, \
1326: (M)->cmap->N); \
1327: PetscCheck((M)->rmap->N == (b)->map->N, PetscObjectComm((PetscObject)(M)), PETSC_ERR_ARG_SIZ, "Vector global length incompatible with matrix: parameter # %d global size %" PetscInt_FMT " != matrix row global size %" PetscInt_FMT, ar2, (b)->map->N, \
1328: (M)->rmap->N); \
1329: } while (0)
1331: /*
1332: Create and initialize a condensed linked list -
1333: same as PetscLLCreate(), but uses a scalable array 'lnk' with size of max number of entries, not O(N).
1334: Barry suggested this approach (Dec. 6, 2011):
1335: I've thought of an alternative way of representing a linked list that is efficient but doesn't have the O(N) scaling issue
1336: (it may be faster than the O(N) even sequentially due to less crazy memory access).
1338: Instead of having some like a 2 -> 4 -> 11 -> 22 list that uses slot 2 4 11 and 22 in a big array use a small array with two slots
1339: for each entry for example [ 2 1 | 4 3 | 22 -1 | 11 2] so the first number (of the pair) is the value while the second tells you where
1340: in the list the next entry is. Inserting a new link means just append another pair at the end. For example say we want to insert 13 into the
1341: list it would then become [2 1 | 4 3 | 22 -1 | 11 4 | 13 2 ] you just add a pair at the end and fix the point for the one that points to it.
1342: That is 11 use to point to the 2 slot, after the change 11 points to the 4th slot which has the value 13. Note that values are always next
1343: to each other so memory access is much better than using the big array.
1345: Example:
1346: nlnk_max=5, lnk_max=36:
1347: Initial list: [0, 0 | 36, 2 | 0, 0 | 0, 0 | 0, 0 | 0, 0 | 0, 0]
1348: here, head_node has index 2 with value lnk[2]=lnk_max=36,
1349: 0-th entry is used to store the number of entries in the list,
1350: The initial lnk represents head -> tail(marked by 36) with number of entries = lnk[0]=0.
1352: Now adding a sorted set {2,4}, the list becomes
1353: [2, 0 | 36, 4 |2, 6 | 4, 2 | 0, 0 | 0, 0 | 0, 0 ]
1354: represents head -> 2 -> 4 -> tail with number of entries = lnk[0]=2.
1356: Then adding a sorted set {0,3,35}, the list
1357: [5, 0 | 36, 8 | 2, 10 | 4, 12 | 0, 4 | 3, 6 | 35, 2 ]
1358: represents head -> 0 -> 2 -> 3 -> 4 -> 35 -> tail with number of entries = lnk[0]=5.
1360: Input Parameters:
1361: nlnk_max - max length of the list
1362: lnk_max - max value of the entries
1363: Output Parameters:
1364: lnk - list created and initialized
1365: bt - PetscBT (bitarray) with all bits set to false. Note: bt has size lnk_max, not nln_max!
1366: */
1367: static inline PetscErrorCode PetscLLCondensedCreate(PetscInt nlnk_max, PetscInt lnk_max, PetscInt **lnk, PetscBT *bt)
1368: {
1369: PetscInt *llnk, lsize = 0;
1371: PetscFunctionBegin;
1372: PetscCall(PetscIntMultError(2, nlnk_max + 2, &lsize));
1373: PetscCall(PetscMalloc1(lsize, lnk));
1374: PetscCall(PetscBTCreate(lnk_max, bt));
1375: llnk = *lnk;
1376: llnk[0] = 0; /* number of entries on the list */
1377: llnk[2] = lnk_max; /* value in the head node */
1378: llnk[3] = 2; /* next for the head node */
1379: PetscFunctionReturn(PETSC_SUCCESS);
1380: }
1382: /*
1383: Add a SORTED ascending index set into a sorted linked list. See PetscLLCondensedCreate() for detailed description.
1384: Input Parameters:
1385: nidx - number of input indices
1386: indices - sorted integer array
1387: lnk - condensed linked list(an integer array) that is created
1388: bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1389: output Parameters:
1390: lnk - the sorted(increasing order) linked list containing previous and newly added non-redundate indices
1391: bt - updated PetscBT (bitarray)
1392: */
1393: static inline PetscErrorCode PetscLLCondensedAddSorted(PetscInt nidx, const PetscInt indices[], PetscInt lnk[], PetscBT bt)
1394: {
1395: PetscInt location = 2; /* head */
1396: PetscInt nlnk = lnk[0]; /* num of entries on the input lnk */
1398: PetscFunctionBegin;
1399: for (PetscInt k = 0; k < nidx; k++) {
1400: const PetscInt entry = indices[k];
1401: if (!PetscBTLookupSet(bt, entry)) { /* new entry */
1402: PetscInt next, lnkdata;
1404: /* search for insertion location */
1405: do {
1406: next = location + 1; /* link from previous node to next node */
1407: location = lnk[next]; /* idx of next node */
1408: lnkdata = lnk[location]; /* value of next node */
1409: } while (entry > lnkdata);
1410: /* insertion location is found, add entry into lnk */
1411: const PetscInt newnode = 2 * (nlnk + 2); /* index for this new node */
1412: lnk[next] = newnode; /* connect previous node to the new node */
1413: lnk[newnode] = entry; /* set value of the new node */
1414: lnk[newnode + 1] = location; /* connect new node to next node */
1415: location = newnode; /* next search starts from the new node */
1416: nlnk++;
1417: }
1418: }
1419: lnk[0] = nlnk; /* number of entries in the list */
1420: PetscFunctionReturn(PETSC_SUCCESS);
1421: }
1423: static inline PetscErrorCode PetscLLCondensedClean(PetscInt lnk_max, PETSC_UNUSED PetscInt nidx, PetscInt *indices, PetscInt lnk[], PetscBT bt)
1424: {
1425: const PetscInt nlnk = lnk[0]; /* num of entries on the list */
1426: PetscInt next = lnk[3]; /* head node */
1428: PetscFunctionBegin;
1429: for (PetscInt k = 0; k < nlnk; k++) {
1430: indices[k] = lnk[next];
1431: next = lnk[next + 1];
1432: PetscCall(PetscBTClear(bt, indices[k]));
1433: }
1434: lnk[0] = 0; /* num of entries on the list */
1435: lnk[2] = lnk_max; /* initialize head node */
1436: lnk[3] = 2; /* head node */
1437: PetscFunctionReturn(PETSC_SUCCESS);
1438: }
1440: static inline PetscErrorCode PetscLLCondensedView(PetscInt *lnk)
1441: {
1442: PetscFunctionBegin;
1443: PetscCall(PetscPrintf(PETSC_COMM_SELF, "LLCondensed of size %" PetscInt_FMT ", (val, next)\n", lnk[0]));
1444: for (PetscInt k = 2; k < lnk[0] + 2; ++k) PetscCall(PetscPrintf(PETSC_COMM_SELF, " %" PetscInt_FMT ": (%" PetscInt_FMT ", %" PetscInt_FMT ")\n", 2 * k, lnk[2 * k], lnk[2 * k + 1]));
1445: PetscFunctionReturn(PETSC_SUCCESS);
1446: }
1448: /*
1449: Free memories used by the list
1450: */
1451: static inline PetscErrorCode PetscLLCondensedDestroy(PetscInt *lnk, PetscBT bt)
1452: {
1453: PetscFunctionBegin;
1454: PetscCall(PetscFree(lnk));
1455: PetscCall(PetscBTDestroy(&bt));
1456: PetscFunctionReturn(PETSC_SUCCESS);
1457: }
1459: /*
1460: Same as PetscLLCondensedCreate(), but does not use non-scalable O(lnk_max) bitarray
1461: Input Parameters:
1462: nlnk_max - max length of the list
1463: Output Parameters:
1464: lnk - list created and initialized
1465: */
1466: static inline PetscErrorCode PetscLLCondensedCreate_Scalable(PetscInt nlnk_max, PetscInt **lnk)
1467: {
1468: PetscInt *llnk, lsize = 0;
1470: PetscFunctionBegin;
1471: PetscCall(PetscIntMultError(2, nlnk_max + 2, &lsize));
1472: PetscCall(PetscMalloc1(lsize, lnk));
1473: llnk = *lnk;
1474: llnk[0] = 0; /* number of entries on the list */
1475: llnk[2] = PETSC_INT_MAX; /* value in the head node */
1476: llnk[3] = 2; /* next for the head node */
1477: PetscFunctionReturn(PETSC_SUCCESS);
1478: }
1480: static inline PetscErrorCode PetscLLCondensedExpand_Scalable(PetscInt nlnk_max, PetscInt **lnk)
1481: {
1482: PetscInt lsize = 0;
1484: PetscFunctionBegin;
1485: PetscCall(PetscIntMultError(2, nlnk_max + 2, &lsize));
1486: PetscCall(PetscRealloc(lsize * sizeof(PetscInt), lnk));
1487: PetscFunctionReturn(PETSC_SUCCESS);
1488: }
1490: static inline PetscErrorCode PetscLLCondensedAddSorted_Scalable(PetscInt nidx, const PetscInt indices[], PetscInt lnk[])
1491: {
1492: PetscInt location = 2; /* head */
1493: PetscInt nlnk = lnk[0]; /* num of entries on the input lnk */
1495: for (PetscInt k = 0; k < nidx; k++) {
1496: const PetscInt entry = indices[k];
1497: PetscInt next, lnkdata;
1499: /* search for insertion location */
1500: do {
1501: next = location + 1; /* link from previous node to next node */
1502: location = lnk[next]; /* idx of next node */
1503: lnkdata = lnk[location]; /* value of next node */
1504: } while (entry > lnkdata);
1505: if (entry < lnkdata) {
1506: /* insertion location is found, add entry into lnk */
1507: const PetscInt newnode = 2 * (nlnk + 2); /* index for this new node */
1508: lnk[next] = newnode; /* connect previous node to the new node */
1509: lnk[newnode] = entry; /* set value of the new node */
1510: lnk[newnode + 1] = location; /* connect new node to next node */
1511: location = newnode; /* next search starts from the new node */
1512: nlnk++;
1513: }
1514: }
1515: lnk[0] = nlnk; /* number of entries in the list */
1516: return PETSC_SUCCESS;
1517: }
1519: static inline PetscErrorCode PetscLLCondensedClean_Scalable(PETSC_UNUSED PetscInt nidx, PetscInt *indices, PetscInt *lnk)
1520: {
1521: const PetscInt nlnk = lnk[0];
1522: PetscInt next = lnk[3]; /* head node */
1524: for (PetscInt k = 0; k < nlnk; k++) {
1525: indices[k] = lnk[next];
1526: next = lnk[next + 1];
1527: }
1528: lnk[0] = 0; /* num of entries on the list */
1529: lnk[3] = 2; /* head node */
1530: return PETSC_SUCCESS;
1531: }
1533: static inline PetscErrorCode PetscLLCondensedDestroy_Scalable(PetscInt *lnk)
1534: {
1535: return PetscFree(lnk);
1536: }
1538: /*
1539: lnk[0] number of links
1540: lnk[1] number of entries
1541: lnk[3n] value
1542: lnk[3n+1] len
1543: lnk[3n+2] link to next value
1545: The next three are always the first link
1547: lnk[3] PETSC_INT_MIN+1
1548: lnk[4] 1
1549: lnk[5] link to first real entry
1551: The next three are always the last link
1553: lnk[6] PETSC_INT_MAX - 1
1554: lnk[7] 1
1555: lnk[8] next valid link (this is the same as lnk[0] but without the decreases)
1556: */
1558: static inline PetscErrorCode PetscLLCondensedCreate_fast(PetscInt nlnk_max, PetscInt **lnk)
1559: {
1560: PetscInt *llnk;
1561: PetscInt lsize = 0;
1563: PetscFunctionBegin;
1564: PetscCall(PetscIntMultError(3, nlnk_max + 3, &lsize));
1565: PetscCall(PetscMalloc1(lsize, lnk));
1566: llnk = *lnk;
1567: llnk[0] = 0; /* nlnk: number of entries on the list */
1568: llnk[1] = 0; /* number of integer entries represented in list */
1569: llnk[3] = PETSC_INT_MIN + 1; /* value in the first node */
1570: llnk[4] = 1; /* count for the first node */
1571: llnk[5] = 6; /* next for the first node */
1572: llnk[6] = PETSC_INT_MAX - 1; /* value in the last node */
1573: llnk[7] = 1; /* count for the last node */
1574: llnk[8] = 0; /* next valid node to be used */
1575: PetscFunctionReturn(PETSC_SUCCESS);
1576: }
1578: static inline PetscErrorCode PetscLLCondensedAddSorted_fast(PetscInt nidx, const PetscInt indices[], PetscInt lnk[])
1579: {
1580: for (PetscInt k = 0, prev = 3 /* first value */; k < nidx; k++) {
1581: const PetscInt entry = indices[k];
1582: PetscInt next = lnk[prev + 2];
1584: /* search for insertion location */
1585: while (entry >= lnk[next]) {
1586: prev = next;
1587: next = lnk[next + 2];
1588: }
1589: /* entry is in range of previous list */
1590: if (entry < lnk[prev] + lnk[prev + 1]) continue;
1591: lnk[1]++;
1592: /* entry is right after previous list */
1593: if (entry == lnk[prev] + lnk[prev + 1]) {
1594: lnk[prev + 1]++;
1595: if (lnk[next] == entry + 1) { /* combine two contiguous strings */
1596: lnk[prev + 1] += lnk[next + 1];
1597: lnk[prev + 2] = lnk[next + 2];
1598: next = lnk[next + 2];
1599: lnk[0]--;
1600: }
1601: continue;
1602: }
1603: /* entry is right before next list */
1604: if (entry == lnk[next] - 1) {
1605: lnk[next]--;
1606: lnk[next + 1]++;
1607: prev = next;
1608: next = lnk[prev + 2];
1609: continue;
1610: }
1611: /* add entry into lnk */
1612: lnk[prev + 2] = 3 * ((lnk[8]++) + 3); /* connect previous node to the new node */
1613: prev = lnk[prev + 2];
1614: lnk[prev] = entry; /* set value of the new node */
1615: lnk[prev + 1] = 1; /* number of values in contiguous string is one to start */
1616: lnk[prev + 2] = next; /* connect new node to next node */
1617: lnk[0]++;
1618: }
1619: return PETSC_SUCCESS;
1620: }
1622: static inline PetscErrorCode PetscLLCondensedClean_fast(PETSC_UNUSED PetscInt nidx, PetscInt *indices, PetscInt *lnk)
1623: {
1624: const PetscInt nlnk = lnk[0];
1625: PetscInt next = lnk[5]; /* first node */
1627: for (PetscInt k = 0, cnt = 0; k < nlnk; k++) {
1628: for (PetscInt j = 0; j < lnk[next + 1]; j++) indices[cnt++] = lnk[next] + j;
1629: next = lnk[next + 2];
1630: }
1631: lnk[0] = 0; /* nlnk: number of links */
1632: lnk[1] = 0; /* number of integer entries represented in list */
1633: lnk[3] = PETSC_INT_MIN + 1; /* value in the first node */
1634: lnk[4] = 1; /* count for the first node */
1635: lnk[5] = 6; /* next for the first node */
1636: lnk[6] = PETSC_INT_MAX - 1; /* value in the last node */
1637: lnk[7] = 1; /* count for the last node */
1638: lnk[8] = 0; /* next valid location to make link */
1639: return PETSC_SUCCESS;
1640: }
1642: static inline PetscErrorCode PetscLLCondensedView_fast(const PetscInt *lnk)
1643: {
1644: const PetscInt nlnk = lnk[0];
1645: PetscInt next = lnk[5]; /* first node */
1647: for (PetscInt k = 0; k < nlnk; k++) {
1648: #if 0 /* Debugging code */
1649: printf("%d value %d len %d next %d\n", next, lnk[next], lnk[next + 1], lnk[next + 2]);
1650: #endif
1651: next = lnk[next + 2];
1652: }
1653: return PETSC_SUCCESS;
1654: }
1656: static inline PetscErrorCode PetscLLCondensedDestroy_fast(PetscInt *lnk)
1657: {
1658: return PetscFree(lnk);
1659: }
1661: PETSC_EXTERN PetscErrorCode PetscCDCreate(PetscInt, PetscCoarsenData **);
1662: PETSC_EXTERN PetscErrorCode PetscCDDestroy(PetscCoarsenData *);
1663: PETSC_EXTERN PetscErrorCode PetscCDIntNdSetID(PetscCDIntNd *, PetscInt);
1664: PETSC_EXTERN PetscErrorCode PetscCDIntNdGetID(const PetscCDIntNd *, PetscInt *);
1665: PETSC_EXTERN PetscErrorCode PetscCDAppendID(PetscCoarsenData *, PetscInt, PetscInt);
1666: PETSC_EXTERN PetscErrorCode PetscCDMoveAppend(PetscCoarsenData *, PetscInt, PetscInt);
1667: PETSC_EXTERN PetscErrorCode PetscCDAppendNode(PetscCoarsenData *, PetscInt, PetscCDIntNd *);
1668: PETSC_EXTERN PetscErrorCode PetscCDRemoveNextNode(PetscCoarsenData *, PetscInt, PetscCDIntNd *);
1669: PETSC_EXTERN PetscErrorCode PetscCDCountAt(const PetscCoarsenData *, PetscInt, PetscInt *);
1670: PETSC_EXTERN PetscErrorCode PetscCDIsEmptyAt(const PetscCoarsenData *, PetscInt, PetscBool *);
1671: PETSC_EXTERN PetscErrorCode PetscCDSetChunkSize(PetscCoarsenData *, PetscInt);
1672: PETSC_EXTERN PetscErrorCode PetscCDPrint(const PetscCoarsenData *, PetscInt, MPI_Comm);
1673: PETSC_EXTERN PetscErrorCode PetscCDGetNonemptyIS(PetscCoarsenData *, IS *);
1674: PETSC_EXTERN PetscErrorCode PetscCDGetMat(PetscCoarsenData *, Mat *);
1675: PETSC_EXTERN PetscErrorCode PetscCDSetMat(PetscCoarsenData *, Mat);
1676: PETSC_EXTERN PetscErrorCode PetscCDClearMat(PetscCoarsenData *);
1677: PETSC_EXTERN PetscErrorCode PetscCDRemoveAllAt(PetscCoarsenData *, PetscInt);
1678: PETSC_EXTERN PetscErrorCode PetscCDCount(const PetscCoarsenData *, PetscInt *_sz);
1680: PETSC_EXTERN PetscErrorCode PetscCDGetHeadPos(const PetscCoarsenData *, PetscInt, PetscCDIntNd **);
1681: PETSC_EXTERN PetscErrorCode PetscCDGetNextPos(const PetscCoarsenData *, PetscInt, PetscCDIntNd **);
1682: PETSC_EXTERN PetscErrorCode PetscCDGetASMBlocks(const PetscCoarsenData *, const PetscInt, PetscInt *, IS **);
1684: PETSC_SINGLE_LIBRARY_VISIBILITY_INTERNAL PetscErrorCode MatFDColoringApply_AIJ(Mat, MatFDColoring, Vec, void *);
1686: typedef struct {
1687: Vec diag;
1688: PetscBool diag_valid;
1689: Vec inv_diag;
1690: PetscBool inv_diag_valid;
1691: PetscObjectState diag_state, inv_diag_state;
1692: PetscInt *col;
1693: PetscScalar *val;
1694: } Mat_Diagonal;
1696: #if PetscDefined(HAVE_CUDA)
1697: PETSC_INTERN PetscErrorCode MatADot_Diagonal_SeqCUDA(Mat, Vec, Vec, PetscScalar *);
1698: PETSC_INTERN PetscErrorCode MatANormSq_Diagonal_SeqCUDA(Mat, Vec, PetscReal *);
1699: #endif
1700: #if PetscDefined(HAVE_HIP)
1701: PETSC_INTERN PetscErrorCode MatADot_Diagonal_SeqHIP(Mat, Vec, Vec, PetscScalar *);
1702: PETSC_INTERN PetscErrorCode MatANormSq_Diagonal_SeqHIP(Mat, Vec, PetscReal *);
1703: #endif
1704: #if PetscDefined(HAVE_KOKKOS_KERNELS)
1705: PETSC_INTERN PetscErrorCode MatADot_Diagonal_SeqKokkos(Mat, Vec, Vec, PetscScalar *);
1706: PETSC_INTERN PetscErrorCode MatANormSq_Diagonal_SeqKokkos(Mat, Vec, PetscReal *);
1707: #endif
1709: PETSC_EXTERN PetscLogEvent MAT_Mult;
1710: PETSC_EXTERN PetscLogEvent MAT_MultAdd;
1711: PETSC_EXTERN PetscLogEvent MAT_MultTranspose;
1712: PETSC_EXTERN PetscLogEvent MAT_MultHermitianTranspose;
1713: PETSC_EXTERN PetscLogEvent MAT_MultTransposeAdd;
1714: PETSC_EXTERN PetscLogEvent MAT_MultHermitianTransposeAdd;
1715: PETSC_EXTERN PetscLogEvent MAT_ADot;
1716: PETSC_EXTERN PetscLogEvent MAT_ANorm;
1717: PETSC_EXTERN PetscLogEvent MAT_Solve;
1718: PETSC_EXTERN PetscLogEvent MAT_Solves;
1719: PETSC_EXTERN PetscLogEvent MAT_SolveAdd;
1720: PETSC_EXTERN PetscLogEvent MAT_SolveTranspose;
1721: PETSC_EXTERN PetscLogEvent MAT_SolveTransposeAdd;
1722: PETSC_EXTERN PetscLogEvent MAT_SOR;
1723: PETSC_EXTERN PetscLogEvent MAT_ForwardSolve;
1724: PETSC_EXTERN PetscLogEvent MAT_BackwardSolve;
1725: PETSC_EXTERN PetscLogEvent MAT_LUFactor;
1726: PETSC_EXTERN PetscLogEvent MAT_LUFactorSymbolic;
1727: PETSC_EXTERN PetscLogEvent MAT_LUFactorNumeric;
1728: PETSC_EXTERN PetscLogEvent MAT_QRFactor;
1729: PETSC_EXTERN PetscLogEvent MAT_QRFactorSymbolic;
1730: PETSC_EXTERN PetscLogEvent MAT_QRFactorNumeric;
1731: PETSC_EXTERN PetscLogEvent MAT_CholeskyFactor;
1732: PETSC_EXTERN PetscLogEvent MAT_CholeskyFactorSymbolic;
1733: PETSC_EXTERN PetscLogEvent MAT_CholeskyFactorNumeric;
1734: PETSC_EXTERN PetscLogEvent MAT_ILUFactor;
1735: PETSC_EXTERN PetscLogEvent MAT_ILUFactorSymbolic;
1736: PETSC_EXTERN PetscLogEvent MAT_ICCFactorSymbolic;
1737: PETSC_EXTERN PetscLogEvent MAT_Copy;
1738: PETSC_EXTERN PetscLogEvent MAT_Convert;
1739: PETSC_EXTERN PetscLogEvent MAT_Scale;
1740: PETSC_EXTERN PetscLogEvent MAT_AssemblyBegin;
1741: PETSC_EXTERN PetscLogEvent MAT_AssemblyEnd;
1742: PETSC_EXTERN PetscLogEvent MAT_SetValues;
1743: PETSC_EXTERN PetscLogEvent MAT_GetValues;
1744: PETSC_EXTERN PetscLogEvent MAT_GetRow;
1745: PETSC_EXTERN PetscLogEvent MAT_GetRowIJ;
1746: PETSC_EXTERN PetscLogEvent MAT_CreateSubMats;
1747: PETSC_EXTERN PetscLogEvent MAT_GetOrdering;
1748: PETSC_EXTERN PetscLogEvent MAT_RedundantMat;
1749: PETSC_EXTERN PetscLogEvent MAT_IncreaseOverlap;
1750: PETSC_EXTERN PetscLogEvent MAT_Partitioning;
1751: PETSC_EXTERN PetscLogEvent MAT_PartitioningND;
1752: PETSC_EXTERN PetscLogEvent MAT_Coarsen;
1753: PETSC_EXTERN PetscLogEvent MAT_ZeroEntries;
1754: PETSC_EXTERN PetscLogEvent MAT_Load;
1755: PETSC_EXTERN PetscLogEvent MAT_View;
1756: PETSC_EXTERN PetscLogEvent MAT_AXPY;
1757: PETSC_EXTERN PetscLogEvent MAT_FDColoringCreate;
1758: PETSC_EXTERN PetscLogEvent MAT_TransposeColoringCreate;
1759: PETSC_EXTERN PetscLogEvent MAT_FDColoringSetUp;
1760: PETSC_EXTERN PetscLogEvent MAT_FDColoringApply;
1761: PETSC_EXTERN PetscLogEvent MAT_Transpose;
1762: PETSC_EXTERN PetscLogEvent MAT_FDColoringFunction;
1763: PETSC_EXTERN PetscLogEvent MAT_CreateSubMat;
1764: PETSC_EXTERN PetscLogEvent MAT_MatSolve;
1765: PETSC_EXTERN PetscLogEvent MAT_MatTrSolve;
1766: PETSC_EXTERN PetscLogEvent MAT_MatMultSymbolic;
1767: PETSC_EXTERN PetscLogEvent MAT_MatMultNumeric;
1768: PETSC_EXTERN PetscLogEvent MAT_Getlocalmatcondensed;
1769: PETSC_EXTERN PetscLogEvent MAT_GetBrowsOfAcols;
1770: PETSC_EXTERN PetscLogEvent MAT_GetBrowsOfAocols;
1771: PETSC_EXTERN PetscLogEvent MAT_PtAPSymbolic;
1772: PETSC_EXTERN PetscLogEvent MAT_PtAPNumeric;
1773: PETSC_EXTERN PetscLogEvent MAT_Seqstompinum;
1774: PETSC_EXTERN PetscLogEvent MAT_Seqstompisym;
1775: PETSC_EXTERN PetscLogEvent MAT_Seqstompi;
1776: PETSC_EXTERN PetscLogEvent MAT_Getlocalmat;
1777: PETSC_EXTERN PetscLogEvent MAT_RARtSymbolic;
1778: PETSC_EXTERN PetscLogEvent MAT_RARtNumeric;
1779: PETSC_EXTERN PetscLogEvent MAT_MatTransposeMultSymbolic;
1780: PETSC_EXTERN PetscLogEvent MAT_MatTransposeMultNumeric;
1781: PETSC_EXTERN PetscLogEvent MAT_TransposeMatMultSymbolic;
1782: PETSC_EXTERN PetscLogEvent MAT_TransposeMatMultNumeric;
1783: PETSC_EXTERN PetscLogEvent MAT_MatMatMultSymbolic;
1784: PETSC_EXTERN PetscLogEvent MAT_MatMatMultNumeric;
1785: PETSC_EXTERN PetscLogEvent MAT_Getsymtransreduced;
1786: PETSC_EXTERN PetscLogEvent MAT_GetSeqNonzeroStructure;
1787: PETSC_EXTERN PetscLogEvent MATMFFD_Mult;
1788: PETSC_EXTERN PetscLogEvent MAT_GetMultiProcBlock;
1789: PETSC_EXTERN PetscLogEvent MAT_CUSPARSECopyToGPU;
1790: PETSC_EXTERN PetscLogEvent MAT_CUSPARSECopyFromGPU;
1791: PETSC_EXTERN PetscLogEvent MAT_CUSPARSEGenerateTranspose;
1792: PETSC_EXTERN PetscLogEvent MAT_CUSPARSESolveAnalysis;
1793: PETSC_EXTERN PetscLogEvent MAT_HIPSPARSECopyToGPU;
1794: PETSC_EXTERN PetscLogEvent MAT_HIPSPARSECopyFromGPU;
1795: PETSC_EXTERN PetscLogEvent MAT_HIPSPARSEGenerateTranspose;
1796: PETSC_EXTERN PetscLogEvent MAT_HIPSPARSESolveAnalysis;
1797: PETSC_EXTERN PetscLogEvent MAT_SetValuesBatch;
1798: PETSC_EXTERN PetscLogEvent MAT_CreateGraph;
1799: PETSC_EXTERN PetscLogEvent MAT_ViennaCLCopyToGPU;
1800: PETSC_EXTERN PetscLogEvent MAT_DenseCopyToGPU;
1801: PETSC_EXTERN PetscLogEvent MAT_DenseCopyFromGPU;
1802: PETSC_EXTERN PetscLogEvent MAT_Merge;
1803: PETSC_EXTERN PetscLogEvent MAT_Residual;
1804: PETSC_EXTERN PetscLogEvent MAT_SetRandom;
1805: PETSC_EXTERN PetscLogEvent MAT_FactorFactS;
1806: PETSC_EXTERN PetscLogEvent MAT_FactorInvS;
1807: PETSC_EXTERN PetscLogEvent MAT_PreallCOO;
1808: PETSC_EXTERN PetscLogEvent MAT_SetVCOO;
1809: PETSC_EXTERN PetscLogEvent MATCOLORING_Apply;
1810: PETSC_EXTERN PetscLogEvent MATCOLORING_Comm;
1811: PETSC_EXTERN PetscLogEvent MATCOLORING_Local;
1812: PETSC_EXTERN PetscLogEvent MATCOLORING_ISCreate;
1813: PETSC_EXTERN PetscLogEvent MATCOLORING_SetUp;
1814: PETSC_EXTERN PetscLogEvent MATCOLORING_Weights;
1815: PETSC_EXTERN PetscLogEvent MAT_H2Opus_Build;
1816: PETSC_EXTERN PetscLogEvent MAT_H2Opus_Compress;
1817: PETSC_EXTERN PetscLogEvent MAT_H2Opus_Orthog;
1818: PETSC_EXTERN PetscLogEvent MAT_H2Opus_LR;
1819: PETSC_EXTERN PetscLogEvent MAT_CUDACopyToGPU;
1820: PETSC_EXTERN PetscLogEvent MAT_HIPCopyToGPU;
1822: #if defined(PETSC_CLANG_STATIC_ANALYZER)
1823: #define MatGetDiagonalMarkers(SeqXXX, bs)
1824: #else
1825: /*
1826: Adds diagonal pointers to sparse matrix nonzero structure and determines if all diagonal entries are present
1828: Rechecks the matrix data structure automatically if the nonzero structure of the matrix changed since the last call
1830: Potential optimization: since the a->j[j] are sorted this could use bisection to find the diagonal
1832: Developer Note:
1833: Uses the C preprocessor as a template mechanism to produce MatGetDiagonal_Seq[SB]AIJ() to avoid duplicate code
1834: */
1835: #define MatGetDiagonalMarkers(SeqXXX, bs) \
1836: PetscErrorCode MatGetDiagonalMarkers_##SeqXXX(Mat A, const PetscInt **diag, PetscBool *diagDense) \
1837: { \
1838: Mat_##SeqXXX *a = (Mat_##SeqXXX *)A->data; \
1839: \
1840: PetscFunctionBegin; \
1841: if (A->factortype != MAT_FACTOR_NONE) { \
1842: if (diagDense) *diagDense = PETSC_TRUE; \
1843: if (diag) *diag = a->diag; \
1844: PetscFunctionReturn(PETSC_SUCCESS); \
1845: } \
1846: PetscCheck(diag || diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "At least one of diag or diagDense must be requested"); \
1847: if (a->diagNonzeroState != A->nonzerostate || (diag && !a->diag)) { \
1848: const PetscInt m = A->rmap->n / bs; \
1849: \
1850: if (!diag && !a->diag) { \
1851: a->diagDense = PETSC_TRUE; \
1852: for (PetscInt i = 0; i < m; i++) { \
1853: PetscBool found = PETSC_FALSE; \
1854: \
1855: for (PetscInt j = a->i[i]; j < a->i[i + 1]; j++) { \
1856: if (a->j[j] == i) { \
1857: found = PETSC_TRUE; \
1858: break; \
1859: } \
1860: } \
1861: if (!found) { \
1862: a->diagDense = PETSC_FALSE; \
1863: *diagDense = a->diagDense; \
1864: a->diagNonzeroState = A->nonzerostate; \
1865: PetscFunctionReturn(PETSC_SUCCESS); \
1866: } \
1867: } \
1868: } else { \
1869: if (!a->diag) PetscCall(PetscMalloc1(m, &a->diag)); \
1870: a->diagDense = PETSC_TRUE; \
1871: for (PetscInt i = 0; i < m; i++) { \
1872: PetscBool found = PETSC_FALSE; \
1873: \
1874: a->diag[i] = a->i[i + 1]; \
1875: for (PetscInt j = a->i[i]; j < a->i[i + 1]; j++) { \
1876: if (a->j[j] == i) { \
1877: a->diag[i] = j; \
1878: found = PETSC_TRUE; \
1879: break; \
1880: } \
1881: } \
1882: if (!found) a->diagDense = PETSC_FALSE; \
1883: } \
1884: } \
1885: a->diagNonzeroState = A->nonzerostate; \
1886: } \
1887: if (diag) *diag = a->diag; \
1888: if (diagDense) *diagDense = a->diagDense; \
1889: PetscFunctionReturn(PETSC_SUCCESS); \
1890: }
1891: #endif