Actual source code: sell.h

  1: #pragma once

  3: #include <petsc/private/matimpl.h>
  4: #include <petsc/private/hashmapi.h>

  6: /*
  7:  For NVIDIA GPUs each slice should be padded to the boundary of 16 elements for best performance.
  8:  The optimal memory alignment in device memory is 128 bytes, 64 bytes, 32 bytes for double precision, single precision and half precision.
  9: */
 10: #if defined(PETSC_HAVE_DEVICE)
 11:   #define DEVICE_MEM_ALIGN 16
 12: #endif

 14: /*
 15:  Struct header for SeqSELL matrix format
 16: */
 17: #define SEQSELLHEADER(datatype) \
 18:   PetscBool    roworiented;        /* if true, row-oriented input, default */ \
 19:   PetscInt     nonew;              /* 1 don't add new nonzeros, -1 generate error on new */ \
 20:   PetscInt     nounused;           /* -1 generate error on unused space */ \
 21:   PetscBool    singlemalloc;       /* if true a, i, and j have been obtained with one big malloc */ \
 22:   PetscInt     maxallocmat;        /* max allocated space for the matrix */ \
 23:   PetscInt     maxallocrow;        /* max allocated space for each row */ \
 24:   PetscInt     nz;                 /* actual nonzeros */ \
 25:   PetscInt     rlenmax;            /* max actual row length, rmax cannot exceed maxallocrow */ \
 26:   PetscInt    *rlen;               /* actual length of each row (padding zeros excluded) */ \
 27:   PetscBool    free_rlen;          /* free rlen array ? */ \
 28:   PetscInt     reallocs;           /* number of mallocs done during MatSetValues() \
 29: as more values are set than were prealloced */ \
 30:   PetscBool    keepnonzeropattern; /* keeps matrix structure same in calls to MatZeroRows()*/ \
 31:   PetscBool    ignorezeroentries; \
 32:   PetscBool    free_colidx;     /* free the column indices colidx when the matrix is destroyed */ \
 33:   PetscBool    free_val;        /* free the numerical values when matrix is destroy */ \
 34:   PetscInt    *colidx;          /* column index */ \
 35:   PetscInt    *diag;            /* pointers to diagonal elements */ \
 36:   PetscInt     nonzerorowcnt;   /* how many rows have nonzero entries */ \
 37:   PetscBool    free_diag;       /* free diag ? */ \
 38:   datatype    *val;             /* elements including nonzeros and padding zeros */ \
 39:   PetscScalar *solve_work;      /* work space used in MatSolve */ \
 40:   IS           row, col, icol;  /* index sets, used for reorderings */ \
 41:   PetscBool    pivotinblocks;   /* pivot inside factorization of each diagonal block */ \
 42:   Mat          parent;          /* set if this matrix was formed with MatDuplicate(...,MAT_SHARE_NONZERO_PATTERN,....);
 43: means that this shares some data structures with the parent including diag, ilen, imax, i, j */ \
 44:   PetscInt    *sliidx;          /* slice index */ \
 45:   PetscInt     totalslices;     /* total number of slices */ \
 46:   PetscInt     sliceheight;     /* slice height */ \
 47:   PetscReal    fillratio;       /* ratio of number of padded zeros over total number of elements  */ \
 48:   PetscReal    avgslicewidth;   /* average slice width */ \
 49:   PetscInt     maxslicewidth;   /* maximum slice width */ \
 50:   PetscReal    varslicesize;    /* variance of slice size */ \
 51:   PetscInt    *sliperm;         /* slice permutation array, CUDA only */ \
 52:   PetscInt     totalblocks;     /* total number of blocks, CUDA only */ \
 53:   PetscInt    *blockidx;        /* block index, CUDA only */ \
 54:   PetscInt    *block_row_map;   /* starting row of the current block, CUDA only */ \
 55:   PetscInt     chunksize;       /* chunk size, CUDA only */ \
 56:   PetscInt     totalchunks;     /* total number of chunks, CUDA only */ \
 57:   PetscInt    *chunk_slice_map; /* starting slice of the current chunk, CUDA only */ \
 58:   PetscInt    *getrowcols;      /* workarray for MatGetRow_SeqSELL */ \
 59:   PetscScalar *getrowvals       /* workarray for MatGetRow_SeqSELL */

 61: typedef struct {
 62:   SEQSELLHEADER(MatScalar);
 63:   MatScalar   *saved_values;              /* location for stashing nonzero values of matrix */
 64:   PetscScalar *idiag, *mdiag, *ssor_work; /* inverse of diagonal entries, diagonal values and workspace for Eisenstat trick */
 65:   PetscBool    idiagvalid;                /* current idiag[] and mdiag[] are valid */
 66:   PetscScalar  fshift, omega;             /* last used omega and fshift */
 67:   ISColoring   coloring;                  /* set with MatADSetColoring() used by MatADSetValues() */
 68: } Mat_SeqSELL;

 70: /*
 71:  Frees the arrays from the XSELLPACK matrix type
 72:  */
 73: static inline PetscErrorCode MatSeqXSELLFreeSELL(Mat AA, MatScalar **val, PetscInt **colidx)
 74: {
 75:   Mat_SeqSELL *A = (Mat_SeqSELL *)AA->data;
 76:   if (A->singlemalloc) {
 77:     PetscCall(PetscFree2(*val, *colidx));
 78:   } else {
 79:     if (A->free_val) PetscCall(PetscFree(*val));
 80:     if (A->free_colidx) PetscCall(PetscFree(*colidx));
 81:   }
 82:   return PETSC_SUCCESS;
 83: }

 85: #define MatSeqXSELLReallocateSELL(Amat, AM, BS2, WIDTH, SIDX, SH, SID, ROW, COL, COLIDX, VAL, CP, VP, NONEW, datatype, MUL) \
 86:   do { \
 87:     if (WIDTH >= (SIDX[SID + 1] - SIDX[SID]) / SH) { \
 88:       Mat_SeqSELL *Ain = (Mat_SeqSELL *)Amat->data; \
 89:       /* there is no extra room in row, therefore enlarge 1 slice column */ \
 90:       PetscInt  new_size = Ain->maxallocmat + SH * MUL, *new_colidx; \
 91:       datatype *new_val; \
 92: \
 93:       PetscCheck(NONEW != -2, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "New nonzero at (%" PetscInt_FMT ",%" PetscInt_FMT ") caused a malloc. Use MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE) to turn off this check", ROW, COL); \
 94:       /* malloc new storage space */ \
 95:       PetscCall(PetscMalloc2(BS2 *new_size, &new_val, BS2 *new_size, &new_colidx)); \
 96: \
 97:       /* copy over old data into new slots by two steps: one step for data before the current slice and the other for the rest */ \
 98:       PetscCall(PetscArraycpy(new_val, VAL, SIDX[SID + 1])); \
 99:       PetscCall(PetscArraycpy(new_colidx, COLIDX, SIDX[SID + 1])); \
100:       PetscCall(PetscArraycpy(new_val + SIDX[SID + 1] + SH * MUL, VAL + SIDX[SID + 1], SIDX[Ain->totalslices] - SIDX[SID + 1])); \
101:       PetscCall(PetscArraycpy(new_colidx + SIDX[SID + 1] + SH * MUL, COLIDX + SIDX[SID + 1], SIDX[Ain->totalslices] - SIDX[SID + 1])); \
102:       /* update slice_idx */ \
103:       for (ii = SID + 1; ii <= Ain->totalslices; ii++) { SIDX[ii] += SH * MUL; } \
104:       /* update pointers. Notice that they point to the FIRST position of the row */ \
105:       CP = new_colidx + SIDX[SID] + (ROW % SH); \
106:       VP = new_val + SIDX[SID] + (ROW % SH); \
107:       /* free up old matrix storage */ \
108:       PetscCall(MatSeqXSELLFreeSELL(A, &Ain->val, &Ain->colidx)); \
109:       Ain->val          = new_val; \
110:       Ain->colidx       = new_colidx; \
111:       Ain->singlemalloc = PETSC_TRUE; \
112:       Ain->maxallocmat  = new_size; \
113:       Ain->reallocs++; \
114:       A->nonzerostate++; \
115:       if (WIDTH >= Ain->maxallocrow) Ain->maxallocrow += MUL; \
116:       if (WIDTH >= Ain->rlenmax) Ain->rlenmax++; \
117:     } \
118:   } while (0)

120: #define MatSetValue_SeqSELL_Private(A, row, col, value, addv, orow, ocol, cp, vp, lastcol, low, high) \
121:   do { \
122:     Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; \
123:     found          = PETSC_FALSE; \
124:     if (col <= lastcol) low = 0; \
125:     else high = a->rlen[row]; \
126:     lastcol = col; \
127:     while (high - low > 5) { \
128:       t = (low + high) / 2; \
129:       if (*(cp + a->sliceheight * t) > col) high = t; \
130:       else low = t; \
131:     } \
132:     for (_i = low; _i < high; _i++) { \
133:       if (*(cp + a->sliceheight * _i) > col) break; \
134:       if (*(cp + a->sliceheight * _i) == col) { \
135:         if (addv == ADD_VALUES) *(vp + a->sliceheight * _i) += value; \
136:         else *(vp + a->sliceheight * _i) = value; \
137:         found = PETSC_TRUE; \
138:         break; \
139:       } \
140:     } \
141:     if (!found) { \
142:       PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
143:       if (a->nonew != 1 && !(value == 0.0 && a->ignorezeroentries) && a->rlen[row] >= (a->sliidx[row / a->sliceheight + 1] - a->sliidx[row / a->sliceheight]) / a->sliceheight) { \
144:         /* there is no extra room in row, therefore enlarge 1 slice column */ \
145:         if (a->maxallocmat < a->sliidx[a->totalslices] + a->sliceheight) { \
146:           /* allocates a larger array for the XSELL matrix types; only extend the current slice by one more column. */ \
147:           PetscInt   new_size = a->maxallocmat + a->sliceheight, *new_colidx; \
148:           MatScalar *new_val; \
149:           PetscCheck(a->nonew != -2, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "New nonzero at (%" PetscInt_FMT ",%" PetscInt_FMT ") caused a malloc. Use MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE) to turn off this check", orow, ocol); \
150:           /* malloc new storage space */ \
151:           PetscCall(PetscMalloc2(new_size, &new_val, new_size, &new_colidx)); \
152:           /* copy over old data into new slots by two steps: one step for data before the current slice and the other for the rest */ \
153:           PetscCall(PetscArraycpy(new_val, a->val, a->sliidx[row / a->sliceheight + 1])); \
154:           PetscCall(PetscArraycpy(new_colidx, a->colidx, a->sliidx[row / a->sliceheight + 1])); \
155:           PetscCall(PetscArraycpy(new_val + a->sliidx[row / a->sliceheight + 1] + a->sliceheight, PetscSafePointerPlusOffset(a->val, a->sliidx[row / a->sliceheight + 1]), a->sliidx[a->totalslices] - a->sliidx[row / a->sliceheight + 1])); \
156:           PetscCall(PetscArraycpy(new_colidx + a->sliidx[row / a->sliceheight + 1] + a->sliceheight, PetscSafePointerPlusOffset(a->colidx, a->sliidx[row / a->sliceheight + 1]), a->sliidx[a->totalslices] - a->sliidx[row / a->sliceheight + 1])); \
157:           /* update pointers. Notice that they point to the FIRST position of the row */ \
158:           cp = new_colidx + a->sliidx[row / a->sliceheight] + (row % a->sliceheight); \
159:           vp = new_val + a->sliidx[row / a->sliceheight] + (row % a->sliceheight); \
160:           /* free up old matrix storage */ \
161:           PetscCall(MatSeqXSELLFreeSELL(A, &a->val, &a->colidx)); \
162:           a->val          = new_val; \
163:           a->colidx       = new_colidx; \
164:           a->singlemalloc = PETSC_TRUE; \
165:           a->maxallocmat  = new_size; \
166:           a->reallocs++; \
167:         } else { \
168:           /* no need to reallocate, just shift the following slices to create space for the added slice column */ \
169:           PetscCall(PetscArraymove(a->val + a->sliidx[row / a->sliceheight + 1] + a->sliceheight, a->val + a->sliidx[row / a->sliceheight + 1], a->sliidx[a->totalslices] - a->sliidx[row / a->sliceheight + 1])); \
170:           PetscCall(PetscArraymove(a->colidx + a->sliidx[row / a->sliceheight + 1] + a->sliceheight, a->colidx + a->sliidx[row / a->sliceheight + 1], a->sliidx[a->totalslices] - a->sliidx[row / a->sliceheight + 1])); \
171:         } \
172:         /* update slice_idx */ \
173:         for (ii = row / a->sliceheight + 1; ii <= a->totalslices; ii++) a->sliidx[ii] += a->sliceheight; \
174:         if (a->rlen[row] >= a->maxallocrow) a->maxallocrow++; \
175:         if (a->rlen[row] >= a->rlenmax) a->rlenmax++; \
176:       } \
177:       /* shift up all the later entries in this row */ \
178:       for (ii = a->rlen[row] - 1; ii >= _i; ii--) { \
179:         *(cp + a->sliceheight * (ii + 1)) = *(cp + a->sliceheight * ii); \
180:         *(vp + a->sliceheight * (ii + 1)) = *(vp + a->sliceheight * ii); \
181:       } \
182:       *(cp + a->sliceheight * _i) = col; \
183:       *(vp + a->sliceheight * _i) = value; \
184:       a->nz++; \
185:       a->rlen[row]++; \
186:       A->nonzerostate++; \
187:       low = _i + 1; \
188:       high++; \
189:     } \
190:   } while (0)

192: #define PetscCeilIntMacro(x, y) ((((PetscInt)(x)) / ((PetscInt)(y))) + ((((PetscInt)(x)) % ((PetscInt)(y))) ? 1 : 0))

194: PETSC_INTERN PetscErrorCode MatSeqSELLSetPreallocation_SeqSELL(Mat, PetscInt, const PetscInt[]);
195: PETSC_INTERN PetscErrorCode MatMult_SeqSELL(Mat, Vec, Vec);
196: PETSC_INTERN PetscErrorCode MatMultAdd_SeqSELL(Mat, Vec, Vec, Vec);
197: PETSC_INTERN PetscErrorCode MatMultTranspose_SeqSELL(Mat, Vec, Vec);
198: PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqSELL(Mat, Vec, Vec, Vec);
199: PETSC_INTERN PetscErrorCode MatMissingDiagonal_SeqSELL(Mat, PetscBool *, PetscInt *);
200: PETSC_INTERN PetscErrorCode MatMarkDiagonal_SeqSELL(Mat);
201: PETSC_INTERN PetscErrorCode MatInvertDiagonal_SeqSELL(Mat, PetscScalar, PetscScalar);
202: PETSC_INTERN PetscErrorCode MatZeroEntries_SeqSELL(Mat);
203: PETSC_INTERN PetscErrorCode MatDestroy_SeqSELL(Mat);
204: PETSC_INTERN PetscErrorCode MatSetOption_SeqSELL(Mat, MatOption, PetscBool);
205: PETSC_INTERN PetscErrorCode MatGetDiagonal_SeqSELL(Mat, Vec v);
206: PETSC_INTERN PetscErrorCode MatGetValues_SeqSELL(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], PetscScalar[]);
207: PETSC_INTERN PetscErrorCode MatView_SeqSELL(Mat, PetscViewer);
208: PETSC_INTERN PetscErrorCode MatAssemblyEnd_SeqSELL(Mat, MatAssemblyType);
209: PETSC_INTERN PetscErrorCode MatGetInfo_SeqSELL(Mat, MatInfoType, MatInfo *);
210: PETSC_INTERN PetscErrorCode MatSetValues_SeqSELL(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
211: PETSC_INTERN PetscErrorCode MatCopy_SeqSELL(Mat, Mat, MatStructure);
212: PETSC_INTERN PetscErrorCode MatSetUp_SeqSELL(Mat);
213: PETSC_INTERN PetscErrorCode MatSeqSELLGetArray_SeqSELL(Mat, PetscScalar *[]);
214: PETSC_INTERN PetscErrorCode MatSeqSELLRestoreArray_SeqSELL(Mat, PetscScalar *[]);
215: PETSC_INTERN PetscErrorCode MatShift_SeqSELL(Mat, PetscScalar);
216: PETSC_INTERN PetscErrorCode MatSOR_SeqSELL(Mat, Vec, PetscReal, MatSORType, PetscReal, PetscInt, PetscInt, Vec);
217: PETSC_EXTERN PetscErrorCode MatCreate_SeqSELL(Mat);
218: PETSC_INTERN PetscErrorCode MatDuplicate_SeqSELL(Mat, MatDuplicateOption, Mat *);
219: PETSC_INTERN PetscErrorCode MatEqual_SeqSELL(Mat, Mat, PetscBool *);
220: PETSC_INTERN PetscErrorCode MatSeqSELLInvalidateDiagonal(Mat);
221: PETSC_INTERN PetscErrorCode MatConvert_SeqSELL_SeqAIJ(Mat, MatType, MatReuse, Mat *);
222: PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqSELL(Mat, MatType, MatReuse, Mat *);
223: PETSC_INTERN PetscErrorCode MatFDColoringCreate_SeqSELL(Mat, ISColoring, MatFDColoring);
224: PETSC_INTERN PetscErrorCode MatFDColoringSetUp_SeqSELL(Mat, ISColoring, MatFDColoring);
225: PETSC_INTERN PetscErrorCode MatGetColumnIJ_SeqSELL_Color(Mat, PetscInt, PetscBool, PetscBool, PetscInt *, const PetscInt *[], const PetscInt *[], PetscInt *[], PetscBool *);
226: PETSC_INTERN PetscErrorCode MatRestoreColumnIJ_SeqSELL_Color(Mat, PetscInt, PetscBool, PetscBool, PetscInt *, const PetscInt *[], const PetscInt *[], PetscInt *[], PetscBool *);
227: PETSC_INTERN PetscErrorCode MatConjugate_SeqSELL(Mat A);
228: PETSC_INTERN PetscErrorCode MatScale_SeqSELL(Mat, PetscScalar);
229: PETSC_INTERN PetscErrorCode MatDiagonalScale_SeqSELL(Mat, Vec, Vec);