Actual source code: aijmkl.c
  1: /*
  2:   Defines basic operations for the MATSEQAIJMKL matrix class.
  3:   This class is derived from the MATSEQAIJ class and retains the
  4:   compressed row storage (aka Yale sparse matrix format) but uses
  5:   sparse BLAS operations from the Intel Math Kernel Library (MKL)
  6:   wherever possible.
  7: */

  9: #include <../src/mat/impls/aij/seq/aij.h>
 10: #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
 11: #if defined(PETSC_HAVE_MKL_INTEL_ILP64)
 12:   #define MKL_ILP64
 13: #endif
 14: #include <mkl_spblas.h>

 16: typedef struct {
 17:   PetscBool        no_SpMV2;         /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
 18:   PetscBool        eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
 19:   PetscBool        sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
 20:   PetscObjectState state;
 21: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
 22:   sparse_matrix_t     csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
 23:   struct matrix_descr descr;
 24: #endif
 25: } Mat_SeqAIJMKL;

 27: extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat, MatAssemblyType);

 29: PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat)
 30: {
 31:   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
 32:   /* so we will ignore 'MatType type'. */
 33:   Mat B = *newmat;
 34: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
 35:   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
 36: #endif

 38:   PetscFunctionBegin;
 39:   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));

 41:   /* Reset the original function pointers. */
 42:   B->ops->duplicate               = MatDuplicate_SeqAIJ;
 43:   B->ops->assemblyend             = MatAssemblyEnd_SeqAIJ;
 44:   B->ops->destroy                 = MatDestroy_SeqAIJ;
 45:   B->ops->mult                    = MatMult_SeqAIJ;
 46:   B->ops->multtranspose           = MatMultTranspose_SeqAIJ;
 47:   B->ops->multadd                 = MatMultAdd_SeqAIJ;
 48:   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJ;
 49:   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJ;
 50:   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
 51:   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJ_SeqAIJ;
 52:   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
 53:   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
 54:   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJ_SeqAIJ;

 56:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", NULL));

 58: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
 59:   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
 60:    * simply involves destroying the MKL sparse matrix handle and then freeing
 61:    * the spptr pointer. */
 62:   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL *)B->spptr;

 64:   if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
 65: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
 66:   PetscCall(PetscFree(B->spptr));

 68:   /* Change the type of B to MATSEQAIJ. */
 69:   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ));

 71:   *newmat = B;
 72:   PetscFunctionReturn(PETSC_SUCCESS);
 73: }

 75: static PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
 76: {
 77:   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;

 79:   PetscFunctionBegin;
 80:   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
 81:   if (aijmkl) {
 82:     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
 83: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
 84:     if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
 85: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
 86:     PetscCall(PetscFree(A->spptr));
 87:   }

 89:   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
 90:    * to destroy everything that remains. */
 91:   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ));
 92:   /* I don't call MatSetType().  I believe this is because that
 93:    * is only to be called when *building* a matrix.  I could be wrong, but
 94:    * that is how things work for the SuperLU matrix class. */
 95:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaijmkl_seqaij_C", NULL));
 96:   PetscCall(MatDestroy_SeqAIJ(A));
 97:   PetscFunctionReturn(PETSC_SUCCESS);
 98: }

100: /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
101:  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
102:  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
103:  * handle, creates a new one, and then calls mkl_sparse_optimize().
104:  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
105:  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
106:  * an unoptimized matrix handle here. */
107: PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
108: {
109: #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
110:   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
111:    * does nothing. We make it callable anyway in this case because it cuts
112:    * down on littering the code with #ifdefs. */
113:   PetscFunctionBegin;
114:   PetscFunctionReturn(PETSC_SUCCESS);
115: #else
116:   Mat_SeqAIJ    *a      = (Mat_SeqAIJ *)A->data;
117:   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
118:   PetscInt       m, n;
119:   MatScalar     *aa;
120:   PetscInt      *aj, *ai;

122:   PetscFunctionBegin;
123:   #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
124:   /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
125:    * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
126:    * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
127:    * mkl_sparse_optimize() later. */
128:   if (aijmkl->no_SpMV2) PetscFunctionReturn(PETSC_SUCCESS);
129:   #endif

131:   if (aijmkl->sparse_optimized) {
132:     /* Matrix has been previously assembled and optimized. Must destroy old
133:      * matrix handle before running the optimization step again. */
134:     PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
135:   }
136:   aijmkl->sparse_optimized = PETSC_FALSE;

138:   /* Now perform the SpMV2 setup and matrix optimization. */
139:   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
140:   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
141:   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
142:   m                  = A->rmap->n;
143:   n                  = A->cmap->n;
144:   aj                 = a->j; /* aj[k] gives column index for element aa[k]. */
145:   aa                 = a->a; /* Nonzero elements stored row-by-row. */
146:   ai                 = a->i; /* ai[k] is the position in aa and aj where row k starts. */
147:   if (a->nz && aa && !A->structure_only) {
148:     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
149:      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
150:     PetscCallExternal(mkl_sparse_x_create_csr, &aijmkl->csrA, SPARSE_INDEX_BASE_ZERO, (MKL_INT)m, (MKL_INT)n, (MKL_INT *)ai, (MKL_INT *)(ai + 1), (MKL_INT *)aj, aa);
151:     PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
152:     PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
153:     if (!aijmkl->no_SpMV2) PetscCallExternal(mkl_sparse_optimize, aijmkl->csrA);
154:     aijmkl->sparse_optimized = PETSC_TRUE;
155:     PetscCall(PetscObjectStateGet((PetscObject)A, &aijmkl->state));
156:   } else {
157:     aijmkl->csrA = NULL;
158:   }
159:   PetscFunctionReturn(PETSC_SUCCESS);
160: #endif
161: }

163: #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
164: /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
165: static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm, sparse_matrix_t csrA, PetscInt nrows, PetscInt ncols, Mat A)
166: {
167:   sparse_index_base_t indexing;
168:   PetscInt            m, n;
169:   PetscInt           *aj, *ai, *dummy;
170:   MatScalar          *aa;
171:   Mat_SeqAIJMKL      *aijmkl;

173:   PetscFunctionBegin;
174:   if (csrA) {
175:     /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
176:     PetscCallExternal(mkl_sparse_x_export_csr, csrA, &indexing, (MKL_INT *)&m, (MKL_INT *)&n, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);
177:     PetscCheck((m == nrows) && (n == ncols), PETSC_COMM_SELF, PETSC_ERR_LIB, "Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
178:   } else {
179:     aj = ai = NULL;
180:     aa      = NULL;
181:   }

183:   PetscCall(MatSetType(A, MATSEQAIJ));
184:   PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, nrows, ncols));
185:   /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
186:    * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
187:    * they will be destroyed when the MKL handle is destroyed.
188:    * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
189:   if (csrA) {
190:     PetscCall(MatSeqAIJSetPreallocationCSR(A, ai, aj, NULL));
191:   } else {
192:     /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
193:     PetscCall(MatSetUp(A));
194:     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
195:     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
196:   }

198:   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
199:    * Now turn it into a MATSEQAIJMKL. */
200:   PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));

202:   aijmkl       = (Mat_SeqAIJMKL *)A->spptr;
203:   aijmkl->csrA = csrA;

205:   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
206:    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
207:    * and just need to be able to run the MKL optimization step. */
208:   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
209:   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
210:   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
211:   if (csrA) {
212:     PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
213:     PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
214:   }
215:   PetscCall(PetscObjectStateGet((PetscObject)A, &aijmkl->state));
216:   PetscFunctionReturn(PETSC_SUCCESS);
217: }
218: #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */

220: /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
221:  * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
222:  * MatMatMultNumeric(). */
223: #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
224: static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
225: {
226:   PetscInt            i;
227:   PetscInt            nrows, ncols;
228:   PetscInt            nz;
229:   PetscInt           *ai, *aj, *dummy;
230:   PetscScalar        *aa;
231:   Mat_SeqAIJMKL      *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
232:   sparse_index_base_t indexing;

234:   PetscFunctionBegin;
235:   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
236:   if (!aijmkl->csrA) PetscFunctionReturn(PETSC_SUCCESS);

238:   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
239:   PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, (MKL_INT *)&nrows, (MKL_INT *)&ncols, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);

241:   /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
242:    * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
243:   for (i = 0; i < nrows; i++) {
244:     nz = ai[i + 1] - ai[i];
245:     PetscCall(MatSetValues_SeqAIJ(A, 1, &i, nz, aj + ai[i], aa + ai[i], INSERT_VALUES));
246:   }

248:   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
249:   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

251:   PetscCall(PetscObjectStateGet((PetscObject)A, &aijmkl->state));
252:   /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
253:    * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
254:    * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
255:   aijmkl->sparse_optimized = PETSC_FALSE;
256:   PetscFunctionReturn(PETSC_SUCCESS);
257: }
258: #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */

260: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
261: PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A, PetscViewer viewer)
262: {
263:   PetscInt            i, j, k;
264:   PetscInt            nrows, ncols;
265:   PetscInt            nz;
266:   PetscInt           *ai, *aj, *dummy;
267:   PetscScalar        *aa;
268:   Mat_SeqAIJMKL      *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
269:   sparse_index_base_t indexing;

271:   PetscFunctionBegin;
272:   PetscCall(PetscViewerASCIIPrintf(viewer, "Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n"));

274:   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
275:   if (!aijmkl->csrA) {
276:     PetscCall(PetscViewerASCIIPrintf(viewer, "MKL matrix handle is NULL\n"));
277:     PetscFunctionReturn(PETSC_SUCCESS);
278:   }

280:   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
281:   PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, (MKL_INT *)&nrows, (MKL_INT *)&ncols, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);

283:   k = 0;
284:   for (i = 0; i < nrows; i++) {
285:     PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ": ", i));
286:     nz = ai[i + 1] - ai[i];
287:     for (j = 0; j < nz; j++) {
288:       if (aa) {
289:         PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", %g)  ", aj[k], PetscRealPart(aa[k])));
290:       } else {
291:         PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", NULL)", aj[k]));
292:       }
293:       k++;
294:     }
295:     PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
296:   }
297:   PetscFunctionReturn(PETSC_SUCCESS);
298: }
299: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

301: static PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
302: {
303:   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
304:   Mat_SeqAIJMKL *aijmkl_dest;

306:   PetscFunctionBegin;
307:   PetscCall(MatDuplicate_SeqAIJ(A, op, M));
308:   aijmkl_dest = (Mat_SeqAIJMKL *)(*M)->spptr;
309:   PetscCall(PetscArraycpy(aijmkl_dest, aijmkl, 1));
310:   aijmkl_dest->sparse_optimized = PETSC_FALSE;
311:   if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
312:   PetscFunctionReturn(PETSC_SUCCESS);
313: }

315: static PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
316: {
317:   Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data;
318:   Mat_SeqAIJMKL *aijmkl;

320:   PetscFunctionBegin;
321:   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);

323:   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
324:    * extra information and some different methods, call the AssemblyEnd
325:    * routine for a MATSEQAIJ.
326:    * I'm not sure if this is the best way to do this, but it avoids
327:    * a lot of code duplication. */
328:   a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */
329:   PetscCall(MatAssemblyEnd_SeqAIJ(A, mode));

331:   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
332:    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
333:   aijmkl = (Mat_SeqAIJMKL *)A->spptr;
334:   if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
335:   PetscFunctionReturn(PETSC_SUCCESS);
336: }

338: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
339: static PetscErrorCode MatMult_SeqAIJMKL(Mat A, Vec xx, Vec yy)
340: {
341:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
342:   const PetscScalar *x;
343:   PetscScalar       *y;
344:   const MatScalar   *aa;
345:   PetscInt           m     = A->rmap->n;
346:   PetscInt           n     = A->cmap->n;
347:   PetscScalar        alpha = 1.0;
348:   PetscScalar        beta  = 0.0;
349:   const PetscInt    *aj, *ai;
350:   char               matdescra[6];

352:   /* Variables not in MatMult_SeqAIJ. */
353:   char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */

355:   PetscFunctionBegin;
356:   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
357:   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
358:   PetscCall(VecGetArrayRead(xx, &x));
359:   PetscCall(VecGetArray(yy, &y));
360:   aj = a->j; /* aj[k] gives column index for element aa[k]. */
361:   aa = a->a; /* Nonzero elements stored row-by-row. */
362:   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */

364:   /* Call MKL sparse BLAS routine to do the MatMult. */
365:   mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);

367:   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
368:   PetscCall(VecRestoreArrayRead(xx, &x));
369:   PetscCall(VecRestoreArray(yy, &y));
370:   PetscFunctionReturn(PETSC_SUCCESS);
371: }
372: #endif

374: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
375: PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
376: {
377:   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
378:   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
379:   const PetscScalar *x;
380:   PetscScalar       *y;
381:   PetscObjectState   state;

383:   PetscFunctionBegin;
384:   /* If there are no nonzero entries, zero yy and return immediately. */
385:   if (!a->nz) {
386:     PetscCall(VecGetArray(yy, &y));
387:     PetscCall(PetscArrayzero(y, A->rmap->n));
388:     PetscCall(VecRestoreArray(yy, &y));
389:     PetscFunctionReturn(PETSC_SUCCESS);
390:   }

392:   PetscCall(VecGetArrayRead(xx, &x));
393:   PetscCall(VecGetArray(yy, &y));

395:   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
396:    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
397:    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
398:   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
399:   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));

401:   /* Call MKL SpMV2 executor routine to do the MatMult. */
402:   PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);

404:   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
405:   PetscCall(VecRestoreArrayRead(xx, &x));
406:   PetscCall(VecRestoreArray(yy, &y));
407:   PetscFunctionReturn(PETSC_SUCCESS);
408: }
409: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

411: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
412: static PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A, Vec xx, Vec yy)
413: {
414:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
415:   const PetscScalar *x;
416:   PetscScalar       *y;
417:   const MatScalar   *aa;
418:   PetscInt           m     = A->rmap->n;
419:   PetscInt           n     = A->cmap->n;
420:   PetscScalar        alpha = 1.0;
421:   PetscScalar        beta  = 0.0;
422:   const PetscInt    *aj, *ai;
423:   char               matdescra[6];

425:   /* Variables not in MatMultTranspose_SeqAIJ. */
426:   char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */

428:   PetscFunctionBegin;
429:   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
430:   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
431:   PetscCall(VecGetArrayRead(xx, &x));
432:   PetscCall(VecGetArray(yy, &y));
433:   aj = a->j; /* aj[k] gives column index for element aa[k]. */
434:   aa = a->a; /* Nonzero elements stored row-by-row. */
435:   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */

437:   /* Call MKL sparse BLAS routine to do the MatMult. */
438:   mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);

440:   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
441:   PetscCall(VecRestoreArrayRead(xx, &x));
442:   PetscCall(VecRestoreArray(yy, &y));
443:   PetscFunctionReturn(PETSC_SUCCESS);
444: }
445: #endif

447: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
448: PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
449: {
450:   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
451:   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
452:   const PetscScalar *x;
453:   PetscScalar       *y;
454:   PetscObjectState   state;

456:   PetscFunctionBegin;
457:   /* If there are no nonzero entries, zero yy and return immediately. */
458:   if (!a->nz) {
459:     PetscCall(VecGetArray(yy, &y));
460:     PetscCall(PetscArrayzero(y, A->cmap->n));
461:     PetscCall(VecRestoreArray(yy, &y));
462:     PetscFunctionReturn(PETSC_SUCCESS);
463:   }

465:   PetscCall(VecGetArrayRead(xx, &x));
466:   PetscCall(VecGetArray(yy, &y));

468:   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
469:    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
470:    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
471:   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
472:   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));

474:   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
475:   PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);

477:   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
478:   PetscCall(VecRestoreArrayRead(xx, &x));
479:   PetscCall(VecRestoreArray(yy, &y));
480:   PetscFunctionReturn(PETSC_SUCCESS);
481: }
482: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

484: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
485: static PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
486: {
487:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
488:   const PetscScalar *x;
489:   PetscScalar       *y, *z;
490:   const MatScalar   *aa;
491:   PetscInt           m = A->rmap->n;
492:   PetscInt           n = A->cmap->n;
493:   const PetscInt    *aj, *ai;
494:   PetscInt           i;

496:   /* Variables not in MatMultAdd_SeqAIJ. */
497:   char        transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
498:   PetscScalar alpha  = 1.0;
499:   PetscScalar beta;
500:   char        matdescra[6];

502:   PetscFunctionBegin;
503:   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
504:   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */

506:   PetscCall(VecGetArrayRead(xx, &x));
507:   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
508:   aj = a->j; /* aj[k] gives column index for element aa[k]. */
509:   aa = a->a; /* Nonzero elements stored row-by-row. */
510:   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */

512:   /* Call MKL sparse BLAS routine to do the MatMult. */
513:   if (zz == yy) {
514:     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
515:     beta = 1.0;
516:     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
517:   } else {
518:     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
519:      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
520:     beta = 0.0;
521:     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
522:     for (i = 0; i < m; i++) z[i] += y[i];
523:   }

525:   PetscCall(PetscLogFlops(2.0 * a->nz));
526:   PetscCall(VecRestoreArrayRead(xx, &x));
527:   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
528:   PetscFunctionReturn(PETSC_SUCCESS);
529: }
530: #endif

532: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
533: PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
534: {
535:   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
536:   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
537:   const PetscScalar *x;
538:   PetscScalar       *y, *z;
539:   PetscInt           m = A->rmap->n;
540:   PetscInt           i;

542:   /* Variables not in MatMultAdd_SeqAIJ. */
543:   PetscObjectState state;

545:   PetscFunctionBegin;
546:   /* If there are no nonzero entries, set zz = yy and return immediately. */
547:   if (!a->nz) {
548:     PetscCall(VecGetArrayPair(yy, zz, &y, &z));
549:     PetscCall(PetscArraycpy(z, y, m));
550:     PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
551:     PetscFunctionReturn(PETSC_SUCCESS);
552:   }

554:   PetscCall(VecGetArrayRead(xx, &x));
555:   PetscCall(VecGetArrayPair(yy, zz, &y, &z));

557:   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
558:    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
559:    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
560:   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
561:   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));

563:   /* Call MKL sparse BLAS routine to do the MatMult. */
564:   if (zz == yy) {
565:     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
566:      * with alpha and beta both set to 1.0. */
567:     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
568:   } else {
569:     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
570:      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
571:     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
572:     for (i = 0; i < m; i++) z[i] += y[i];
573:   }

575:   PetscCall(PetscLogFlops(2.0 * a->nz));
576:   PetscCall(VecRestoreArrayRead(xx, &x));
577:   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
578:   PetscFunctionReturn(PETSC_SUCCESS);
579: }
580: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

582: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
583: static PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
584: {
585:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
586:   const PetscScalar *x;
587:   PetscScalar       *y, *z;
588:   const MatScalar   *aa;
589:   PetscInt           m = A->rmap->n;
590:   PetscInt           n = A->cmap->n;
591:   const PetscInt    *aj, *ai;
592:   PetscInt           i;

594:   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
595:   char        transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
596:   PetscScalar alpha  = 1.0;
597:   PetscScalar beta;
598:   char        matdescra[6];

600:   PetscFunctionBegin;
601:   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
602:   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */

604:   PetscCall(VecGetArrayRead(xx, &x));
605:   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
606:   aj = a->j; /* aj[k] gives column index for element aa[k]. */
607:   aa = a->a; /* Nonzero elements stored row-by-row. */
608:   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */

610:   /* Call MKL sparse BLAS routine to do the MatMult. */
611:   if (zz == yy) {
612:     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
613:     beta = 1.0;
614:     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
615:   } else {
616:     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
617:      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
618:     beta = 0.0;
619:     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
620:     for (i = 0; i < n; i++) z[i] += y[i];
621:   }

623:   PetscCall(PetscLogFlops(2.0 * a->nz));
624:   PetscCall(VecRestoreArrayRead(xx, &x));
625:   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
626:   PetscFunctionReturn(PETSC_SUCCESS);
627: }
628: #endif

630: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
631: PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
632: {
633:   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
634:   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
635:   const PetscScalar *x;
636:   PetscScalar       *y, *z;
637:   PetscInt           n = A->cmap->n;
638:   PetscInt           i;
639:   PetscObjectState   state;

641:   /* Variables not in MatMultTransposeAdd_SeqAIJ. */

643:   PetscFunctionBegin;
644:   /* If there are no nonzero entries, set zz = yy and return immediately. */
645:   if (!a->nz) {
646:     PetscCall(VecGetArrayPair(yy, zz, &y, &z));
647:     PetscCall(PetscArraycpy(z, y, n));
648:     PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
649:     PetscFunctionReturn(PETSC_SUCCESS);
650:   }

652:   PetscCall(VecGetArrayRead(xx, &x));
653:   PetscCall(VecGetArrayPair(yy, zz, &y, &z));

655:   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
656:    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
657:    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
658:   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
659:   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));

661:   /* Call MKL sparse BLAS routine to do the MatMult. */
662:   if (zz == yy) {
663:     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
664:      * with alpha and beta both set to 1.0. */
665:     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
666:   } else {
667:     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
668:      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
669:     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
670:     for (i = 0; i < n; i++) z[i] += y[i];
671:   }

673:   PetscCall(PetscLogFlops(2.0 * a->nz));
674:   PetscCall(VecRestoreArrayRead(xx, &x));
675:   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
676:   PetscFunctionReturn(PETSC_SUCCESS);
677: }
678: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

680: #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
681: static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
682: {
683:   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr;
684:   sparse_matrix_t     csrA, csrB, csrC;
685:   PetscInt            nrows, ncols;
686:   struct matrix_descr descr_type_gen;
687:   PetscObjectState    state;

689:   PetscFunctionBegin;
690:   /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
691:    * not handle sparse matrices with zero rows or columns. */
692:   if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
693:   else nrows = A->cmap->N;
694:   if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
695:   else ncols = B->rmap->N;

697:   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
698:   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
699:   PetscCall(PetscObjectStateGet((PetscObject)B, &state));
700:   if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
701:   csrA                = a->csrA;
702:   csrB                = b->csrA;
703:   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

705:   if (csrA && csrB) {
706:     PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FULL_MULT_NO_VAL, &csrC);
707:   } else {
708:     csrC = NULL;
709:   }

711:   PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, nrows, ncols, C));
712:   PetscFunctionReturn(PETSC_SUCCESS);
713: }

715: PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
716: {
717:   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
718:   sparse_matrix_t     csrA, csrB, csrC;
719:   struct matrix_descr descr_type_gen;
720:   PetscObjectState    state;

722:   PetscFunctionBegin;
723:   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
724:   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
725:   PetscCall(PetscObjectStateGet((PetscObject)B, &state));
726:   if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
727:   csrA                = a->csrA;
728:   csrB                = b->csrA;
729:   csrC                = c->csrA;
730:   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

732:   if (csrA && csrB) {
733:     PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FINALIZE_MULT, &csrC);
734:   } else {
735:     csrC = NULL;
736:   }

738:   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
739:   PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
740:   PetscFunctionReturn(PETSC_SUCCESS);
741: }

743: PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
744: {
745:   PetscFunctionBegin;
746:   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
747:   PetscFunctionReturn(PETSC_SUCCESS);
748: }

750: PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
751: {
752:   PetscFunctionBegin;
753:   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
754:   PetscFunctionReturn(PETSC_SUCCESS);
755: }

757: PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
758: {
759:   PetscFunctionBegin;
760:   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
761:   PetscFunctionReturn(PETSC_SUCCESS);
762: }

764: PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
765: {
766:   PetscFunctionBegin;
767:   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
768:   PetscFunctionReturn(PETSC_SUCCESS);
769: }

771: PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
772: {
773:   PetscFunctionBegin;
774:   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C));
775:   PetscFunctionReturn(PETSC_SUCCESS);
776: }

778: PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
779: {
780:   PetscFunctionBegin;
781:   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C));
782:   PetscFunctionReturn(PETSC_SUCCESS);
783: }

785: static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
786: {
787:   Mat_Product *product = C->product;
788:   Mat          A = product->A, B = product->B;

790:   PetscFunctionBegin;
791:   PetscCall(MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A, B, C));
792:   PetscFunctionReturn(PETSC_SUCCESS);
793: }

795: static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
796: {
797:   Mat_Product *product = C->product;
798:   Mat          A = product->A, B = product->B;
799:   PetscReal    fill = product->fill;

801:   PetscFunctionBegin;
802:   PetscCall(MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A, B, fill, C));
803:   C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
804:   PetscFunctionReturn(PETSC_SUCCESS);
805: }

807: PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A, Mat P, Mat C)
808: {
809:   Mat                 Ct;
810:   Vec                 zeros;
811:   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
812:   sparse_matrix_t     csrA, csrP, csrC;
813:   PetscBool           set, flag;
814:   struct matrix_descr descr_type_sym;
815:   PetscObjectState    state;

817:   PetscFunctionBegin;
818:   PetscCall(MatIsSymmetricKnown(A, &set, &flag));
819:   PetscCheck(set && flag, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric");

821:   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
822:   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
823:   PetscCall(PetscObjectStateGet((PetscObject)P, &state));
824:   if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
825:   csrA                = a->csrA;
826:   csrP                = p->csrA;
827:   csrC                = c->csrA;
828:   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
829:   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
830:   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

832:   /* the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
833:   PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FINALIZE_MULT);

835:   /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
836:    * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
837:    * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
838:    * We have to fill in the missing portion, which we currently do below by forming the transpose and performing at MatAXPY
839:    * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
840:    * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
841:    * the full matrix. */
842:   PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
843:   PetscCall(MatTranspose(C, MAT_INITIAL_MATRIX, &Ct));
844:   PetscCall(MatCreateVecs(C, &zeros, NULL));
845:   PetscCall(VecSetFromOptions(zeros));
846:   PetscCall(VecZeroEntries(zeros));
847:   PetscCall(MatDiagonalSet(Ct, zeros, INSERT_VALUES));
848:   PetscCall(MatAXPY(C, 1.0, Ct, DIFFERENT_NONZERO_PATTERN));
849:   /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
850:   PetscCall(MatProductCreateWithMat(A, P, NULL, C));
851:   PetscCall(MatProductSetType(C, MATPRODUCT_PtAP));
852:   PetscCall(MatSeqAIJMKL_create_mkl_handle(C));
853:   PetscCall(VecDestroy(&zeros));
854:   PetscCall(MatDestroy(&Ct));
855:   PetscFunctionReturn(PETSC_SUCCESS);
856: }

858: PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
859: {
860:   Mat_Product        *product = C->product;
861:   Mat                 A = product->A, P = product->B;
862:   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr;
863:   sparse_matrix_t     csrA, csrP, csrC;
864:   struct matrix_descr descr_type_sym;
865:   PetscObjectState    state;

867:   PetscFunctionBegin;
868:   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
869:   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
870:   PetscCall(PetscObjectStateGet((PetscObject)P, &state));
871:   if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
872:   csrA                = a->csrA;
873:   csrP                = p->csrA;
874:   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
875:   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
876:   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

878:   /* the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
879:   if (csrP && csrA) {
880:     PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FULL_MULT_NO_VAL);
881:   } else {
882:     csrC = NULL;
883:   }

885:   /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
886:    * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
887:    * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
888:    * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
889:    * is guaranteed. */
890:   PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, P->cmap->N, P->cmap->N, C));

892:   C->ops->productnumeric = MatProductNumeric_PtAP;
893:   PetscFunctionReturn(PETSC_SUCCESS);
894: }

896: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
897: {
898:   PetscFunctionBegin;
899:   C->ops->productsymbolic = MatProductSymbolic_AB;
900:   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
901:   PetscFunctionReturn(PETSC_SUCCESS);
902: }

904: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
905: {
906:   PetscFunctionBegin;
907:   C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
908:   PetscFunctionReturn(PETSC_SUCCESS);
909: }

911: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
912: {
913:   PetscFunctionBegin;
914:   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
915:   C->ops->productsymbolic          = MatProductSymbolic_ABt;
916:   PetscFunctionReturn(PETSC_SUCCESS);
917: }

919: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
920: {
921:   Mat_Product *product = C->product;
922:   Mat          A       = product->A;
923:   PetscBool    set, flag;

925:   PetscFunctionBegin;
926:   if (PetscDefined(USE_COMPLEX)) {
927:     /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used.
928:      * We do this in several other locations in this file. This works for the time being, but these
929:      * routines are considered unsafe and may be removed from the MatProduct code in the future.
930:      * TODO: Add proper MATSEQAIJMKL implementations */
931:     C->ops->productsymbolic = NULL;
932:   } else {
933:     /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
934:     PetscCall(MatIsSymmetricKnown(A, &set, &flag));
935:     if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
936:     else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
937:     /* we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(),
938:      * depending on whether the algorithm for the general case vs. the real symmetric one is used. */
939:   }
940:   PetscFunctionReturn(PETSC_SUCCESS);
941: }

943: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
944: {
945:   PetscFunctionBegin;
946:   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
947:   PetscFunctionReturn(PETSC_SUCCESS);
948: }

950: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
951: {
952:   PetscFunctionBegin;
953:   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
954:   PetscFunctionReturn(PETSC_SUCCESS);
955: }

957: PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
958: {
959:   Mat_Product *product = C->product;

961:   PetscFunctionBegin;
962:   switch (product->type) {
963:   case MATPRODUCT_AB:
964:     PetscCall(MatProductSetFromOptions_SeqAIJMKL_AB(C));
965:     break;
966:   case MATPRODUCT_AtB:
967:     PetscCall(MatProductSetFromOptions_SeqAIJMKL_AtB(C));
968:     break;
969:   case MATPRODUCT_ABt:
970:     PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABt(C));
971:     break;
972:   case MATPRODUCT_PtAP:
973:     PetscCall(MatProductSetFromOptions_SeqAIJMKL_PtAP(C));
974:     break;
975:   case MATPRODUCT_RARt:
976:     PetscCall(MatProductSetFromOptions_SeqAIJMKL_RARt(C));
977:     break;
978:   case MATPRODUCT_ABC:
979:     PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABC(C));
980:     break;
981:   default:
982:     break;
983:   }
984:   PetscFunctionReturn(PETSC_SUCCESS);
985: }
986: #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */

988: /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
989:  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
990:  * routine, but can also be used to convert an assembled SeqAIJ matrix
991:  * into a SeqAIJMKL one. */
992: PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
993: {
994:   Mat            B = *newmat;
995:   Mat_SeqAIJMKL *aijmkl;
996:   PetscBool      set;
997:   PetscBool      sametype;

999:   PetscFunctionBegin;
1000:   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));

1002:   PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype));
1003:   if (sametype) PetscFunctionReturn(PETSC_SUCCESS);

1005:   PetscCall(PetscNew(&aijmkl));
1006:   B->spptr = (void *)aijmkl;

1008:   /* Set function pointers for methods that we inherit from AIJ but override.
1009:    * We also parse some command line options below, since those determine some of the methods we point to. */
1010:   B->ops->duplicate   = MatDuplicate_SeqAIJMKL;
1011:   B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL;
1012:   B->ops->destroy     = MatDestroy_SeqAIJMKL;

1014:   aijmkl->sparse_optimized = PETSC_FALSE;
1015: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1016:   aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */
1017: #else
1018:   aijmkl->no_SpMV2 = PETSC_TRUE;
1019: #endif
1020:   aijmkl->eager_inspection = PETSC_FALSE;

1022:   /* Parse command line options. */
1023:   PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "AIJMKL Options", "Mat");
1024:   PetscCall(PetscOptionsBool("-mat_aijmkl_no_spmv2", "Disable use of inspector-executor (SpMV 2) routines", "None", (PetscBool)aijmkl->no_SpMV2, (PetscBool *)&aijmkl->no_SpMV2, &set));
1025:   PetscCall(PetscOptionsBool("-mat_aijmkl_eager_inspection", "Run inspection at matrix assembly time, instead of waiting until needed by an operation", "None", (PetscBool)aijmkl->eager_inspection, (PetscBool *)&aijmkl->eager_inspection, &set));
1026:   PetscOptionsEnd();
1027: #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1028:   if (!aijmkl->no_SpMV2) {
1029:     PetscCall(PetscInfo(B, "User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n"));
1030:     aijmkl->no_SpMV2 = PETSC_TRUE;
1031:   }
1032: #endif

1034: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1035:   B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
1036:   B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
1037:   B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
1038:   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
1039:   #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1040:   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJMKL;
1041:   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1042:   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1043:   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
1044:   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1045:     #if !defined(PETSC_USE_COMPLEX)
1046:   B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1047:     #else
1048:   B->ops->ptapnumeric = NULL;
1049:     #endif
1050:   #endif
1051: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

1053: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
1054:   /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
1055:    * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
1056:    * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
1057:    * versions in which the older interface has not been deprecated, we use the old interface. */
1058:   if (aijmkl->no_SpMV2) {
1059:     B->ops->mult             = MatMult_SeqAIJMKL;
1060:     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
1061:     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
1062:     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
1063:   }
1064: #endif

1066:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", MatConvert_SeqAIJMKL_SeqAIJ));

1068:   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJMKL));
1069:   *newmat = B;
1070:   PetscFunctionReturn(PETSC_SUCCESS);
1071: }

1073: /*@C
1074:   MatCreateSeqAIJMKL - Creates a sparse matrix of type `MATSEQAIJMKL`.

1076:   Collective

1078:   Input Parameters:
1079: + comm - MPI communicator, set to `PETSC_COMM_SELF`
1080: . m    - number of rows
1081: . n    - number of columns
1082: . nz   - number of nonzeros per row (same for all rows)
1083: - nnz  - array containing the number of nonzeros in the various rows
1084:          (possibly different for each row) or `NULL`

1086:   Output Parameter:
1087: . A - the matrix

1089:   Options Database Keys:
1090: + -mat_aijmkl_no_spmv2         - disable use of the SpMV2 inspector-executor routines
1091: - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection,
1092:                                   performing this step the first time the matrix is applied

1094:   Level: intermediate

1096:   Notes:
1097:   If `nnz` is given then `nz` is ignored

1099:   This type inherits from `MATSEQAIJ` and is largely identical, but uses sparse BLAS
1100:   routines from Intel MKL whenever possible.

1102:   If the installed version of MKL supports the "SpMV2" sparse
1103:   inspector-executor routines, then those are used by default.

1105:   `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, `MatMultTransposeAdd()`, `MatMatMult()`, `MatTransposeMatMult()`, and `MatPtAP()`
1106:   (for symmetric A) operations are currently supported.
1107:   MKL version 18, update 2 or later is required for `MatPtAP()`, `MatPtAPNumeric()` and `MatMatMultNumeric()`.

1109: .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateMPIAIJMKL()`, `MatSetValues()`
1110: @*/
1111: PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
1112: {
1113:   PetscFunctionBegin;
1114:   PetscCall(MatCreate(comm, A));
1115:   PetscCall(MatSetSizes(*A, m, n, m, n));
1116:   PetscCall(MatSetType(*A, MATSEQAIJMKL));
1117:   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, nnz));
1118:   PetscFunctionReturn(PETSC_SUCCESS);
1119: }

1121: PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
1122: {
1123:   PetscFunctionBegin;
1124:   PetscCall(MatSetType(A, MATSEQAIJ));
1125:   PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));
1126:   PetscFunctionReturn(PETSC_SUCCESS);
1127: }