Actual source code: aijmkl.c
  1: /*
  2:   Defines basic operations for the MATSEQAIJMKL matrix class.
  3:   This class is derived from the MATSEQAIJ class and retains the
  4:   compressed row storage (aka Yale sparse matrix format) but uses
  5:   sparse BLAS operations from the Intel Math Kernel Library (MKL)
  6:   wherever possible.
  7: */

  9: #include <../src/mat/impls/aij/seq/aij.h>
 10: #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
 11: #if defined(PETSC_HAVE_MKL_INTEL_ILP64)
 12:   #define MKL_ILP64
 13: #endif
 14: #include <mkl_spblas.h>

 16: typedef struct {
 17:   PetscBool        no_SpMV2;         /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
 18:   PetscBool        eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
 19:   PetscBool        sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
 20:   PetscObjectState state;
 21: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
 22:   sparse_matrix_t     csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
 23:   struct matrix_descr descr;
 24: #endif
 25: } Mat_SeqAIJMKL;

 27: PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat)
 28: {
 29:   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
 30:   /* so we will ignore 'MatType type'. */
 31:   Mat B = *newmat;
 32: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
 33:   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
 34: #endif

 36:   PetscFunctionBegin;
 37:   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));

 39:   /* Reset the original function pointers. */
 40:   B->ops->duplicate               = MatDuplicate_SeqAIJ;
 41:   B->ops->assemblyend             = MatAssemblyEnd_SeqAIJ;
 42:   B->ops->destroy                 = MatDestroy_SeqAIJ;
 43:   B->ops->mult                    = MatMult_SeqAIJ;
 44:   B->ops->multtranspose           = MatMultTranspose_SeqAIJ;
 45:   B->ops->multadd                 = MatMultAdd_SeqAIJ;
 46:   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJ;
 47:   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJ;
 48:   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
 49:   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJ_SeqAIJ;
 50:   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
 51:   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
 52:   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJ_SeqAIJ;

 54:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", NULL));

 56: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
 57:   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
 58:    * simply involves destroying the MKL sparse matrix handle and then freeing
 59:    * the spptr pointer. */
 60:   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL *)B->spptr;

 62:   if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
 63: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
 64:   PetscCall(PetscFree(B->spptr));

 66:   /* Change the type of B to MATSEQAIJ. */
 67:   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ));

 69:   *newmat = B;
 70:   PetscFunctionReturn(PETSC_SUCCESS);
 71: }

 73: static PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
 74: {
 75:   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;

 77:   PetscFunctionBegin;
 78:   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
 79:   if (aijmkl) {
 80:     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
 81: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
 82:     if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
 83: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
 84:     PetscCall(PetscFree(A->spptr));
 85:   }

 87:   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
 88:    * to destroy everything that remains. */
 89:   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ));
 90:   /* I don't call MatSetType().  I believe this is because that
 91:    * is only to be called when *building* a matrix.  I could be wrong, but
 92:    * that is how things work for the SuperLU matrix class. */
 93:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaijmkl_seqaij_C", NULL));
 94:   PetscCall(MatDestroy_SeqAIJ(A));
 95:   PetscFunctionReturn(PETSC_SUCCESS);
 96: }

 98: /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
 99:  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
100:  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
101:  * handle, creates a new one, and then calls mkl_sparse_optimize().
102:  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
103:  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
104:  * an unoptimized matrix handle here. */
105: PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
106: {
107: #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
108:   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
109:    * does nothing. We make it callable anyway in this case because it cuts
110:    * down on littering the code with #ifdefs. */
111:   PetscFunctionBegin;
112:   PetscFunctionReturn(PETSC_SUCCESS);
113: #else
114:   Mat_SeqAIJ    *a      = (Mat_SeqAIJ *)A->data;
115:   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
116:   PetscInt       m, n;
117:   MatScalar     *aa;
118:   PetscInt      *aj, *ai;

120:   PetscFunctionBegin;
121:   #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
122:   /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
123:    * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
124:    * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
125:    * mkl_sparse_optimize() later. */
126:   if (aijmkl->no_SpMV2) PetscFunctionReturn(PETSC_SUCCESS);
127:   #endif

129:   if (aijmkl->sparse_optimized) {
130:     /* Matrix has been previously assembled and optimized. Must destroy old
131:      * matrix handle before running the optimization step again. */
132:     PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
133:   }
134:   aijmkl->sparse_optimized = PETSC_FALSE;

136:   /* Now perform the SpMV2 setup and matrix optimization. */
137:   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
138:   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
139:   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
140:   m                  = A->rmap->n;
141:   n                  = A->cmap->n;
142:   aj                 = a->j; /* aj[k] gives column index for element aa[k]. */
143:   aa                 = a->a; /* Nonzero elements stored row-by-row. */
144:   ai                 = a->i; /* ai[k] is the position in aa and aj where row k starts. */
145:   if (a->nz && aa && !A->structure_only) {
146:     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
147:      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
148:     PetscCallExternal(mkl_sparse_x_create_csr, &aijmkl->csrA, SPARSE_INDEX_BASE_ZERO, (MKL_INT)m, (MKL_INT)n, (MKL_INT *)ai, (MKL_INT *)(ai + 1), (MKL_INT *)aj, aa);
149:     PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
150:     PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
151:     if (!aijmkl->no_SpMV2) PetscCallExternal(mkl_sparse_optimize, aijmkl->csrA);
152:     aijmkl->sparse_optimized = PETSC_TRUE;
153:     PetscCall(PetscObjectStateGet((PetscObject)A, &aijmkl->state));
154:   } else {
155:     aijmkl->csrA = NULL;
156:   }
157:   PetscFunctionReturn(PETSC_SUCCESS);
158: #endif
159: }

161: #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
162: /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
163: static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm, sparse_matrix_t csrA, PetscInt nrows, PetscInt ncols, Mat A)
164: {
165:   sparse_index_base_t indexing;
166:   PetscInt            m, n;
167:   PetscInt           *aj, *ai, *dummy;
168:   MatScalar          *aa;
169:   Mat_SeqAIJMKL      *aijmkl;

171:   PetscFunctionBegin;
172:   if (csrA) {
173:     /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
174:     PetscCallExternal(mkl_sparse_x_export_csr, csrA, &indexing, (MKL_INT *)&m, (MKL_INT *)&n, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);
175:     PetscCheck((m == nrows) && (n == ncols), PETSC_COMM_SELF, PETSC_ERR_LIB, "Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
176:   } else {
177:     aj = ai = NULL;
178:     aa      = NULL;
179:   }

181:   PetscCall(MatSetType(A, MATSEQAIJ));
182:   PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, nrows, ncols));
183:   /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
184:    * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
185:    * they will be destroyed when the MKL handle is destroyed.
186:    * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
187:   if (csrA) {
188:     PetscCall(MatSeqAIJSetPreallocationCSR(A, ai, aj, NULL));
189:   } else {
190:     /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
191:     PetscCall(MatSetUp(A));
192:     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
193:     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
194:   }

196:   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
197:    * Now turn it into a MATSEQAIJMKL. */
198:   PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));

200:   aijmkl       = (Mat_SeqAIJMKL *)A->spptr;
201:   aijmkl->csrA = csrA;

203:   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
204:    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
205:    * and just need to be able to run the MKL optimization step. */
206:   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
207:   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
208:   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
209:   if (csrA) {
210:     PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
211:     PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
212:   }
213:   PetscCall(PetscObjectStateGet((PetscObject)A, &aijmkl->state));
214:   PetscFunctionReturn(PETSC_SUCCESS);
215: }
216: #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */

218: /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
219:  * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
220:  * MatMatMultNumeric(). */
221: #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
222: static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
223: {
224:   PetscInt            i;
225:   PetscInt            nrows, ncols;
226:   PetscInt            nz;
227:   PetscInt           *ai, *aj, *dummy;
228:   PetscScalar        *aa;
229:   Mat_SeqAIJMKL      *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
230:   sparse_index_base_t indexing;

232:   PetscFunctionBegin;
233:   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
234:   if (!aijmkl->csrA) PetscFunctionReturn(PETSC_SUCCESS);

236:   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
237:   PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, (MKL_INT *)&nrows, (MKL_INT *)&ncols, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);

239:   /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
240:    * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
241:   for (i = 0; i < nrows; i++) {
242:     nz = ai[i + 1] - ai[i];
243:     PetscCall(MatSetValues_SeqAIJ(A, 1, &i, nz, aj + ai[i], aa + ai[i], INSERT_VALUES));
244:   }

246:   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
247:   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

249:   PetscCall(PetscObjectStateGet((PetscObject)A, &aijmkl->state));
250:   /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
251:    * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
252:    * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
253:   aijmkl->sparse_optimized = PETSC_FALSE;
254:   PetscFunctionReturn(PETSC_SUCCESS);
255: }
256: #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */

258: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
259: PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A, PetscViewer viewer)
260: {
261:   PetscInt            i, j, k;
262:   PetscInt            nrows, ncols;
263:   PetscInt            nz;
264:   PetscInt           *ai, *aj, *dummy;
265:   PetscScalar        *aa;
266:   Mat_SeqAIJMKL      *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
267:   sparse_index_base_t indexing;

269:   PetscFunctionBegin;
270:   PetscCall(PetscViewerASCIIPrintf(viewer, "Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n"));

272:   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
273:   if (!aijmkl->csrA) {
274:     PetscCall(PetscViewerASCIIPrintf(viewer, "MKL matrix handle is NULL\n"));
275:     PetscFunctionReturn(PETSC_SUCCESS);
276:   }

278:   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
279:   PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, (MKL_INT *)&nrows, (MKL_INT *)&ncols, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);

281:   k = 0;
282:   for (i = 0; i < nrows; i++) {
283:     PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ": ", i));
284:     nz = ai[i + 1] - ai[i];
285:     for (j = 0; j < nz; j++) {
286:       if (aa) {
287:         PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", %g)  ", aj[k], PetscRealPart(aa[k])));
288:       } else {
289:         PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", NULL)", aj[k]));
290:       }
291:       k++;
292:     }
293:     PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
294:   }
295:   PetscFunctionReturn(PETSC_SUCCESS);
296: }
297: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

299: static PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
300: {
301:   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
302:   Mat_SeqAIJMKL *aijmkl_dest;

304:   PetscFunctionBegin;
305:   PetscCall(MatDuplicate_SeqAIJ(A, op, M));
306:   aijmkl_dest = (Mat_SeqAIJMKL *)(*M)->spptr;
307:   PetscCall(PetscArraycpy(aijmkl_dest, aijmkl, 1));
308:   aijmkl_dest->sparse_optimized = PETSC_FALSE;
309:   if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
310:   PetscFunctionReturn(PETSC_SUCCESS);
311: }

313: static PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
314: {
315:   Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data;
316:   Mat_SeqAIJMKL *aijmkl;

318:   PetscFunctionBegin;
319:   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);

321:   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
322:    * extra information and some different methods, call the AssemblyEnd
323:    * routine for a MATSEQAIJ.
324:    * I'm not sure if this is the best way to do this, but it avoids
325:    * a lot of code duplication. */
326:   a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */
327:   PetscCall(MatAssemblyEnd_SeqAIJ(A, mode));

329:   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
330:    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
331:   aijmkl = (Mat_SeqAIJMKL *)A->spptr;
332:   if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
333:   PetscFunctionReturn(PETSC_SUCCESS);
334: }

336: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
337: static PetscErrorCode MatMult_SeqAIJMKL(Mat A, Vec xx, Vec yy)
338: {
339:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
340:   const PetscScalar *x;
341:   PetscScalar       *y;
342:   const MatScalar   *aa;
343:   PetscInt           m     = A->rmap->n;
344:   PetscInt           n     = A->cmap->n;
345:   PetscScalar        alpha = 1.0;
346:   PetscScalar        beta  = 0.0;
347:   const PetscInt    *aj, *ai;
348:   char               matdescra[6];

350:   /* Variables not in MatMult_SeqAIJ. */
351:   char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */

353:   PetscFunctionBegin;
354:   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
355:   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
356:   PetscCall(VecGetArrayRead(xx, &x));
357:   PetscCall(VecGetArray(yy, &y));
358:   aj = a->j; /* aj[k] gives column index for element aa[k]. */
359:   aa = a->a; /* Nonzero elements stored row-by-row. */
360:   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */

362:   /* Call MKL sparse BLAS routine to do the MatMult. */
363:   mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);

365:   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
366:   PetscCall(VecRestoreArrayRead(xx, &x));
367:   PetscCall(VecRestoreArray(yy, &y));
368:   PetscFunctionReturn(PETSC_SUCCESS);
369: }
370: #endif

372: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
373: PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
374: {
375:   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
376:   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
377:   const PetscScalar *x;
378:   PetscScalar       *y;
379:   PetscObjectState   state;

381:   PetscFunctionBegin;
382:   /* If there are no nonzero entries, zero yy and return immediately. */
383:   if (!a->nz) {
384:     PetscCall(VecGetArray(yy, &y));
385:     PetscCall(PetscArrayzero(y, A->rmap->n));
386:     PetscCall(VecRestoreArray(yy, &y));
387:     PetscFunctionReturn(PETSC_SUCCESS);
388:   }

390:   PetscCall(VecGetArrayRead(xx, &x));
391:   PetscCall(VecGetArray(yy, &y));

393:   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
394:    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
395:    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
396:   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
397:   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));

399:   /* Call MKL SpMV2 executor routine to do the MatMult. */
400:   PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);

402:   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
403:   PetscCall(VecRestoreArrayRead(xx, &x));
404:   PetscCall(VecRestoreArray(yy, &y));
405:   PetscFunctionReturn(PETSC_SUCCESS);
406: }
407: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

409: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
410: static PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A, Vec xx, Vec yy)
411: {
412:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
413:   const PetscScalar *x;
414:   PetscScalar       *y;
415:   const MatScalar   *aa;
416:   PetscInt           m     = A->rmap->n;
417:   PetscInt           n     = A->cmap->n;
418:   PetscScalar        alpha = 1.0;
419:   PetscScalar        beta  = 0.0;
420:   const PetscInt    *aj, *ai;
421:   char               matdescra[6];

423:   /* Variables not in MatMultTranspose_SeqAIJ. */
424:   char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */

426:   PetscFunctionBegin;
427:   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
428:   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
429:   PetscCall(VecGetArrayRead(xx, &x));
430:   PetscCall(VecGetArray(yy, &y));
431:   aj = a->j; /* aj[k] gives column index for element aa[k]. */
432:   aa = a->a; /* Nonzero elements stored row-by-row. */
433:   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */

435:   /* Call MKL sparse BLAS routine to do the MatMult. */
436:   mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);

438:   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
439:   PetscCall(VecRestoreArrayRead(xx, &x));
440:   PetscCall(VecRestoreArray(yy, &y));
441:   PetscFunctionReturn(PETSC_SUCCESS);
442: }
443: #endif

445: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
446: PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
447: {
448:   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
449:   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
450:   const PetscScalar *x;
451:   PetscScalar       *y;
452:   PetscObjectState   state;

454:   PetscFunctionBegin;
455:   /* If there are no nonzero entries, zero yy and return immediately. */
456:   if (!a->nz) {
457:     PetscCall(VecGetArray(yy, &y));
458:     PetscCall(PetscArrayzero(y, A->cmap->n));
459:     PetscCall(VecRestoreArray(yy, &y));
460:     PetscFunctionReturn(PETSC_SUCCESS);
461:   }

463:   PetscCall(VecGetArrayRead(xx, &x));
464:   PetscCall(VecGetArray(yy, &y));

466:   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
467:    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
468:    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
469:   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
470:   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));

472:   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
473:   PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);

475:   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
476:   PetscCall(VecRestoreArrayRead(xx, &x));
477:   PetscCall(VecRestoreArray(yy, &y));
478:   PetscFunctionReturn(PETSC_SUCCESS);
479: }
480: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

482: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
483: static PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
484: {
485:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
486:   const PetscScalar *x;
487:   PetscScalar       *y, *z;
488:   const MatScalar   *aa;
489:   PetscInt           m = A->rmap->n;
490:   PetscInt           n = A->cmap->n;
491:   const PetscInt    *aj, *ai;
492:   PetscInt           i;

494:   /* Variables not in MatMultAdd_SeqAIJ. */
495:   char        transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
496:   PetscScalar alpha  = 1.0;
497:   PetscScalar beta;
498:   char        matdescra[6];

500:   PetscFunctionBegin;
501:   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
502:   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */

504:   PetscCall(VecGetArrayRead(xx, &x));
505:   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
506:   aj = a->j; /* aj[k] gives column index for element aa[k]. */
507:   aa = a->a; /* Nonzero elements stored row-by-row. */
508:   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */

510:   /* Call MKL sparse BLAS routine to do the MatMult. */
511:   if (zz == yy) {
512:     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
513:     beta = 1.0;
514:     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
515:   } else {
516:     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
517:      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
518:     beta = 0.0;
519:     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
520:     for (i = 0; i < m; i++) z[i] += y[i];
521:   }

523:   PetscCall(PetscLogFlops(2.0 * a->nz));
524:   PetscCall(VecRestoreArrayRead(xx, &x));
525:   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
526:   PetscFunctionReturn(PETSC_SUCCESS);
527: }
528: #endif

530: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
531: PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
532: {
533:   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
534:   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
535:   const PetscScalar *x;
536:   PetscScalar       *y, *z;
537:   PetscInt           m = A->rmap->n;
538:   PetscInt           i;

540:   /* Variables not in MatMultAdd_SeqAIJ. */
541:   PetscObjectState state;

543:   PetscFunctionBegin;
544:   /* If there are no nonzero entries, set zz = yy and return immediately. */
545:   if (!a->nz) {
546:     PetscCall(VecGetArrayPair(yy, zz, &y, &z));
547:     PetscCall(PetscArraycpy(z, y, m));
548:     PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
549:     PetscFunctionReturn(PETSC_SUCCESS);
550:   }

552:   PetscCall(VecGetArrayRead(xx, &x));
553:   PetscCall(VecGetArrayPair(yy, zz, &y, &z));

555:   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
556:    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
557:    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
558:   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
559:   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));

561:   /* Call MKL sparse BLAS routine to do the MatMult. */
562:   if (zz == yy) {
563:     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
564:      * with alpha and beta both set to 1.0. */
565:     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
566:   } else {
567:     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
568:      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
569:     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
570:     for (i = 0; i < m; i++) z[i] += y[i];
571:   }

573:   PetscCall(PetscLogFlops(2.0 * a->nz));
574:   PetscCall(VecRestoreArrayRead(xx, &x));
575:   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
576:   PetscFunctionReturn(PETSC_SUCCESS);
577: }
578: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

580: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
581: static PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
582: {
583:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
584:   const PetscScalar *x;
585:   PetscScalar       *y, *z;
586:   const MatScalar   *aa;
587:   PetscInt           m = A->rmap->n;
588:   PetscInt           n = A->cmap->n;
589:   const PetscInt    *aj, *ai;
590:   PetscInt           i;

592:   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
593:   char        transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
594:   PetscScalar alpha  = 1.0;
595:   PetscScalar beta;
596:   char        matdescra[6];

598:   PetscFunctionBegin;
599:   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
600:   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */

602:   PetscCall(VecGetArrayRead(xx, &x));
603:   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
604:   aj = a->j; /* aj[k] gives column index for element aa[k]. */
605:   aa = a->a; /* Nonzero elements stored row-by-row. */
606:   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */

608:   /* Call MKL sparse BLAS routine to do the MatMult. */
609:   if (zz == yy) {
610:     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
611:     beta = 1.0;
612:     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
613:   } else {
614:     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
615:      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
616:     beta = 0.0;
617:     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
618:     for (i = 0; i < n; i++) z[i] += y[i];
619:   }

621:   PetscCall(PetscLogFlops(2.0 * a->nz));
622:   PetscCall(VecRestoreArrayRead(xx, &x));
623:   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
624:   PetscFunctionReturn(PETSC_SUCCESS);
625: }
626: #endif

628: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
629: PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
630: {
631:   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
632:   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
633:   const PetscScalar *x;
634:   PetscScalar       *y, *z;
635:   PetscInt           n = A->cmap->n;
636:   PetscInt           i;
637:   PetscObjectState   state;

639:   /* Variables not in MatMultTransposeAdd_SeqAIJ. */

641:   PetscFunctionBegin;
642:   /* If there are no nonzero entries, set zz = yy and return immediately. */
643:   if (!a->nz) {
644:     PetscCall(VecGetArrayPair(yy, zz, &y, &z));
645:     PetscCall(PetscArraycpy(z, y, n));
646:     PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
647:     PetscFunctionReturn(PETSC_SUCCESS);
648:   }

650:   PetscCall(VecGetArrayRead(xx, &x));
651:   PetscCall(VecGetArrayPair(yy, zz, &y, &z));

653:   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
654:    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
655:    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
656:   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
657:   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));

659:   /* Call MKL sparse BLAS routine to do the MatMult. */
660:   if (zz == yy) {
661:     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
662:      * with alpha and beta both set to 1.0. */
663:     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
664:   } else {
665:     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
666:      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
667:     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
668:     for (i = 0; i < n; i++) z[i] += y[i];
669:   }

671:   PetscCall(PetscLogFlops(2.0 * a->nz));
672:   PetscCall(VecRestoreArrayRead(xx, &x));
673:   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
674:   PetscFunctionReturn(PETSC_SUCCESS);
675: }
676: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

678: #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
679: static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
680: {
681:   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr;
682:   sparse_matrix_t     csrA, csrB, csrC;
683:   PetscInt            nrows, ncols;
684:   struct matrix_descr descr_type_gen;
685:   PetscObjectState    state;

687:   PetscFunctionBegin;
688:   /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
689:    * not handle sparse matrices with zero rows or columns. */
690:   if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
691:   else nrows = A->cmap->N;
692:   if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
693:   else ncols = B->rmap->N;

695:   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
696:   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
697:   PetscCall(PetscObjectStateGet((PetscObject)B, &state));
698:   if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
699:   csrA                = a->csrA;
700:   csrB                = b->csrA;
701:   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

703:   if (csrA && csrB) {
704:     PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FULL_MULT_NO_VAL, &csrC);
705:   } else {
706:     csrC = NULL;
707:   }

709:   PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, nrows, ncols, C));
710:   PetscFunctionReturn(PETSC_SUCCESS);
711: }

713: PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
714: {
715:   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
716:   sparse_matrix_t     csrA, csrB, csrC;
717:   struct matrix_descr descr_type_gen;
718:   PetscObjectState    state;

720:   PetscFunctionBegin;
721:   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
722:   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
723:   PetscCall(PetscObjectStateGet((PetscObject)B, &state));
724:   if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
725:   csrA                = a->csrA;
726:   csrB                = b->csrA;
727:   csrC                = c->csrA;
728:   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

730:   if (csrA && csrB) {
731:     PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FINALIZE_MULT, &csrC);
732:   } else {
733:     csrC = NULL;
734:   }

736:   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
737:   PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
738:   PetscFunctionReturn(PETSC_SUCCESS);
739: }

741: PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
742: {
743:   PetscFunctionBegin;
744:   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
745:   PetscFunctionReturn(PETSC_SUCCESS);
746: }

748: PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
749: {
750:   PetscFunctionBegin;
751:   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
752:   PetscFunctionReturn(PETSC_SUCCESS);
753: }

755: PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
756: {
757:   PetscFunctionBegin;
758:   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
759:   PetscFunctionReturn(PETSC_SUCCESS);
760: }

762: PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
763: {
764:   PetscFunctionBegin;
765:   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
766:   PetscFunctionReturn(PETSC_SUCCESS);
767: }

769: PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
770: {
771:   PetscFunctionBegin;
772:   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C));
773:   PetscFunctionReturn(PETSC_SUCCESS);
774: }

776: PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
777: {
778:   PetscFunctionBegin;
779:   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C));
780:   PetscFunctionReturn(PETSC_SUCCESS);
781: }

783: static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
784: {
785:   Mat_Product *product = C->product;
786:   Mat          A = product->A, B = product->B;

788:   PetscFunctionBegin;
789:   PetscCall(MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A, B, C));
790:   PetscFunctionReturn(PETSC_SUCCESS);
791: }

793: static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
794: {
795:   Mat_Product *product = C->product;
796:   Mat          A = product->A, B = product->B;
797:   PetscReal    fill = product->fill;

799:   PetscFunctionBegin;
800:   PetscCall(MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A, B, fill, C));
801:   C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
802:   PetscFunctionReturn(PETSC_SUCCESS);
803: }

805: PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A, Mat P, Mat C)
806: {
807:   Mat                 Ct;
808:   Vec                 zeros;
809:   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
810:   sparse_matrix_t     csrA, csrP, csrC;
811:   PetscBool           set, flag;
812:   struct matrix_descr descr_type_sym;
813:   PetscObjectState    state;

815:   PetscFunctionBegin;
816:   PetscCall(MatIsSymmetricKnown(A, &set, &flag));
817:   PetscCheck(set && flag, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric");

819:   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
820:   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
821:   PetscCall(PetscObjectStateGet((PetscObject)P, &state));
822:   if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
823:   csrA                = a->csrA;
824:   csrP                = p->csrA;
825:   csrC                = c->csrA;
826:   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
827:   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
828:   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

830:   /* the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
831:   PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FINALIZE_MULT);

833:   /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
834:    * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
835:    * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
836:    * We have to fill in the missing portion, which we currently do below by forming the transpose and performing at MatAXPY
837:    * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
838:    * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
839:    * the full matrix. */
840:   PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
841:   PetscCall(MatTranspose(C, MAT_INITIAL_MATRIX, &Ct));
842:   PetscCall(MatCreateVecs(C, &zeros, NULL));
843:   PetscCall(VecSetFromOptions(zeros));
844:   PetscCall(VecZeroEntries(zeros));
845:   PetscCall(MatDiagonalSet(Ct, zeros, INSERT_VALUES));
846:   PetscCall(MatAXPY(C, 1.0, Ct, DIFFERENT_NONZERO_PATTERN));
847:   /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
848:   PetscCall(MatProductCreateWithMat(A, P, NULL, C));
849:   PetscCall(MatProductSetType(C, MATPRODUCT_PtAP));
850:   PetscCall(MatSeqAIJMKL_create_mkl_handle(C));
851:   PetscCall(VecDestroy(&zeros));
852:   PetscCall(MatDestroy(&Ct));
853:   PetscFunctionReturn(PETSC_SUCCESS);
854: }

856: PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
857: {
858:   Mat_Product        *product = C->product;
859:   Mat                 A = product->A, P = product->B;
860:   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr;
861:   sparse_matrix_t     csrA, csrP, csrC;
862:   struct matrix_descr descr_type_sym;
863:   PetscObjectState    state;

865:   PetscFunctionBegin;
866:   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
867:   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
868:   PetscCall(PetscObjectStateGet((PetscObject)P, &state));
869:   if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
870:   csrA                = a->csrA;
871:   csrP                = p->csrA;
872:   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
873:   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
874:   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

876:   /* the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
877:   if (csrP && csrA) {
878:     PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FULL_MULT_NO_VAL);
879:   } else {
880:     csrC = NULL;
881:   }

883:   /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
884:    * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
885:    * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
886:    * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
887:    * is guaranteed. */
888:   PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, P->cmap->N, P->cmap->N, C));

890:   C->ops->productnumeric = MatProductNumeric_PtAP;
891:   PetscFunctionReturn(PETSC_SUCCESS);
892: }

894: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
895: {
896:   PetscFunctionBegin;
897:   C->ops->productsymbolic = MatProductSymbolic_AB;
898:   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
899:   PetscFunctionReturn(PETSC_SUCCESS);
900: }

902: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
903: {
904:   PetscFunctionBegin;
905:   C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
906:   PetscFunctionReturn(PETSC_SUCCESS);
907: }

909: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
910: {
911:   PetscFunctionBegin;
912:   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
913:   C->ops->productsymbolic          = MatProductSymbolic_ABt;
914:   PetscFunctionReturn(PETSC_SUCCESS);
915: }

917: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
918: {
919:   Mat_Product *product = C->product;
920:   Mat          A       = product->A;
921:   PetscBool    set, flag;

923:   PetscFunctionBegin;
924:   /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
925:   PetscCall(MatIsSymmetricKnown(A, &set, &flag));
926:   PetscCheck(!PetscDefined(USE_COMPLEX) && set && flag, PETSC_COMM_SELF, PETSC_ERR_SUP, "MATPRODUCT_PtAP not supported for type SeqAIJMKL");
927:   /* we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal() */
928:   C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
929:   PetscFunctionReturn(PETSC_SUCCESS);
930: }

932: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
933: {
934:   PetscFunctionBegin;
935:   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
936:   PetscFunctionReturn(PETSC_SUCCESS);
937: }

939: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
940: {
941:   PetscFunctionBegin;
942:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MATPRODUCT_ABC not supported for type SeqAIJMKL");
943: }

945: PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
946: {
947:   Mat_Product *product = C->product;

949:   PetscFunctionBegin;
950:   switch (product->type) {
951:   case MATPRODUCT_AB:
952:     PetscCall(MatProductSetFromOptions_SeqAIJMKL_AB(C));
953:     break;
954:   case MATPRODUCT_AtB:
955:     PetscCall(MatProductSetFromOptions_SeqAIJMKL_AtB(C));
956:     break;
957:   case MATPRODUCT_ABt:
958:     PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABt(C));
959:     break;
960:   case MATPRODUCT_PtAP:
961:     PetscCall(MatProductSetFromOptions_SeqAIJMKL_PtAP(C));
962:     break;
963:   case MATPRODUCT_RARt:
964:     PetscCall(MatProductSetFromOptions_SeqAIJMKL_RARt(C));
965:     break;
966:   case MATPRODUCT_ABC:
967:     PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABC(C));
968:     break;
969:   default:
970:     break;
971:   }
972:   PetscFunctionReturn(PETSC_SUCCESS);
973: }
974: #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */

976: /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
977:  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
978:  * routine, but can also be used to convert an assembled SeqAIJ matrix
979:  * into a SeqAIJMKL one. */
980: PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
981: {
982:   Mat            B = *newmat;
983:   Mat_SeqAIJMKL *aijmkl;
984:   PetscBool      set;
985:   PetscBool      sametype;

987:   PetscFunctionBegin;
988:   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));

990:   PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype));
991:   if (sametype) PetscFunctionReturn(PETSC_SUCCESS);

993:   PetscCall(PetscNew(&aijmkl));
994:   B->spptr = (void *)aijmkl;

996:   /* Set function pointers for methods that we inherit from AIJ but override.
997:    * We also parse some command line options below, since those determine some of the methods we point to. */
998:   B->ops->duplicate   = MatDuplicate_SeqAIJMKL;
999:   B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL;
1000:   B->ops->destroy     = MatDestroy_SeqAIJMKL;

1002:   aijmkl->sparse_optimized = PETSC_FALSE;
1003: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1004:   aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */
1005: #else
1006:   aijmkl->no_SpMV2 = PETSC_TRUE;
1007: #endif
1008:   aijmkl->eager_inspection = PETSC_FALSE;

1010:   /* Parse command line options. */
1011:   PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "AIJMKL Options", "Mat");
1012:   PetscCall(PetscOptionsBool("-mat_aijmkl_no_spmv2", "Disable use of inspector-executor (SpMV 2) routines", "None", (PetscBool)aijmkl->no_SpMV2, (PetscBool *)&aijmkl->no_SpMV2, &set));
1013:   PetscCall(PetscOptionsBool("-mat_aijmkl_eager_inspection", "Run inspection at matrix assembly time, instead of waiting until needed by an operation", "None", (PetscBool)aijmkl->eager_inspection, (PetscBool *)&aijmkl->eager_inspection, &set));
1014:   PetscOptionsEnd();
1015: #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1016:   if (!aijmkl->no_SpMV2) {
1017:     PetscCall(PetscInfo(B, "User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n"));
1018:     aijmkl->no_SpMV2 = PETSC_TRUE;
1019:   }
1020: #endif

1022: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1023:   B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
1024:   B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
1025:   B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
1026:   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
1027:   #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1028:   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJMKL;
1029:   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1030:   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1031:   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
1032:   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1033:     #if !defined(PETSC_USE_COMPLEX)
1034:   B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1035:     #else
1036:   B->ops->ptapnumeric = NULL;
1037:     #endif
1038:   #endif
1039: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

1041: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
1042:   /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
1043:    * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
1044:    * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
1045:    * versions in which the older interface has not been deprecated, we use the old interface. */
1046:   if (aijmkl->no_SpMV2) {
1047:     B->ops->mult             = MatMult_SeqAIJMKL;
1048:     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
1049:     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
1050:     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
1051:   }
1052: #endif

1054:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", MatConvert_SeqAIJMKL_SeqAIJ));

1056:   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJMKL));
1057:   *newmat = B;
1058:   PetscFunctionReturn(PETSC_SUCCESS);
1059: }

1061: /*@C
1062:   MatCreateSeqAIJMKL - Creates a sparse matrix of type `MATSEQAIJMKL`.

1064:   Collective

1066:   Input Parameters:
1067: + comm - MPI communicator, set to `PETSC_COMM_SELF`
1068: . m    - number of rows
1069: . n    - number of columns
1070: . nz   - number of nonzeros per row (same for all rows)
1071: - nnz  - array containing the number of nonzeros in the various rows
1072:          (possibly different for each row) or `NULL`

1074:   Output Parameter:
1075: . A - the matrix

1077:   Options Database Keys:
1078: + -mat_aijmkl_no_spmv2         - disable use of the SpMV2 inspector-executor routines
1079: - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection,
1080:                                   performing this step the first time the matrix is applied

1082:   Level: intermediate

1084:   Notes:
1085:   If `nnz` is given then `nz` is ignored

1087:   This type inherits from `MATSEQAIJ` and is largely identical, but uses sparse BLAS
1088:   routines from Intel MKL whenever possible.

1090:   If the installed version of MKL supports the "SpMV2" sparse
1091:   inspector-executor routines, then those are used by default.

1093:   `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, `MatMultTransposeAdd()`, `MatMatMult()`, `MatTransposeMatMult()`, and `MatPtAP()`
1094:   (for symmetric A) operations are currently supported.
1095:   MKL version 18, update 2 or later is required for `MatPtAP()`, `MatPtAPNumeric()` and `MatMatMultNumeric()`.

1097: .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateMPIAIJMKL()`, `MatSetValues()`
1098: @*/
1099: PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
1100: {
1101:   PetscFunctionBegin;
1102:   PetscCall(MatCreate(comm, A));
1103:   PetscCall(MatSetSizes(*A, m, n, m, n));
1104:   PetscCall(MatSetType(*A, MATSEQAIJMKL));
1105:   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, nnz));
1106:   PetscFunctionReturn(PETSC_SUCCESS);
1107: }

1109: PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
1110: {
1111:   PetscFunctionBegin;
1112:   PetscCall(MatSetType(A, MATSEQAIJ));
1113:   PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));
1114:   PetscFunctionReturn(PETSC_SUCCESS);
1115: }