Actual source code: aijmkl.c
1: /*
2: Defines basic operations for the MATSEQAIJMKL matrix class.
3: This class is derived from the MATSEQAIJ class and retains the
4: compressed row storage (aka Yale sparse matrix format) but uses
5: sparse BLAS operations from the Intel Math Kernel Library (MKL)
6: wherever possible.
7: */
9: #include <../src/mat/impls/aij/seq/aij.h>
10: #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
11: #if defined(PETSC_HAVE_MKL_INTEL_ILP64)
12: #define MKL_ILP64
13: #endif
14: #include <mkl_spblas.h>
16: typedef struct {
17: PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
18: PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
19: PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
20: PetscObjectState state;
21: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
22: sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
23: struct matrix_descr descr;
24: #endif
25: } Mat_SeqAIJMKL;
27: PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat)
28: {
29: /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
30: /* so we will ignore 'MatType type'. */
31: Mat B = *newmat;
32: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
33: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
34: #endif
36: PetscFunctionBegin;
37: if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
39: /* Reset the original function pointers. */
40: B->ops->duplicate = MatDuplicate_SeqAIJ;
41: B->ops->assemblyend = MatAssemblyEnd_SeqAIJ;
42: B->ops->destroy = MatDestroy_SeqAIJ;
43: B->ops->mult = MatMult_SeqAIJ;
44: B->ops->multtranspose = MatMultTranspose_SeqAIJ;
45: B->ops->multadd = MatMultAdd_SeqAIJ;
46: B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
47: B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJ;
48: B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
49: B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ;
50: B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
51: B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
52: B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ;
54: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", NULL));
56: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
57: /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
58: * simply involves destroying the MKL sparse matrix handle and then freeing
59: * the spptr pointer. */
60: if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL *)B->spptr;
62: if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
63: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
64: PetscCall(PetscFree(B->spptr));
66: /* Change the type of B to MATSEQAIJ. */
67: PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ));
69: *newmat = B;
70: PetscFunctionReturn(PETSC_SUCCESS);
71: }
73: static PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
74: {
75: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
77: PetscFunctionBegin;
78: /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
79: if (aijmkl) {
80: /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
81: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
82: if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
83: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
84: PetscCall(PetscFree(A->spptr));
85: }
87: /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
88: * to destroy everything that remains. */
89: PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ));
90: /* I don't call MatSetType(). I believe this is because that
91: * is only to be called when *building* a matrix. I could be wrong, but
92: * that is how things work for the SuperLU matrix class. */
93: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaijmkl_seqaij_C", NULL));
94: PetscCall(MatDestroy_SeqAIJ(A));
95: PetscFunctionReturn(PETSC_SUCCESS);
96: }
98: /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
99: * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
100: * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
101: * handle, creates a new one, and then calls mkl_sparse_optimize().
102: * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
103: * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
104: * an unoptimized matrix handle here. */
105: PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
106: {
107: #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
108: /* If the MKL library does not have mkl_sparse_optimize(), then this routine
109: * does nothing. We make it callable anyway in this case because it cuts
110: * down on littering the code with #ifdefs. */
111: PetscFunctionBegin;
112: PetscFunctionReturn(PETSC_SUCCESS);
113: #else
114: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
115: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
116: PetscInt m, n;
117: MatScalar *aa;
118: PetscInt *aj, *ai;
120: PetscFunctionBegin;
121: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
122: /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
123: * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
124: * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
125: * mkl_sparse_optimize() later. */
126: if (aijmkl->no_SpMV2) PetscFunctionReturn(PETSC_SUCCESS);
127: #endif
129: if (aijmkl->sparse_optimized) {
130: /* Matrix has been previously assembled and optimized. Must destroy old
131: * matrix handle before running the optimization step again. */
132: PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
133: }
134: aijmkl->sparse_optimized = PETSC_FALSE;
136: /* Now perform the SpMV2 setup and matrix optimization. */
137: aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
138: aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
139: aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
140: m = A->rmap->n;
141: n = A->cmap->n;
142: aj = a->j; /* aj[k] gives column index for element aa[k]. */
143: aa = a->a; /* Nonzero elements stored row-by-row. */
144: ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
145: if (a->nz && aa && !A->structure_only) {
146: /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
147: * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
148: PetscCallExternal(mkl_sparse_x_create_csr, &aijmkl->csrA, SPARSE_INDEX_BASE_ZERO, (MKL_INT)m, (MKL_INT)n, (MKL_INT *)ai, (MKL_INT *)(ai + 1), (MKL_INT *)aj, aa);
149: PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
150: PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
151: if (!aijmkl->no_SpMV2) PetscCallExternal(mkl_sparse_optimize, aijmkl->csrA);
152: aijmkl->sparse_optimized = PETSC_TRUE;
153: PetscCall(PetscObjectStateGet((PetscObject)A, &aijmkl->state));
154: } else {
155: aijmkl->csrA = NULL;
156: }
157: PetscFunctionReturn(PETSC_SUCCESS);
158: #endif
159: }
161: #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
162: /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
163: static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm, sparse_matrix_t csrA, PetscInt nrows, PetscInt ncols, Mat A)
164: {
165: sparse_index_base_t indexing;
166: PetscInt m, n;
167: PetscInt *aj, *ai, *dummy;
168: MatScalar *aa;
169: Mat_SeqAIJMKL *aijmkl;
171: PetscFunctionBegin;
172: if (csrA) {
173: /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
174: PetscCallExternal(mkl_sparse_x_export_csr, csrA, &indexing, (MKL_INT *)&m, (MKL_INT *)&n, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);
175: PetscCheck((m == nrows) && (n == ncols), PETSC_COMM_SELF, PETSC_ERR_LIB, "Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
176: } else {
177: aj = ai = NULL;
178: aa = NULL;
179: }
181: PetscCall(MatSetType(A, MATSEQAIJ));
182: PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, nrows, ncols));
183: /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
184: * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
185: * they will be destroyed when the MKL handle is destroyed.
186: * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
187: if (csrA) {
188: PetscCall(MatSeqAIJSetPreallocationCSR(A, ai, aj, NULL));
189: } else {
190: /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
191: PetscCall(MatSetUp(A));
192: PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
193: PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
194: }
196: /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
197: * Now turn it into a MATSEQAIJMKL. */
198: PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));
200: aijmkl = (Mat_SeqAIJMKL *)A->spptr;
201: aijmkl->csrA = csrA;
203: /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
204: * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
205: * and just need to be able to run the MKL optimization step. */
206: aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
207: aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
208: aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
209: if (csrA) {
210: PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
211: PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
212: }
213: PetscCall(PetscObjectStateGet((PetscObject)A, &aijmkl->state));
214: PetscFunctionReturn(PETSC_SUCCESS);
215: }
216: #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
218: /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
219: * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
220: * MatMatMultNumeric(). */
221: #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
222: static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
223: {
224: PetscInt i;
225: PetscInt nrows, ncols;
226: PetscInt nz;
227: PetscInt *ai, *aj, *dummy;
228: PetscScalar *aa;
229: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
230: sparse_index_base_t indexing;
232: PetscFunctionBegin;
233: /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
234: if (!aijmkl->csrA) PetscFunctionReturn(PETSC_SUCCESS);
236: /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
237: PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, (MKL_INT *)&nrows, (MKL_INT *)&ncols, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);
239: /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
240: * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
241: for (i = 0; i < nrows; i++) {
242: nz = ai[i + 1] - ai[i];
243: PetscCall(MatSetValues_SeqAIJ(A, 1, &i, nz, aj + ai[i], aa + ai[i], INSERT_VALUES));
244: }
246: PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
247: PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
249: PetscCall(PetscObjectStateGet((PetscObject)A, &aijmkl->state));
250: /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
251: * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
252: * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
253: aijmkl->sparse_optimized = PETSC_FALSE;
254: PetscFunctionReturn(PETSC_SUCCESS);
255: }
256: #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
258: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
259: PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A, PetscViewer viewer)
260: {
261: PetscInt i, j, k;
262: PetscInt nrows, ncols;
263: PetscInt nz;
264: PetscInt *ai, *aj, *dummy;
265: PetscScalar *aa;
266: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
267: sparse_index_base_t indexing;
269: PetscFunctionBegin;
270: PetscCall(PetscViewerASCIIPrintf(viewer, "Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n"));
272: /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
273: if (!aijmkl->csrA) {
274: PetscCall(PetscViewerASCIIPrintf(viewer, "MKL matrix handle is NULL\n"));
275: PetscFunctionReturn(PETSC_SUCCESS);
276: }
278: /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
279: PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, (MKL_INT *)&nrows, (MKL_INT *)&ncols, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);
281: k = 0;
282: for (i = 0; i < nrows; i++) {
283: PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ": ", i));
284: nz = ai[i + 1] - ai[i];
285: for (j = 0; j < nz; j++) {
286: if (aa) {
287: PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", %g) ", aj[k], PetscRealPart(aa[k])));
288: } else {
289: PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", NULL)", aj[k]));
290: }
291: k++;
292: }
293: PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
294: }
295: PetscFunctionReturn(PETSC_SUCCESS);
296: }
297: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
299: static PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
300: {
301: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
302: Mat_SeqAIJMKL *aijmkl_dest;
304: PetscFunctionBegin;
305: PetscCall(MatDuplicate_SeqAIJ(A, op, M));
306: aijmkl_dest = (Mat_SeqAIJMKL *)(*M)->spptr;
307: PetscCall(PetscArraycpy(aijmkl_dest, aijmkl, 1));
308: aijmkl_dest->sparse_optimized = PETSC_FALSE;
309: if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
310: PetscFunctionReturn(PETSC_SUCCESS);
311: }
313: static PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
314: {
315: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
316: Mat_SeqAIJMKL *aijmkl;
318: PetscFunctionBegin;
319: if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);
321: /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
322: * extra information and some different methods, call the AssemblyEnd
323: * routine for a MATSEQAIJ.
324: * I'm not sure if this is the best way to do this, but it avoids
325: * a lot of code duplication. */
326: a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */
327: PetscCall(MatAssemblyEnd_SeqAIJ(A, mode));
329: /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
330: * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
331: aijmkl = (Mat_SeqAIJMKL *)A->spptr;
332: if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
333: PetscFunctionReturn(PETSC_SUCCESS);
334: }
336: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
337: static PetscErrorCode MatMult_SeqAIJMKL(Mat A, Vec xx, Vec yy)
338: {
339: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
340: const PetscScalar *x;
341: PetscScalar *y;
342: const MatScalar *aa;
343: PetscInt m = A->rmap->n;
344: PetscInt n = A->cmap->n;
345: PetscScalar alpha = 1.0;
346: PetscScalar beta = 0.0;
347: const PetscInt *aj, *ai;
348: char matdescra[6];
350: /* Variables not in MatMult_SeqAIJ. */
351: char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
353: PetscFunctionBegin;
354: matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
355: matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
356: PetscCall(VecGetArrayRead(xx, &x));
357: PetscCall(VecGetArray(yy, &y));
358: aj = a->j; /* aj[k] gives column index for element aa[k]. */
359: aa = a->a; /* Nonzero elements stored row-by-row. */
360: ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
362: /* Call MKL sparse BLAS routine to do the MatMult. */
363: mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);
365: PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
366: PetscCall(VecRestoreArrayRead(xx, &x));
367: PetscCall(VecRestoreArray(yy, &y));
368: PetscFunctionReturn(PETSC_SUCCESS);
369: }
370: #endif
372: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
373: PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
374: {
375: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
376: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
377: const PetscScalar *x;
378: PetscScalar *y;
379: PetscObjectState state;
381: PetscFunctionBegin;
382: /* If there are no nonzero entries, zero yy and return immediately. */
383: if (!a->nz) {
384: PetscCall(VecGetArray(yy, &y));
385: PetscCall(PetscArrayzero(y, A->rmap->n));
386: PetscCall(VecRestoreArray(yy, &y));
387: PetscFunctionReturn(PETSC_SUCCESS);
388: }
390: PetscCall(VecGetArrayRead(xx, &x));
391: PetscCall(VecGetArray(yy, &y));
393: /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
394: * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
395: * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
396: PetscCall(PetscObjectStateGet((PetscObject)A, &state));
397: if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
399: /* Call MKL SpMV2 executor routine to do the MatMult. */
400: PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);
402: PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
403: PetscCall(VecRestoreArrayRead(xx, &x));
404: PetscCall(VecRestoreArray(yy, &y));
405: PetscFunctionReturn(PETSC_SUCCESS);
406: }
407: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
409: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
410: static PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A, Vec xx, Vec yy)
411: {
412: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
413: const PetscScalar *x;
414: PetscScalar *y;
415: const MatScalar *aa;
416: PetscInt m = A->rmap->n;
417: PetscInt n = A->cmap->n;
418: PetscScalar alpha = 1.0;
419: PetscScalar beta = 0.0;
420: const PetscInt *aj, *ai;
421: char matdescra[6];
423: /* Variables not in MatMultTranspose_SeqAIJ. */
424: char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
426: PetscFunctionBegin;
427: matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
428: matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
429: PetscCall(VecGetArrayRead(xx, &x));
430: PetscCall(VecGetArray(yy, &y));
431: aj = a->j; /* aj[k] gives column index for element aa[k]. */
432: aa = a->a; /* Nonzero elements stored row-by-row. */
433: ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
435: /* Call MKL sparse BLAS routine to do the MatMult. */
436: mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);
438: PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
439: PetscCall(VecRestoreArrayRead(xx, &x));
440: PetscCall(VecRestoreArray(yy, &y));
441: PetscFunctionReturn(PETSC_SUCCESS);
442: }
443: #endif
445: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
446: PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
447: {
448: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
449: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
450: const PetscScalar *x;
451: PetscScalar *y;
452: PetscObjectState state;
454: PetscFunctionBegin;
455: /* If there are no nonzero entries, zero yy and return immediately. */
456: if (!a->nz) {
457: PetscCall(VecGetArray(yy, &y));
458: PetscCall(PetscArrayzero(y, A->cmap->n));
459: PetscCall(VecRestoreArray(yy, &y));
460: PetscFunctionReturn(PETSC_SUCCESS);
461: }
463: PetscCall(VecGetArrayRead(xx, &x));
464: PetscCall(VecGetArray(yy, &y));
466: /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
467: * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
468: * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
469: PetscCall(PetscObjectStateGet((PetscObject)A, &state));
470: if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
472: /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
473: PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);
475: PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
476: PetscCall(VecRestoreArrayRead(xx, &x));
477: PetscCall(VecRestoreArray(yy, &y));
478: PetscFunctionReturn(PETSC_SUCCESS);
479: }
480: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
482: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
483: static PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
484: {
485: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
486: const PetscScalar *x;
487: PetscScalar *y, *z;
488: const MatScalar *aa;
489: PetscInt m = A->rmap->n;
490: PetscInt n = A->cmap->n;
491: const PetscInt *aj, *ai;
492: PetscInt i;
494: /* Variables not in MatMultAdd_SeqAIJ. */
495: char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
496: PetscScalar alpha = 1.0;
497: PetscScalar beta;
498: char matdescra[6];
500: PetscFunctionBegin;
501: matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
502: matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
504: PetscCall(VecGetArrayRead(xx, &x));
505: PetscCall(VecGetArrayPair(yy, zz, &y, &z));
506: aj = a->j; /* aj[k] gives column index for element aa[k]. */
507: aa = a->a; /* Nonzero elements stored row-by-row. */
508: ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
510: /* Call MKL sparse BLAS routine to do the MatMult. */
511: if (zz == yy) {
512: /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
513: beta = 1.0;
514: mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
515: } else {
516: /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
517: * MKL sparse BLAS does not have a MatMultAdd equivalent. */
518: beta = 0.0;
519: mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
520: for (i = 0; i < m; i++) z[i] += y[i];
521: }
523: PetscCall(PetscLogFlops(2.0 * a->nz));
524: PetscCall(VecRestoreArrayRead(xx, &x));
525: PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
526: PetscFunctionReturn(PETSC_SUCCESS);
527: }
528: #endif
530: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
531: PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
532: {
533: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
534: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
535: const PetscScalar *x;
536: PetscScalar *y, *z;
537: PetscInt m = A->rmap->n;
538: PetscInt i;
540: /* Variables not in MatMultAdd_SeqAIJ. */
541: PetscObjectState state;
543: PetscFunctionBegin;
544: /* If there are no nonzero entries, set zz = yy and return immediately. */
545: if (!a->nz) {
546: PetscCall(VecGetArrayPair(yy, zz, &y, &z));
547: PetscCall(PetscArraycpy(z, y, m));
548: PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
549: PetscFunctionReturn(PETSC_SUCCESS);
550: }
552: PetscCall(VecGetArrayRead(xx, &x));
553: PetscCall(VecGetArrayPair(yy, zz, &y, &z));
555: /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
556: * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
557: * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
558: PetscCall(PetscObjectStateGet((PetscObject)A, &state));
559: if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
561: /* Call MKL sparse BLAS routine to do the MatMult. */
562: if (zz == yy) {
563: /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
564: * with alpha and beta both set to 1.0. */
565: PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
566: } else {
567: /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
568: * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
569: PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
570: for (i = 0; i < m; i++) z[i] += y[i];
571: }
573: PetscCall(PetscLogFlops(2.0 * a->nz));
574: PetscCall(VecRestoreArrayRead(xx, &x));
575: PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
576: PetscFunctionReturn(PETSC_SUCCESS);
577: }
578: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
580: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
581: static PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
582: {
583: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
584: const PetscScalar *x;
585: PetscScalar *y, *z;
586: const MatScalar *aa;
587: PetscInt m = A->rmap->n;
588: PetscInt n = A->cmap->n;
589: const PetscInt *aj, *ai;
590: PetscInt i;
592: /* Variables not in MatMultTransposeAdd_SeqAIJ. */
593: char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
594: PetscScalar alpha = 1.0;
595: PetscScalar beta;
596: char matdescra[6];
598: PetscFunctionBegin;
599: matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
600: matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
602: PetscCall(VecGetArrayRead(xx, &x));
603: PetscCall(VecGetArrayPair(yy, zz, &y, &z));
604: aj = a->j; /* aj[k] gives column index for element aa[k]. */
605: aa = a->a; /* Nonzero elements stored row-by-row. */
606: ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
608: /* Call MKL sparse BLAS routine to do the MatMult. */
609: if (zz == yy) {
610: /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
611: beta = 1.0;
612: mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
613: } else {
614: /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
615: * MKL sparse BLAS does not have a MatMultAdd equivalent. */
616: beta = 0.0;
617: mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
618: for (i = 0; i < n; i++) z[i] += y[i];
619: }
621: PetscCall(PetscLogFlops(2.0 * a->nz));
622: PetscCall(VecRestoreArrayRead(xx, &x));
623: PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
624: PetscFunctionReturn(PETSC_SUCCESS);
625: }
626: #endif
628: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
629: PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
630: {
631: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
632: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
633: const PetscScalar *x;
634: PetscScalar *y, *z;
635: PetscInt n = A->cmap->n;
636: PetscInt i;
637: PetscObjectState state;
639: /* Variables not in MatMultTransposeAdd_SeqAIJ. */
641: PetscFunctionBegin;
642: /* If there are no nonzero entries, set zz = yy and return immediately. */
643: if (!a->nz) {
644: PetscCall(VecGetArrayPair(yy, zz, &y, &z));
645: PetscCall(PetscArraycpy(z, y, n));
646: PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
647: PetscFunctionReturn(PETSC_SUCCESS);
648: }
650: PetscCall(VecGetArrayRead(xx, &x));
651: PetscCall(VecGetArrayPair(yy, zz, &y, &z));
653: /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
654: * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
655: * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
656: PetscCall(PetscObjectStateGet((PetscObject)A, &state));
657: if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
659: /* Call MKL sparse BLAS routine to do the MatMult. */
660: if (zz == yy) {
661: /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
662: * with alpha and beta both set to 1.0. */
663: PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
664: } else {
665: /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
666: * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
667: PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
668: for (i = 0; i < n; i++) z[i] += y[i];
669: }
671: PetscCall(PetscLogFlops(2.0 * a->nz));
672: PetscCall(VecRestoreArrayRead(xx, &x));
673: PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
674: PetscFunctionReturn(PETSC_SUCCESS);
675: }
676: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
678: #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
679: static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
680: {
681: Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr;
682: sparse_matrix_t csrA, csrB, csrC;
683: PetscInt nrows, ncols;
684: struct matrix_descr descr_type_gen;
685: PetscObjectState state;
687: PetscFunctionBegin;
688: /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
689: * not handle sparse matrices with zero rows or columns. */
690: if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
691: else nrows = A->cmap->N;
692: if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
693: else ncols = B->rmap->N;
695: PetscCall(PetscObjectStateGet((PetscObject)A, &state));
696: if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
697: PetscCall(PetscObjectStateGet((PetscObject)B, &state));
698: if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
699: csrA = a->csrA;
700: csrB = b->csrA;
701: descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
703: if (csrA && csrB) {
704: PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FULL_MULT_NO_VAL, &csrC);
705: } else {
706: csrC = NULL;
707: }
709: PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, nrows, ncols, C));
710: PetscFunctionReturn(PETSC_SUCCESS);
711: }
713: PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
714: {
715: Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
716: sparse_matrix_t csrA, csrB, csrC;
717: struct matrix_descr descr_type_gen;
718: PetscObjectState state;
720: PetscFunctionBegin;
721: PetscCall(PetscObjectStateGet((PetscObject)A, &state));
722: if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
723: PetscCall(PetscObjectStateGet((PetscObject)B, &state));
724: if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
725: csrA = a->csrA;
726: csrB = b->csrA;
727: csrC = c->csrA;
728: descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
730: if (csrA && csrB) {
731: PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FINALIZE_MULT, &csrC);
732: } else {
733: csrC = NULL;
734: }
736: /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
737: PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
738: PetscFunctionReturn(PETSC_SUCCESS);
739: }
741: PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
742: {
743: PetscFunctionBegin;
744: PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
745: PetscFunctionReturn(PETSC_SUCCESS);
746: }
748: PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
749: {
750: PetscFunctionBegin;
751: PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
752: PetscFunctionReturn(PETSC_SUCCESS);
753: }
755: PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
756: {
757: PetscFunctionBegin;
758: PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
759: PetscFunctionReturn(PETSC_SUCCESS);
760: }
762: PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
763: {
764: PetscFunctionBegin;
765: PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
766: PetscFunctionReturn(PETSC_SUCCESS);
767: }
769: PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
770: {
771: PetscFunctionBegin;
772: PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C));
773: PetscFunctionReturn(PETSC_SUCCESS);
774: }
776: PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
777: {
778: PetscFunctionBegin;
779: PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C));
780: PetscFunctionReturn(PETSC_SUCCESS);
781: }
783: static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
784: {
785: Mat_Product *product = C->product;
786: Mat A = product->A, B = product->B;
788: PetscFunctionBegin;
789: PetscCall(MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A, B, C));
790: PetscFunctionReturn(PETSC_SUCCESS);
791: }
793: static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
794: {
795: Mat_Product *product = C->product;
796: Mat A = product->A, B = product->B;
797: PetscReal fill = product->fill;
799: PetscFunctionBegin;
800: PetscCall(MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A, B, fill, C));
801: C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
802: PetscFunctionReturn(PETSC_SUCCESS);
803: }
805: PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A, Mat P, Mat C)
806: {
807: Mat Ct;
808: Vec zeros;
809: Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
810: sparse_matrix_t csrA, csrP, csrC;
811: PetscBool set, flag;
812: struct matrix_descr descr_type_sym;
813: PetscObjectState state;
815: PetscFunctionBegin;
816: PetscCall(MatIsSymmetricKnown(A, &set, &flag));
817: PetscCheck(set && flag, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric");
819: PetscCall(PetscObjectStateGet((PetscObject)A, &state));
820: if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
821: PetscCall(PetscObjectStateGet((PetscObject)P, &state));
822: if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
823: csrA = a->csrA;
824: csrP = p->csrA;
825: csrC = c->csrA;
826: descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
827: descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
828: descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
830: /* the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
831: PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FINALIZE_MULT);
833: /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
834: * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
835: * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
836: * We have to fill in the missing portion, which we currently do below by forming the transpose and performing at MatAXPY
837: * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
838: * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
839: * the full matrix. */
840: PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
841: PetscCall(MatTranspose(C, MAT_INITIAL_MATRIX, &Ct));
842: PetscCall(MatCreateVecs(C, &zeros, NULL));
843: PetscCall(VecSetFromOptions(zeros));
844: PetscCall(VecZeroEntries(zeros));
845: PetscCall(MatDiagonalSet(Ct, zeros, INSERT_VALUES));
846: PetscCall(MatAXPY(C, 1.0, Ct, DIFFERENT_NONZERO_PATTERN));
847: /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
848: PetscCall(MatProductCreateWithMat(A, P, NULL, C));
849: PetscCall(MatProductSetType(C, MATPRODUCT_PtAP));
850: PetscCall(MatSeqAIJMKL_create_mkl_handle(C));
851: PetscCall(VecDestroy(&zeros));
852: PetscCall(MatDestroy(&Ct));
853: PetscFunctionReturn(PETSC_SUCCESS);
854: }
856: PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
857: {
858: Mat_Product *product = C->product;
859: Mat A = product->A, P = product->B;
860: Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr;
861: sparse_matrix_t csrA, csrP, csrC;
862: struct matrix_descr descr_type_sym;
863: PetscObjectState state;
865: PetscFunctionBegin;
866: PetscCall(PetscObjectStateGet((PetscObject)A, &state));
867: if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
868: PetscCall(PetscObjectStateGet((PetscObject)P, &state));
869: if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
870: csrA = a->csrA;
871: csrP = p->csrA;
872: descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
873: descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
874: descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
876: /* the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
877: if (csrP && csrA) {
878: PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FULL_MULT_NO_VAL);
879: } else {
880: csrC = NULL;
881: }
883: /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
884: * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
885: * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
886: * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
887: * is guaranteed. */
888: PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, P->cmap->N, P->cmap->N, C));
890: C->ops->productnumeric = MatProductNumeric_PtAP;
891: PetscFunctionReturn(PETSC_SUCCESS);
892: }
894: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
895: {
896: PetscFunctionBegin;
897: C->ops->productsymbolic = MatProductSymbolic_AB;
898: C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
899: PetscFunctionReturn(PETSC_SUCCESS);
900: }
902: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
903: {
904: PetscFunctionBegin;
905: C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
906: PetscFunctionReturn(PETSC_SUCCESS);
907: }
909: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
910: {
911: PetscFunctionBegin;
912: C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
913: C->ops->productsymbolic = MatProductSymbolic_ABt;
914: PetscFunctionReturn(PETSC_SUCCESS);
915: }
917: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
918: {
919: Mat_Product *product = C->product;
920: Mat A = product->A;
921: PetscBool set, flag;
923: PetscFunctionBegin;
924: /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
925: PetscCall(MatIsSymmetricKnown(A, &set, &flag));
926: PetscCheck(!PetscDefined(USE_COMPLEX) && set && flag, PETSC_COMM_SELF, PETSC_ERR_SUP, "MATPRODUCT_PtAP not supported for type SeqAIJMKL");
927: /* we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal() */
928: C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
929: PetscFunctionReturn(PETSC_SUCCESS);
930: }
932: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
933: {
934: PetscFunctionBegin;
935: C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
936: PetscFunctionReturn(PETSC_SUCCESS);
937: }
939: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
940: {
941: PetscFunctionBegin;
942: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MATPRODUCT_ABC not supported for type SeqAIJMKL");
943: }
945: PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
946: {
947: Mat_Product *product = C->product;
949: PetscFunctionBegin;
950: switch (product->type) {
951: case MATPRODUCT_AB:
952: PetscCall(MatProductSetFromOptions_SeqAIJMKL_AB(C));
953: break;
954: case MATPRODUCT_AtB:
955: PetscCall(MatProductSetFromOptions_SeqAIJMKL_AtB(C));
956: break;
957: case MATPRODUCT_ABt:
958: PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABt(C));
959: break;
960: case MATPRODUCT_PtAP:
961: PetscCall(MatProductSetFromOptions_SeqAIJMKL_PtAP(C));
962: break;
963: case MATPRODUCT_RARt:
964: PetscCall(MatProductSetFromOptions_SeqAIJMKL_RARt(C));
965: break;
966: case MATPRODUCT_ABC:
967: PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABC(C));
968: break;
969: default:
970: break;
971: }
972: PetscFunctionReturn(PETSC_SUCCESS);
973: }
974: #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
976: /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
977: * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL()
978: * routine, but can also be used to convert an assembled SeqAIJ matrix
979: * into a SeqAIJMKL one. */
980: PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
981: {
982: Mat B = *newmat;
983: Mat_SeqAIJMKL *aijmkl;
984: PetscBool set;
985: PetscBool sametype;
987: PetscFunctionBegin;
988: if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
990: PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype));
991: if (sametype) PetscFunctionReturn(PETSC_SUCCESS);
993: PetscCall(PetscNew(&aijmkl));
994: B->spptr = (void *)aijmkl;
996: /* Set function pointers for methods that we inherit from AIJ but override.
997: * We also parse some command line options below, since those determine some of the methods we point to. */
998: B->ops->duplicate = MatDuplicate_SeqAIJMKL;
999: B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL;
1000: B->ops->destroy = MatDestroy_SeqAIJMKL;
1002: aijmkl->sparse_optimized = PETSC_FALSE;
1003: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1004: aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */
1005: #else
1006: aijmkl->no_SpMV2 = PETSC_TRUE;
1007: #endif
1008: aijmkl->eager_inspection = PETSC_FALSE;
1010: /* Parse command line options. */
1011: PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "AIJMKL Options", "Mat");
1012: PetscCall(PetscOptionsBool("-mat_aijmkl_no_spmv2", "Disable use of inspector-executor (SpMV 2) routines", "None", (PetscBool)aijmkl->no_SpMV2, (PetscBool *)&aijmkl->no_SpMV2, &set));
1013: PetscCall(PetscOptionsBool("-mat_aijmkl_eager_inspection", "Run inspection at matrix assembly time, instead of waiting until needed by an operation", "None", (PetscBool)aijmkl->eager_inspection, (PetscBool *)&aijmkl->eager_inspection, &set));
1014: PetscOptionsEnd();
1015: #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1016: if (!aijmkl->no_SpMV2) {
1017: PetscCall(PetscInfo(B, "User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"));
1018: aijmkl->no_SpMV2 = PETSC_TRUE;
1019: }
1020: #endif
1022: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1023: B->ops->mult = MatMult_SeqAIJMKL_SpMV2;
1024: B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2;
1025: B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2;
1026: B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
1027: #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1028: B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJMKL;
1029: B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1030: B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1031: B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
1032: B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1033: #if !defined(PETSC_USE_COMPLEX)
1034: B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1035: #else
1036: B->ops->ptapnumeric = NULL;
1037: #endif
1038: #endif
1039: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
1041: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
1042: /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
1043: * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
1044: * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
1045: * versions in which the older interface has not been deprecated, we use the old interface. */
1046: if (aijmkl->no_SpMV2) {
1047: B->ops->mult = MatMult_SeqAIJMKL;
1048: B->ops->multtranspose = MatMultTranspose_SeqAIJMKL;
1049: B->ops->multadd = MatMultAdd_SeqAIJMKL;
1050: B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
1051: }
1052: #endif
1054: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", MatConvert_SeqAIJMKL_SeqAIJ));
1056: PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJMKL));
1057: *newmat = B;
1058: PetscFunctionReturn(PETSC_SUCCESS);
1059: }
1061: /*@C
1062: MatCreateSeqAIJMKL - Creates a sparse matrix of type `MATSEQAIJMKL`.
1064: Collective
1066: Input Parameters:
1067: + comm - MPI communicator, set to `PETSC_COMM_SELF`
1068: . m - number of rows
1069: . n - number of columns
1070: . nz - number of nonzeros per row (same for all rows)
1071: - nnz - array containing the number of nonzeros in the various rows
1072: (possibly different for each row) or `NULL`
1074: Output Parameter:
1075: . A - the matrix
1077: Options Database Keys:
1078: + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
1079: - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection,
1080: performing this step the first time the matrix is applied
1082: Level: intermediate
1084: Notes:
1085: If `nnz` is given then `nz` is ignored
1087: This type inherits from `MATSEQAIJ` and is largely identical, but uses sparse BLAS
1088: routines from Intel MKL whenever possible.
1090: If the installed version of MKL supports the "SpMV2" sparse
1091: inspector-executor routines, then those are used by default.
1093: `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, `MatMultTransposeAdd()`, `MatMatMult()`, `MatTransposeMatMult()`, and `MatPtAP()`
1094: (for symmetric A) operations are currently supported.
1095: MKL version 18, update 2 or later is required for `MatPtAP()`, `MatPtAPNumeric()` and `MatMatMultNumeric()`.
1097: .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateMPIAIJMKL()`, `MatSetValues()`
1098: @*/
1099: PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
1100: {
1101: PetscFunctionBegin;
1102: PetscCall(MatCreate(comm, A));
1103: PetscCall(MatSetSizes(*A, m, n, m, n));
1104: PetscCall(MatSetType(*A, MATSEQAIJMKL));
1105: PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, nnz));
1106: PetscFunctionReturn(PETSC_SUCCESS);
1107: }
1109: PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
1110: {
1111: PetscFunctionBegin;
1112: PetscCall(MatSetType(A, MATSEQAIJ));
1113: PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));
1114: PetscFunctionReturn(PETSC_SUCCESS);
1115: }