Actual source code: aijmkl.c
/*
  Defines basic operations for the MATSEQAIJMKL matrix class.
  This class is derived from the MATSEQAIJ class and retains the
  compressed row storage (aka Yale sparse matrix format) but uses
  sparse BLAS operations from the Intel Math Kernel Library (MKL)
  wherever possible.
*/
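
/* A minimal usage sketch (illustrative, not part of this file's API): an assembled AIJ matrix
 * can be switched to this class at runtime with the option -mat_type aijmkl, or in code,
 * for example
 *
 *   PetscCall(MatConvert(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));
 *
 * after which operations such as MatMult() dispatch to the MKL-backed kernels below. */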

#include <../src/mat/impls/aij/seq/aij.h>
#include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
#if defined(PETSC_HAVE_MKL_INTEL_ILP64)
  #define MKL_ILP64
#endif
#include <mkl_spblas.h>

typedef struct {
  PetscBool no_SpMV2;         /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
  PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
  PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
  PetscObjectState state;
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  sparse_matrix_t     csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
  struct matrix_descr descr;
#endif
} Mat_SeqAIJMKL;

extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat, MatAssemblyType);

PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat)
{
  /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
  /* so we will ignore 'MatType type'. */
  Mat B = *newmat;
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
#endif

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));

  /* Reset the original function pointers. */
  B->ops->duplicate               = MatDuplicate_SeqAIJ;
  B->ops->assemblyend             = MatAssemblyEnd_SeqAIJ;
  B->ops->destroy                 = MatDestroy_SeqAIJ;
  B->ops->mult                    = MatMult_SeqAIJ;
  B->ops->multtranspose           = MatMultTranspose_SeqAIJ;
  B->ops->multadd                 = MatMultAdd_SeqAIJ;
  B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJ;
  B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJ;
  B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
  B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJ_SeqAIJ;

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", NULL));

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
   * simply involves destroying the MKL sparse matrix handle and then freeing
   * the spptr pointer. */
  if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL *)B->spptr;

  if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
  PetscCall(PetscFree(B->spptr));

  /* Change the type of B to MATSEQAIJ. */
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ));

  *newmat = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
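
/* For reference, the converter above is what runs when a user turns a MATSEQAIJMKL matrix
 * back into a plain AIJ one, e.g. (a sketch)
 *
 *   PetscCall(MatConvert(A, MATSEQAIJ, MAT_INPLACE_MATRIX, &A));
 *
 * it is dispatched through the "MatConvert_seqaijmkl_seqaij_C" composed function. */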

static PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
{
  Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;

  PetscFunctionBegin;
  /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
  if (aijmkl) {
    /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
    if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
    PetscCall(PetscFree(A->spptr));
  }

  /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
   * to destroy everything that remains. */
  PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ));
  /* We do not call MatSetType() here; it appears that it is only meant to be
   * called when *building* a matrix, and this mirrors how the SuperLU matrix
   * class handles destruction. */
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaijmkl_seqaij_C", NULL));
  PetscCall(MatDestroy_SeqAIJ(A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
 * handle, creates a new one, and then calls mkl_sparse_optimize().
 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
 * an unoptimized matrix handle here. */
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
{
#if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  /* If the MKL library does not have mkl_sparse_optimize(), then this routine
   * does nothing. We make it callable anyway in this case because it cuts
   * down on littering the code with #ifdefs. */
  PetscFunctionBegin;
  PetscFunctionReturn(PETSC_SUCCESS);
#else
  Mat_SeqAIJ    *a      = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  PetscInt       m, n;
  MatScalar     *aa;
  PetscInt      *aj, *ai;

  PetscFunctionBegin;
  #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
  /* For MKL versions that still support the old, non-inspector-executor interfaces, we simply exit here if the no_SpMV2
   * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
   * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
   * mkl_sparse_optimize() later. */
  if (aijmkl->no_SpMV2) PetscFunctionReturn(PETSC_SUCCESS);
  #endif

  if (aijmkl->sparse_optimized) {
    /* Matrix has been previously assembled and optimized. Must destroy old
     * matrix handle before running the optimization step again. */
    PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
  }
  aijmkl->sparse_optimized = PETSC_FALSE;

  /* Now perform the SpMV2 setup and matrix optimization. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  m  = A->rmap->n;
  n  = A->cmap->n;
  aj = a->j; /* aj[k] gives column index for element aa[k]. */
  aa = a->a; /* Nonzero elements stored row-by-row. */
  ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
  if (a->nz && aa && !A->structure_only) {
    /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
     * The MKL sparse inspector-executor routines don't like being passed an empty matrix. */
    PetscCallExternal(mkl_sparse_x_create_csr, &aijmkl->csrA, SPARSE_INDEX_BASE_ZERO, (MKL_INT)m, (MKL_INT)n, (MKL_INT *)ai, (MKL_INT *)(ai + 1), (MKL_INT *)aj, aa);
    PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
    PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
    if (!aijmkl->no_SpMV2) PetscCallExternal(mkl_sparse_optimize, aijmkl->csrA);
    aijmkl->sparse_optimized = PETSC_TRUE;
    PetscCall(PetscObjectStateGet((PetscObject)A, &aijmkl->state));
  } else {
    aijmkl->csrA = NULL;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
#endif
}

#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
/* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm, sparse_matrix_t csrA, PetscInt nrows, PetscInt ncols, Mat A)
{
  sparse_index_base_t indexing;
  PetscInt            m, n;
  PetscInt           *aj, *ai, *dummy;
  MatScalar          *aa;
  Mat_SeqAIJMKL      *aijmkl;

  PetscFunctionBegin;
  if (csrA) {
    /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
    PetscCallExternal(mkl_sparse_x_export_csr, csrA, &indexing, (MKL_INT *)&m, (MKL_INT *)&n, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);
    PetscCheck((m == nrows) && (n == ncols), PETSC_COMM_SELF, PETSC_ERR_LIB, "Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
  } else {
    aj = ai = NULL;
    aa = NULL;
  }

  PetscCall(MatSetType(A, MATSEQAIJ));
  PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, nrows, ncols));
  /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
   * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
   * they will be destroyed when the MKL handle is destroyed.
   * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
  if (csrA) {
    PetscCall(MatSeqAIJSetPreallocationCSR(A, ai, aj, NULL));
  } else {
    /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
    PetscCall(MatSetUp(A));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  }

  /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
   * Now turn it into a MATSEQAIJMKL. */
  PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));

  aijmkl       = (Mat_SeqAIJMKL *)A->spptr;
  aijmkl->csrA = csrA;

  /* The code below duplicates much of what is in MatSeqAIJMKL_create_mkl_handle(). I dislike this code duplication, but
   * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
   * and just need to be able to run the MKL optimization step. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  if (csrA) {
    PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
    PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
  }
  PetscCall(PetscObjectStateGet((PetscObject)A, &aijmkl->state));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */

/* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
 * MatMatMultNumeric(). */
#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
{
  PetscInt            i;
  PetscInt            nrows, ncols;
  PetscInt            nz;
  PetscInt           *ai, *aj, *dummy;
  PetscScalar        *aa;
  Mat_SeqAIJMKL      *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  sparse_index_base_t indexing;

  PetscFunctionBegin;
  /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
  if (!aijmkl->csrA) PetscFunctionReturn(PETSC_SUCCESS);

  /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
  PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, (MKL_INT *)&nrows, (MKL_INT *)&ncols, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);

  /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
   * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
  for (i = 0; i < nrows; i++) {
    nz = ai[i + 1] - ai[i];
    PetscCall(MatSetValues_SeqAIJ(A, 1, &i, nz, aj + ai[i], aa + ai[i], INSERT_VALUES));
  }

  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscObjectStateGet((PetscObject)A, &aijmkl->state));
  /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
   * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
   * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
  aijmkl->sparse_optimized = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A, PetscViewer viewer)
{
  PetscInt            i, j, k;
  PetscInt            nrows, ncols;
  PetscInt            nz;
  PetscInt           *ai, *aj, *dummy;
  PetscScalar        *aa;
  Mat_SeqAIJMKL      *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  sparse_index_base_t indexing;

  PetscFunctionBegin;
  PetscCall(PetscViewerASCIIPrintf(viewer, "Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n"));

  /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
  if (!aijmkl->csrA) {
    PetscCall(PetscViewerASCIIPrintf(viewer, "MKL matrix handle is NULL\n"));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
  PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, (MKL_INT *)&nrows, (MKL_INT *)&ncols, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);

  k = 0;
  for (i = 0; i < nrows; i++) {
    PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ": ", i));
    nz = ai[i + 1] - ai[i];
    for (j = 0; j < nz; j++) {
      if (aa) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", %g) ", aj[k], PetscRealPart(aa[k])));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", NULL) ", aj[k]));
      }
      k++;
    }
    PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

static PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
{
  Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  Mat_SeqAIJMKL *aijmkl_dest;

  PetscFunctionBegin;
  PetscCall(MatDuplicate_SeqAIJ(A, op, M));
  aijmkl_dest = (Mat_SeqAIJMKL *)(*M)->spptr;
  PetscCall(PetscArraycpy(aijmkl_dest, aijmkl, 1));
  aijmkl_dest->sparse_optimized = PETSC_FALSE;
  if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
{
  Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJMKL *aijmkl;

  PetscFunctionBegin;
  if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);

  /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
   * extra information and some different methods, call the AssemblyEnd
   * routine for a MATSEQAIJ.
   * I'm not sure if this is the best way to do this, but it avoids
   * a lot of code duplication. */
  a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */
  PetscCall(MatAssemblyEnd_SeqAIJ(A, mode));

  /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
   * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
  aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
  PetscFunctionReturn(PETSC_SUCCESS);
}
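
/* Illustration (a sketch, not code from this file): with the default lazy inspection, the MKL
 * handle is built on first use inside the multiplication routines below; a user can instead
 * request the eager behavior described above from the command line, e.g.
 *
 *   ./myapp -mat_type aijmkl -mat_aijmkl_eager_inspection
 *
 * where -mat_aijmkl_eager_inspection is the option parsed in MatConvert_SeqAIJ_SeqAIJMKL() below. */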

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
static PetscErrorCode MatMult_SeqAIJMKL(Mat A, Vec xx, Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscInt           m     = A->rmap->n;
  PetscInt           n     = A->cmap->n;
  PetscScalar        alpha = 1.0;
  PetscScalar        beta  = 0.0;
  const PetscInt    *aj, *ai;
  char               matdescra[6];

  /* Variables not in MatMult_SeqAIJ. */
  char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */

  PetscFunctionBegin;
  matdescra[0] = 'g'; /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArray(yy, &y));
  aj = a->j; /* aj[k] gives column index for element aa[k]. */
  aa = a->a; /* Nonzero elements stored row-by-row. */
  ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMult. */
  mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);

  PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(yy, &y));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
{
  Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscObjectState   state;

  PetscFunctionBegin;
  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    PetscCall(VecGetArray(yy, &y));
    PetscCall(PetscArrayzero(y, A->rmap->n));
    PetscCall(VecRestoreArray(yy, &y));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArray(yy, &y));

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscCall(PetscObjectStateGet((PetscObject)A, &state));
  if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));

  /* Call MKL SpMV2 executor routine to do the MatMult. */
  PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);

  PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(yy, &y));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
static PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A, Vec xx, Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscInt           m     = A->rmap->n;
  PetscInt           n     = A->cmap->n;
  PetscScalar        alpha = 1.0;
  PetscScalar        beta  = 0.0;
  const PetscInt    *aj, *ai;
  char               matdescra[6];

  /* Variables not in MatMultTranspose_SeqAIJ. */
  char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */

  PetscFunctionBegin;
  matdescra[0] = 'g'; /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArray(yy, &y));
  aj = a->j; /* aj[k] gives column index for element aa[k]. */
  aa = a->a; /* Nonzero elements stored row-by-row. */
  ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultTranspose. */
  mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);

  PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(yy, &y));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
{
  Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscObjectState   state;

  PetscFunctionBegin;
  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    PetscCall(VecGetArray(yy, &y));
    PetscCall(PetscArrayzero(y, A->cmap->n));
    PetscCall(VecRestoreArray(yy, &y));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArray(yy, &y));

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscCall(PetscObjectStateGet((PetscObject)A, &state));
  if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));

  /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
  PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);

  PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(yy, &y));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
static PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
  const PetscScalar *x;
  PetscScalar       *y, *z;
  const MatScalar   *aa;
  PetscInt           m = A->rmap->n;
  PetscInt           n = A->cmap->n;
  const PetscInt    *aj, *ai;
  PetscInt           i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  char        transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
  PetscScalar alpha  = 1.0;
  PetscScalar beta;
  char        matdescra[6];

  PetscFunctionBegin;
  matdescra[0] = 'g'; /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */

  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArrayPair(yy, zz, &y, &z));
  aj = a->j; /* aj[k] gives column index for element aa[k]. */
  aa = a->a; /* Nonzero elements stored row-by-row. */
  ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMult. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
    for (i = 0; i < m; i++) z[i] += y[i];
  }

  PetscCall(PetscLogFlops(2.0 * a->nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y, *z;
  PetscInt           m = A->rmap->n;
  PetscInt           i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  PetscObjectState state;

  PetscFunctionBegin;
  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    PetscCall(VecGetArrayPair(yy, zz, &y, &z));
    PetscCall(PetscArraycpy(z, y, m));
    PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArrayPair(yy, zz, &y, &z));

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscCall(PetscObjectStateGet((PetscObject)A, &state));
  if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));

  /* Call MKL sparse BLAS routine to do the MatMult. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
     * with alpha and beta both set to 1.0. */
    PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
    for (i = 0; i < m; i++) z[i] += y[i];
  }

  PetscCall(PetscLogFlops(2.0 * a->nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
static PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
  const PetscScalar *x;
  PetscScalar       *y, *z;
  const MatScalar   *aa;
  PetscInt           m = A->rmap->n;
  PetscInt           n = A->cmap->n;
  const PetscInt    *aj, *ai;
  PetscInt           i;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  char        transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
  PetscScalar alpha  = 1.0;
  PetscScalar beta;
  char        matdescra[6];

  PetscFunctionBegin;
  matdescra[0] = 'g'; /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */

  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArrayPair(yy, zz, &y, &z));
  aj = a->j; /* aj[k] gives column index for element aa[k]. */
  aa = a->a; /* Nonzero elements stored row-by-row. */
  ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultTranspose. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
     * MKL sparse BLAS does not have a MatMultTransposeAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
    for (i = 0; i < n; i++) z[i] += y[i];
  }

  PetscCall(PetscLogFlops(2.0 * a->nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y, *z;
  PetscInt           n = A->cmap->n;
  PetscInt           i;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  PetscObjectState state;

  PetscFunctionBegin;
  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    PetscCall(VecGetArrayPair(yy, zz, &y, &z));
    PetscCall(PetscArraycpy(z, y, n));
    PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArrayPair(yy, zz, &y, &z));

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscCall(PetscObjectStateGet((PetscObject)A, &state));
  if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));

  /* Call MKL sparse BLAS routine to do the MatMultTranspose. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
     * with alpha and beta both set to 1.0. */
    PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultTransposeAdd equivalent. */
    PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
    for (i = 0; i < n; i++) z[i] += y[i];
  }

  PetscCall(PetscLogFlops(2.0 * a->nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
{
  Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr;
  sparse_matrix_t     csrA, csrB, csrC;
  PetscInt            nrows, ncols;
  struct matrix_descr descr_type_gen;
  PetscObjectState    state;

  PetscFunctionBegin;
  /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
   * not handle sparse matrices with zero rows or columns. */
  if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
  else nrows = A->cmap->N;
  if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
  else ncols = B->rmap->N;

  PetscCall(PetscObjectStateGet((PetscObject)A, &state));
  if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
  PetscCall(PetscObjectStateGet((PetscObject)B, &state));
  if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
  csrA = a->csrA;
  csrB = b->csrA;
  descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

  if (csrA && csrB) {
    PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FULL_MULT_NO_VAL, &csrC);
  } else {
    csrC = NULL;
  }

  PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, nrows, ncols, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
{
  Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
  sparse_matrix_t     csrA, csrB, csrC;
  struct matrix_descr descr_type_gen;
  PetscObjectState    state;

  PetscFunctionBegin;
  PetscCall(PetscObjectStateGet((PetscObject)A, &state));
  if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
  PetscCall(PetscObjectStateGet((PetscObject)B, &state));
  if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
  csrA = a->csrA;
  csrB = b->csrA;
  csrC = c->csrA;
  descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

  if (csrA && csrB) {
    PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FINALIZE_MULT, &csrC);
  } else {
    csrC = NULL;
  }

  /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
  PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscFunctionBegin;
  PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
{
  PetscFunctionBegin;
  PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
{
  PetscFunctionBegin;
  PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscFunctionBegin;
  PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscFunctionBegin;
  PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
{
  PetscFunctionBegin;
  PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, B = product->B;

  PetscFunctionBegin;
  PetscCall(MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A, B, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, B = product->B;
  PetscReal    fill = product->fill;

  PetscFunctionBegin;
  PetscCall(MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A, B, fill, C));
  C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A, Mat P, Mat C)
{
  Mat                 Ct;
  Vec                 zeros;
  Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
  sparse_matrix_t     csrA, csrP, csrC;
  PetscBool           set, flag;
  struct matrix_descr descr_type_sym;
  PetscObjectState    state;

  PetscFunctionBegin;
  PetscCall(MatIsSymmetricKnown(A, &set, &flag));
  PetscCheck(set && flag, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric");

  PetscCall(PetscObjectStateGet((PetscObject)A, &state));
  if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
  PetscCall(PetscObjectStateGet((PetscObject)P, &state));
  if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
  csrA = a->csrA;
  csrP = p->csrA;
  csrC = c->csrA;
  descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
  descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
  descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

  /* The call below won't work for complex matrices. (We protect against this when pointers are assigned in MatConvert.) */
  PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FINALIZE_MULT);

  /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
   * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
   * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
   * We have to fill in the missing portion, which we currently do below by forming the transpose and performing a MatAXPY
   * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
   * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
   * the full matrix. */
  PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
  PetscCall(MatTranspose(C, MAT_INITIAL_MATRIX, &Ct));
  PetscCall(MatCreateVecs(C, &zeros, NULL));
  PetscCall(VecSetFromOptions(zeros));
  PetscCall(VecZeroEntries(zeros));
  PetscCall(MatDiagonalSet(Ct, zeros, INSERT_VALUES));
  PetscCall(MatAXPY(C, 1.0, Ct, DIFFERENT_NONZERO_PATTERN));
  /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
  PetscCall(MatProductCreateWithMat(A, P, NULL, C));
  PetscCall(MatProductSetType(C, MATPRODUCT_PtAP));
  PetscCall(MatSeqAIJMKL_create_mkl_handle(C));
  PetscCall(VecDestroy(&zeros));
  PetscCall(MatDestroy(&Ct));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
{
  Mat_Product        *product = C->product;
  Mat                 A = product->A, P = product->B;
  Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr;
  sparse_matrix_t     csrA, csrP, csrC;
  struct matrix_descr descr_type_sym;
  PetscObjectState    state;

  PetscFunctionBegin;
  PetscCall(PetscObjectStateGet((PetscObject)A, &state));
  if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
  PetscCall(PetscObjectStateGet((PetscObject)P, &state));
  if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
  csrA = a->csrA;
  csrP = p->csrA;
  descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
  descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
  descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

  /* The call below won't work for complex matrices. (We protect against this when pointers are assigned in MatConvert.) */
  if (csrP && csrA) {
    PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FULL_MULT_NO_VAL);
  } else {
    csrC = NULL;
  }

  /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
   * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
   * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
   * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
   * is guaranteed. */
  PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, P->cmap->N, P->cmap->N, C));

  C->ops->productnumeric = MatProductNumeric_PtAP;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
{
  PetscFunctionBegin;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
{
  PetscFunctionBegin;
  C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
{
  PetscFunctionBegin;
  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A       = product->A;
  PetscBool    set, flag;

  PetscFunctionBegin;
  if (PetscDefined(USE_COMPLEX)) {
    /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used.
     * We do this in several other locations in this file. This works for the time being, but these
     * routines are considered unsafe and may be removed from the MatProduct code in the future.
     * TODO: Add proper MATSEQAIJMKL implementations */
    C->ops->productsymbolic = NULL;
  } else {
    /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
    PetscCall(MatIsSymmetricKnown(A, &set, &flag));
    if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
    else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
    /* we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(),
     * depending on whether the algorithm for the general case vs. the real symmetric one is used. */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
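
/* Illustration (a sketch, not code from this file): the symmetric-real PtAP path above is only
 * chosen when the symmetry of A has been recorded ahead of time, e.g. by the user having called
 *
 *   PetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE));
 *
 * before setting up the product; otherwise the generic fallback is used. */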

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
{
  PetscFunctionBegin;
  C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
{
  PetscFunctionBegin;
  C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  switch (product->type) {
  case MATPRODUCT_AB:
    PetscCall(MatProductSetFromOptions_SeqAIJMKL_AB(C));
    break;
  case MATPRODUCT_AtB:
    PetscCall(MatProductSetFromOptions_SeqAIJMKL_AtB(C));
    break;
  case MATPRODUCT_ABt:
    PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABt(C));
    break;
  case MATPRODUCT_PtAP:
    PetscCall(MatProductSetFromOptions_SeqAIJMKL_PtAP(C));
    break;
  case MATPRODUCT_RARt:
    PetscCall(MatProductSetFromOptions_SeqAIJMKL_RARt(C));
    break;
  case MATPRODUCT_ABC:
    PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABC(C));
    break;
  default:
    break;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */

/* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL()
 * routine, but can also be used to convert an assembled SeqAIJ matrix
 * into a SeqAIJMKL one. */
PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
{
  Mat            B = *newmat;
  Mat_SeqAIJMKL *aijmkl;
  PetscBool      set;
  PetscBool      sametype;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));

  PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype));
  if (sametype) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(PetscNew(&aijmkl));
  B->spptr = (void *)aijmkl;

  /* Set function pointers for methods that we inherit from AIJ but override.
   * We also parse some command line options below, since those determine some of the methods we point to. */
  B->ops->duplicate   = MatDuplicate_SeqAIJMKL;
  B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL;
  B->ops->destroy     = MatDestroy_SeqAIJMKL;

  aijmkl->sparse_optimized = PETSC_FALSE;
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */
#else
  aijmkl->no_SpMV2 = PETSC_TRUE;
#endif
  aijmkl->eager_inspection = PETSC_FALSE;

  /* Parse command line options. */
  PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "AIJMKL Options", "Mat");
  PetscCall(PetscOptionsBool("-mat_aijmkl_no_spmv2", "Disable use of inspector-executor (SpMV 2) routines", "None", (PetscBool)aijmkl->no_SpMV2, (PetscBool *)&aijmkl->no_SpMV2, &set));
  PetscCall(PetscOptionsBool("-mat_aijmkl_eager_inspection", "Run inspection at matrix assembly time, instead of waiting until needed by an operation", "None", (PetscBool)aijmkl->eager_inspection, (PetscBool *)&aijmkl->eager_inspection, &set));
  PetscOptionsEnd();
#if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  if (!aijmkl->no_SpMV2) {
    PetscCall(PetscInfo(B, "User requested use of MKL SpMV2 routines, but the MKL version in use does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"));
    aijmkl->no_SpMV2 = PETSC_TRUE;
  }
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
  B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
  B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
  B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
  #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
  B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJMKL;
  B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
  B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
  B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
  B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
    #if !defined(PETSC_USE_COMPLEX)
  B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
    #else
  B->ops->ptapnumeric = NULL;
    #endif
  #endif
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
  /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
   * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
   * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
   * versions in which the older interface has not been deprecated, we use the old interface. */
  if (aijmkl->no_SpMV2) {
    B->ops->mult             = MatMult_SeqAIJMKL;
    B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
    B->ops->multadd          = MatMultAdd_SeqAIJMKL;
    B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
  }
#endif

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", MatConvert_SeqAIJMKL_SeqAIJ));

  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJMKL));
  *newmat = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatCreateSeqAIJMKL - Creates a sparse matrix of type `MATSEQAIJMKL`.

  Collective

  Input Parameters:
+ comm - MPI communicator, set to `PETSC_COMM_SELF`
. m    - number of rows
. n    - number of columns
. nz   - number of nonzeros per row (same for all rows)
- nnz  - array containing the number of nonzeros in the various rows
         (possibly different for each row) or `NULL`

  Output Parameter:
. A - the matrix

  Options Database Keys:
+ -mat_aijmkl_no_spmv2         - disable use of the SpMV2 inspector-executor routines
- -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection,
                                 performing this step the first time the matrix is applied

  Level: intermediate

  Notes:
  If `nnz` is given then `nz` is ignored.

  This type inherits from `MATSEQAIJ` and is largely identical, but uses sparse BLAS
  routines from Intel MKL whenever possible.

  If the installed version of MKL supports the "SpMV2" sparse
  inspector-executor routines, then those are used by default.

  `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, `MatMultTransposeAdd()`, `MatMatMult()`, `MatTransposeMatMult()`, and `MatPtAP()`
  (for symmetric A) operations are currently supported.
  MKL version 18, update 2 or later is required for `MatPtAP()`, `MatPtAPNumeric()` and `MatMatMultNumeric()`.
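
  Example Usage:
.vb
  // a minimal sketch; the sizes and per-row nonzero estimate are illustrative
  Mat A;
  PetscCall(MatCreateSeqAIJMKL(PETSC_COMM_SELF, 100, 100, 5, NULL, &A));
  // ... insert values with MatSetValues(), then assemble ...
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
.ve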

.seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateMPIAIJMKL()`, `MatSetValues()`
@*/
PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, m, n));
  PetscCall(MatSetType(*A, MATSEQAIJMKL));
  PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatSetType(A, MATSEQAIJ));
  PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));
  PetscFunctionReturn(PETSC_SUCCESS);
}