Actual source code: baijmkl.c
1: /*
2: Defines basic operations for the MATSEQBAIJMKL matrix class.
3: Uses sparse BLAS operations from the Intel Math Kernel Library (MKL)
4: wherever possible. If used MKL version is older than 11.3 PETSc default
5: code for sparse matrix operations is used.
6: */
8: #include <../src/mat/impls/baij/seq/baij.h>
9: #include <../src/mat/impls/baij/seq/baijmkl/baijmkl.h>
10: #if defined(PETSC_HAVE_MKL_INTEL_ILP64)
11: #define MKL_ILP64
12: #endif
13: #include <mkl_spblas.h>
15: static PetscBool PetscSeqBAIJSupportsZeroBased(void)
16: {
17: static PetscBool set = PETSC_FALSE, value;
18: int n = 1, ia[1], ja[1];
19: float a[1];
20: sparse_status_t status;
21: sparse_matrix_t A;
23: if (!set) {
24: status = mkl_sparse_s_create_bsr(&A, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_COLUMN_MAJOR, (MKL_INT)n, (MKL_INT)n, (MKL_INT)n, (MKL_INT *)ia, (MKL_INT *)ia, (MKL_INT *)ja, a);
25: value = (status != SPARSE_STATUS_NOT_SUPPORTED) ? PETSC_TRUE : PETSC_FALSE;
26: (void)mkl_sparse_destroy(A);
27: set = PETSC_TRUE;
28: }
29: return value;
30: }
32: typedef struct {
33: PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
34: sparse_matrix_t bsrA; /* "Handle" used by SpMV2 inspector-executor routines. */
35: struct matrix_descr descr;
36: PetscInt *ai1;
37: PetscInt *aj1;
38: } Mat_SeqBAIJMKL;
40: static PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode);
41: extern PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat, MatAssemblyType);
43: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJMKL_SeqBAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat)
44: {
45: /* This routine is only called to convert a MATBAIJMKL to its base PETSc type, */
46: /* so we will ignore 'MatType type'. */
47: Mat B = *newmat;
48: Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
50: PetscFunctionBegin;
51: if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
53: /* Reset the original function pointers. */
54: B->ops->duplicate = MatDuplicate_SeqBAIJ;
55: B->ops->assemblyend = MatAssemblyEnd_SeqBAIJ;
56: B->ops->destroy = MatDestroy_SeqBAIJ;
57: B->ops->multtranspose = MatMultTranspose_SeqBAIJ;
58: B->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJ;
59: B->ops->scale = MatScale_SeqBAIJ;
60: B->ops->diagonalscale = MatDiagonalScale_SeqBAIJ;
61: B->ops->axpy = MatAXPY_SeqBAIJ;
63: switch (A->rmap->bs) {
64: case 1:
65: B->ops->mult = MatMult_SeqBAIJ_1;
66: B->ops->multadd = MatMultAdd_SeqBAIJ_1;
67: break;
68: case 2:
69: B->ops->mult = MatMult_SeqBAIJ_2;
70: B->ops->multadd = MatMultAdd_SeqBAIJ_2;
71: break;
72: case 3:
73: B->ops->mult = MatMult_SeqBAIJ_3;
74: B->ops->multadd = MatMultAdd_SeqBAIJ_3;
75: break;
76: case 4:
77: B->ops->mult = MatMult_SeqBAIJ_4;
78: B->ops->multadd = MatMultAdd_SeqBAIJ_4;
79: break;
80: case 5:
81: B->ops->mult = MatMult_SeqBAIJ_5;
82: B->ops->multadd = MatMultAdd_SeqBAIJ_5;
83: break;
84: case 6:
85: B->ops->mult = MatMult_SeqBAIJ_6;
86: B->ops->multadd = MatMultAdd_SeqBAIJ_6;
87: break;
88: case 7:
89: B->ops->mult = MatMult_SeqBAIJ_7;
90: B->ops->multadd = MatMultAdd_SeqBAIJ_7;
91: break;
92: case 11:
93: B->ops->mult = MatMult_SeqBAIJ_11;
94: B->ops->multadd = MatMultAdd_SeqBAIJ_11;
95: break;
96: case 15:
97: B->ops->mult = MatMult_SeqBAIJ_15_ver1;
98: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
99: break;
100: default:
101: B->ops->mult = MatMult_SeqBAIJ_N;
102: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
103: break;
104: }
105: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaijmkl_seqbaij_C", NULL));
107: /* Free everything in the Mat_SeqBAIJMKL data structure. Currently, this
108: * simply involves destroying the MKL sparse matrix handle and then freeing
109: * the spptr pointer. */
110: if (reuse == MAT_INITIAL_MATRIX) baijmkl = (Mat_SeqBAIJMKL *)B->spptr;
112: if (baijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, baijmkl->bsrA);
113: PetscCall(PetscFree2(baijmkl->ai1, baijmkl->aj1));
114: PetscCall(PetscFree(B->spptr));
116: /* Change the type of B to MATSEQBAIJ. */
117: PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ));
119: *newmat = B;
120: PetscFunctionReturn(PETSC_SUCCESS);
121: }
123: static PetscErrorCode MatDestroy_SeqBAIJMKL(Mat A)
124: {
125: Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
127: PetscFunctionBegin;
128: if (baijmkl) {
129: /* Clean up everything in the Mat_SeqBAIJMKL data structure, then free A->spptr. */
130: if (baijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, baijmkl->bsrA);
131: PetscCall(PetscFree2(baijmkl->ai1, baijmkl->aj1));
132: PetscCall(PetscFree(A->spptr));
133: }
135: /* Change the type of A back to SEQBAIJ and use MatDestroy_SeqBAIJ()
136: * to destroy everything that remains. */
137: PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQBAIJ));
138: PetscCall(MatDestroy_SeqBAIJ(A));
139: PetscFunctionReturn(PETSC_SUCCESS);
140: }
142: static PetscErrorCode MatSeqBAIJMKL_create_mkl_handle(Mat A)
143: {
144: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
145: Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
146: PetscInt mbs, nbs, nz, bs;
147: MatScalar *aa;
148: PetscInt *aj, *ai;
149: PetscInt i;
151: PetscFunctionBegin;
152: if (baijmkl->sparse_optimized) {
153: /* Matrix has been previously assembled and optimized. Must destroy old
154: * matrix handle before running the optimization step again. */
155: PetscCall(PetscFree2(baijmkl->ai1, baijmkl->aj1));
156: PetscCallMKL(mkl_sparse_destroy(baijmkl->bsrA));
157: }
158: baijmkl->sparse_optimized = PETSC_FALSE;
160: /* Now perform the SpMV2 setup and matrix optimization. */
161: baijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
162: baijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
163: baijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
164: mbs = a->mbs;
165: nbs = a->nbs;
166: nz = a->nz;
167: bs = A->rmap->bs;
168: aa = a->a;
170: if ((nz != 0) & !A->structure_only) {
171: /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
172: * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
173: if (PetscSeqBAIJSupportsZeroBased()) {
174: aj = a->j;
175: ai = a->i;
176: PetscCallMKL(mkl_sparse_x_create_bsr(&baijmkl->bsrA, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_COLUMN_MAJOR, (MKL_INT)mbs, (MKL_INT)nbs, (MKL_INT)bs, (MKL_INT *)ai, (MKL_INT *)(ai + 1), (MKL_INT *)aj, aa));
177: } else {
178: PetscCall(PetscMalloc2(mbs + 1, &ai, nz, &aj));
179: for (i = 0; i < mbs + 1; i++) ai[i] = a->i[i] + 1;
180: for (i = 0; i < nz; i++) aj[i] = a->j[i] + 1;
181: aa = a->a;
182: PetscCallMKL(mkl_sparse_x_create_bsr(&baijmkl->bsrA, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, (MKL_INT)mbs, (MKL_INT)nbs, (MKL_INT)bs, (MKL_INT *)ai, (MKL_INT *)(ai + 1), (MKL_INT *)aj, aa));
183: baijmkl->ai1 = ai;
184: baijmkl->aj1 = aj;
185: }
186: PetscCallMKL(mkl_sparse_set_mv_hint(baijmkl->bsrA, SPARSE_OPERATION_NON_TRANSPOSE, baijmkl->descr, 1000));
187: PetscCallMKL(mkl_sparse_set_memory_hint(baijmkl->bsrA, SPARSE_MEMORY_AGGRESSIVE));
188: PetscCallMKL(mkl_sparse_optimize(baijmkl->bsrA));
189: baijmkl->sparse_optimized = PETSC_TRUE;
190: }
191: PetscFunctionReturn(PETSC_SUCCESS);
192: }
194: static PetscErrorCode MatDuplicate_SeqBAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
195: {
196: Mat_SeqBAIJMKL *baijmkl;
197: Mat_SeqBAIJMKL *baijmkl_dest;
199: PetscFunctionBegin;
200: PetscCall(MatDuplicate_SeqBAIJ(A, op, M));
201: baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
202: PetscCall(PetscNew(&baijmkl_dest));
203: (*M)->spptr = (void *)baijmkl_dest;
204: PetscCall(PetscMemcpy(baijmkl_dest, baijmkl, sizeof(Mat_SeqBAIJMKL)));
205: baijmkl_dest->sparse_optimized = PETSC_FALSE;
206: PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
207: PetscFunctionReturn(PETSC_SUCCESS);
208: }
210: static PetscErrorCode MatMult_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
211: {
212: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
213: Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
214: const PetscScalar *x;
215: PetscScalar *y;
217: PetscFunctionBegin;
218: /* If there are no nonzero entries, zero yy and return immediately. */
219: if (!a->nz) {
220: PetscCall(VecSet(yy, 0.0));
221: PetscFunctionReturn(PETSC_SUCCESS);
222: }
224: PetscCall(VecGetArrayRead(xx, &x));
225: PetscCall(VecGetArray(yy, &y));
227: /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
228: * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
229: * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
230: if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
232: /* Call MKL SpMV2 executor routine to do the MatMult. */
233: PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, y));
235: PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz - a->nonzerorowcnt * A->rmap->bs));
236: PetscCall(VecRestoreArrayRead(xx, &x));
237: PetscCall(VecRestoreArray(yy, &y));
238: PetscFunctionReturn(PETSC_SUCCESS);
239: }
241: static PetscErrorCode MatMultTranspose_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
242: {
243: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
244: Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
245: const PetscScalar *x;
246: PetscScalar *y;
248: PetscFunctionBegin;
249: /* If there are no nonzero entries, zero yy and return immediately. */
250: if (!a->nz) {
251: PetscCall(VecSet(yy, 0.0));
252: PetscFunctionReturn(PETSC_SUCCESS);
253: }
255: PetscCall(VecGetArrayRead(xx, &x));
256: PetscCall(VecGetArray(yy, &y));
258: /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
259: * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
260: * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
261: if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
263: /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
264: PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, y));
266: PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz - a->nonzerorowcnt * A->rmap->bs));
267: PetscCall(VecRestoreArrayRead(xx, &x));
268: PetscCall(VecRestoreArray(yy, &y));
269: PetscFunctionReturn(PETSC_SUCCESS);
270: }
272: static PetscErrorCode MatMultAdd_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
273: {
274: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
275: Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
276: const PetscScalar *x;
277: PetscScalar *y, *z;
278: PetscInt m = a->mbs * A->rmap->bs;
279: PetscInt i;
281: PetscFunctionBegin;
282: /* If there are no nonzero entries, set zz = yy and return immediately. */
283: if (!a->nz) {
284: PetscCall(VecCopy(yy, zz));
285: PetscFunctionReturn(PETSC_SUCCESS);
286: }
288: PetscCall(VecGetArrayRead(xx, &x));
289: PetscCall(VecGetArrayPair(yy, zz, &y, &z));
291: /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
292: * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
293: * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
294: if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
296: /* Call MKL sparse BLAS routine to do the MatMult. */
297: if (zz == yy) {
298: /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
299: * with alpha and beta both set to 1.0. */
300: PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 1.0, z));
301: } else {
302: /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
303: * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
304: PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, z));
305: for (i = 0; i < m; i++) z[i] += y[i];
306: }
308: PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz));
309: PetscCall(VecRestoreArrayRead(xx, &x));
310: PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
311: PetscFunctionReturn(PETSC_SUCCESS);
312: }
314: static PetscErrorCode MatMultTransposeAdd_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
315: {
316: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
317: Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
318: const PetscScalar *x;
319: PetscScalar *y, *z;
320: PetscInt n = a->nbs * A->rmap->bs;
321: PetscInt i;
322: /* Variables not in MatMultTransposeAdd_SeqBAIJ. */
324: PetscFunctionBegin;
325: /* If there are no nonzero entries, set zz = yy and return immediately. */
326: if (!a->nz) {
327: PetscCall(VecCopy(yy, zz));
328: PetscFunctionReturn(PETSC_SUCCESS);
329: }
331: PetscCall(VecGetArrayRead(xx, &x));
332: PetscCall(VecGetArrayPair(yy, zz, &y, &z));
334: /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
335: * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
336: * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
337: if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
339: /* Call MKL sparse BLAS routine to do the MatMult. */
340: if (zz == yy) {
341: /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
342: * with alpha and beta both set to 1.0. */
343: PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 1.0, z));
344: } else {
345: /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
346: * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
347: PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, z));
348: for (i = 0; i < n; i++) z[i] += y[i];
349: }
351: PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz));
352: PetscCall(VecRestoreArrayRead(xx, &x));
353: PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
354: PetscFunctionReturn(PETSC_SUCCESS);
355: }
357: static PetscErrorCode MatScale_SeqBAIJMKL(Mat inA, PetscScalar alpha)
358: {
359: PetscFunctionBegin;
360: PetscCall(MatScale_SeqBAIJ(inA, alpha));
361: PetscCall(MatSeqBAIJMKL_create_mkl_handle(inA));
362: PetscFunctionReturn(PETSC_SUCCESS);
363: }
365: static PetscErrorCode MatDiagonalScale_SeqBAIJMKL(Mat A, Vec ll, Vec rr)
366: {
367: PetscFunctionBegin;
368: PetscCall(MatDiagonalScale_SeqBAIJ(A, ll, rr));
369: PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
370: PetscFunctionReturn(PETSC_SUCCESS);
371: }
373: static PetscErrorCode MatAXPY_SeqBAIJMKL(Mat Y, PetscScalar a, Mat X, MatStructure str)
374: {
375: PetscFunctionBegin;
376: PetscCall(MatAXPY_SeqBAIJ(Y, a, X, str));
377: if (str == SAME_NONZERO_PATTERN) {
378: /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */
379: PetscCall(MatSeqBAIJMKL_create_mkl_handle(Y));
380: }
381: PetscFunctionReturn(PETSC_SUCCESS);
382: }
383: /* MatConvert_SeqBAIJ_SeqBAIJMKL converts a SeqBAIJ matrix into a
384: * SeqBAIJMKL matrix. This routine is called by the MatCreate_SeqMKLBAIJ()
385: * routine, but can also be used to convert an assembled SeqBAIJ matrix
386: * into a SeqBAIJMKL one. */
387: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
388: {
389: Mat B = *newmat;
390: Mat_SeqBAIJMKL *baijmkl;
391: PetscBool sametype;
393: PetscFunctionBegin;
394: if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
396: PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype));
397: if (sametype) PetscFunctionReturn(PETSC_SUCCESS);
399: PetscCall(PetscNew(&baijmkl));
400: B->spptr = (void *)baijmkl;
402: /* Set function pointers for methods that we inherit from BAIJ but override.
403: * We also parse some command line options below, since those determine some of the methods we point to. */
404: B->ops->assemblyend = MatAssemblyEnd_SeqBAIJMKL;
406: baijmkl->sparse_optimized = PETSC_FALSE;
408: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatScale_C", MatScale_SeqBAIJMKL));
409: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaijmkl_seqbaij_C", MatConvert_SeqBAIJMKL_SeqBAIJ));
411: PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJMKL));
412: *newmat = B;
413: PetscFunctionReturn(PETSC_SUCCESS);
414: }
416: static PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode)
417: {
418: PetscFunctionBegin;
419: if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);
420: PetscCall(MatAssemblyEnd_SeqBAIJ(A, mode));
421: PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
422: A->ops->destroy = MatDestroy_SeqBAIJMKL;
423: A->ops->mult = MatMult_SeqBAIJMKL_SpMV2;
424: A->ops->multtranspose = MatMultTranspose_SeqBAIJMKL_SpMV2;
425: A->ops->multadd = MatMultAdd_SeqBAIJMKL_SpMV2;
426: A->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJMKL_SpMV2;
427: A->ops->scale = MatScale_SeqBAIJMKL;
428: A->ops->diagonalscale = MatDiagonalScale_SeqBAIJMKL;
429: A->ops->axpy = MatAXPY_SeqBAIJMKL;
430: A->ops->duplicate = MatDuplicate_SeqBAIJMKL;
431: PetscFunctionReturn(PETSC_SUCCESS);
432: }
434: /*@
435: MatCreateSeqBAIJMKL - Creates a sparse matrix of type `MATSEQBAIJMKL`.
436: This type inherits from `MATSEQBAIJ` and is largely identical, but uses sparse BLAS
437: routines from Intel MKL whenever possible.
439: Input Parameters:
440: + comm - MPI communicator, set to `PETSC_COMM_SELF`
441: . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
442: blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
443: . m - number of rows
444: . n - number of columns
445: . nz - number of nonzero blocks per block row (same for all rows)
446: - nnz - array containing the number of nonzero blocks in the various block rows
447: (possibly different for each block row) or `NULL`
449: Output Parameter:
450: . A - the matrix
452: It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
453: MatXXXXSetPreallocation() paradigm instead of this routine directly.
454: [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]
456: Options Database Keys:
457: + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower)
458: - -mat_block_size - size of the blocks to use
460: Level: intermediate
462: Notes:
463: The number of rows and columns must be divisible by blocksize.
465: If the `nnz` parameter is given then the `nz` parameter is ignored
467: A nonzero block is any block that as 1 or more nonzeros in it
469: `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, and `MatMultTransposeAdd()`
470: operations are currently supported.
471: If the installed version of MKL supports the "SpMV2" sparse
472: inspector-executor routines, then those are used by default.
473: Default PETSc kernels are used otherwise.
475: The `MATSEQBAIJ` format is fully compatible with standard Fortran
476: storage. That is, the stored row and column indices can begin at
477: either one (as in Fortran) or zero. See the users' manual for details.
479: Specify the preallocated storage with either nz or nnz (not both).
480: Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory
481: allocation. See [Sparse Matrices](sec_matsparse) for details.
482: matrices.
484: .seealso: [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
485: @*/
486: PetscErrorCode MatCreateSeqBAIJMKL(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
487: {
488: PetscFunctionBegin;
489: PetscCall(MatCreate(comm, A));
490: PetscCall(MatSetSizes(*A, m, n, m, n));
491: PetscCall(MatSetType(*A, MATSEQBAIJMKL));
492: PetscCall(MatSeqBAIJSetPreallocation_SeqBAIJ(*A, bs, nz, (PetscInt *)nnz));
493: PetscFunctionReturn(PETSC_SUCCESS);
494: }
496: PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJMKL(Mat A)
497: {
498: PetscFunctionBegin;
499: PetscCall(MatSetType(A, MATSEQBAIJ));
500: PetscCall(MatConvert_SeqBAIJ_SeqBAIJMKL(A, MATSEQBAIJMKL, MAT_INPLACE_MATRIX, &A));
501: PetscFunctionReturn(PETSC_SUCCESS);
502: }