Actual source code: ex237.c
static char help[] = "Mini-app to benchmark matrix-matrix multiplication\n\n";

/*
   See the paper below for more information

   "KSPHPDDM and PCHPDDM: Extending PETSc with Robust Overlapping Schwarz Preconditioners and Advanced Krylov Methods",
   P. Jolivet, J. E. Roman, and S. Zampini (2020).
*/
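
/*
   Runtime options (parsed below):
     -f <file>      binary file holding the input MatAIJ matrix (required)
     -trial <n>     number of timed repetitions of each product (default 5)
     -bs <list>     block sizes to benchmark (default 1)
     -N <list>      numbers of columns of the dense multiplicand (default 1,2,4,8,16,32,64,128)
     -type <list>   matrix types to benchmark (default seqaij)
     -check         verify the computed products
     -ABt           also benchmark C = A*B^T
     -AtB           also benchmark C = A^T*B
     -convert_aij   convert out-of-place instead of in-place

   Illustrative invocation (the matrix file name is a placeholder):
     ./ex237 -f matrix.dat -bs 1,2 -N 1,16 -type aij,baij -AtB
*/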

#include <petsc.h>
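
/*
   Call an MKL sparse BLAS routine with PETSc-style stack and error checking,
   or fail cleanly when PETSc was configured without MKL.
*/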
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  #include <mkl.h>
  #define PetscCallMKLSparse(func, args) \
    do { \
      sparse_status_t __ierr; \
      PetscStackPushExternal(#func); \
      __ierr = func args; \
      PetscStackPop; \
      PetscCheck(__ierr == SPARSE_STATUS_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in %s(): error code %d", #func, (int)__ierr); \
    } while (0)
#else
  #define PetscCallMKLSparse(func, args) \
    do { \
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No MKL support"); \
    } while (0)
#endif

int main(int argc, char **argv)
{
  Mat         A, C, D, E;
  PetscInt    nbs = 10, ntype = 10, nN = 8, m, M, trial = 5;
  PetscViewer viewer;
  PetscInt    bs[10], N[8];
  char       *type[10];
  PetscMPIInt size;
  PetscBool   flg, cuda, maij = PETSC_FALSE, check = PETSC_FALSE, abt = PETSC_FALSE, atb = PETSC_FALSE, convert = PETSC_FALSE, mkl;
  char        file[PETSC_MAX_PATH_LEN];

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &argv, NULL, help));
  PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
  PetscCheck(size == 1, PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "This is a uniprocessor example only");
  PetscCall(PetscOptionsGetString(NULL, NULL, "-f", file, PETSC_MAX_PATH_LEN, &flg));
  PetscCheck(flg, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate binary file with the -f option");
  PetscCall(PetscOptionsGetInt(NULL, NULL, "-trial", &trial, NULL));
  PetscCall(PetscOptionsGetIntArray(NULL, NULL, "-bs", bs, &nbs, &flg));
  if (!flg) {
    nbs   = 1;
    bs[0] = 1;
  }
  PetscCall(PetscOptionsGetIntArray(NULL, NULL, "-N", N, &nN, &flg));
  if (!flg) {
    nN   = 8;
    N[0] = 1;
    N[1] = 2;
    N[2] = 4;
    N[3] = 8;
    N[4] = 16;
    N[5] = 32;
    N[6] = 64;
    N[7] = 128;
  }
  PetscCall(PetscOptionsGetStringArray(NULL, NULL, "-type", type, &ntype, &flg));
  if (!flg) {
    ntype = 1;
    PetscCall(PetscStrallocpy(MATSEQAIJ, &type[0]));
  }
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-check", &check, NULL));
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-ABt", &abt, NULL));
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-AtB", &atb, NULL));
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-convert_aij", &convert, NULL));
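
  /* outer loop: one pass per requested block size; the input matrix is reloaded from the binary file each time */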
  for (PetscInt j = 0; j < nbs; ++j) {
    PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, file, FILE_MODE_READ, &viewer));
    PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
    PetscCall(MatSetFromOptions(A));
    PetscCall(MatLoad(A, viewer));
    PetscCall(PetscViewerDestroy(&viewer));
    PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATMPIAIJ, ""));
    PetscCheck(flg, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate a MatAIJ input matrix");
    PetscCall(MatGetSize(A, &m, &M));
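    /* if the loaded matrix is square, symmetrize it (A += A^T) so the MAT_SYMMETRIC flag set below is valid */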
    if (m == M) {
      Mat oA;
      PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &oA));
      PetscCall(MatAXPY(A, 1.0, oA, DIFFERENT_NONZERO_PATTERN));
      PetscCall(MatDestroy(&oA));
    }
    PetscCall(MatGetLocalSize(A, &m, NULL));
    PetscCall(MatGetSize(A, &M, NULL));
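    /* for bs > 1, inflate the scalar AIJ matrix into a SeqBAIJ matrix: every nonzero a_ij is replaced by the bs x bs block a_ij * T, with T a random symmetric dense block shared by all nonzeros */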
    if (bs[j] > 1) {
      Mat                T, Tt, B;
      const PetscScalar *ptr;
      PetscScalar       *val, *Aa;
      const PetscInt    *Ai, *Aj;
      PetscInt           An, i, k;
      PetscBool          done;

      PetscCall(MatCreateDense(PETSC_COMM_SELF, bs[j], bs[j], bs[j], bs[j], NULL, &T));
      PetscCall(MatSetRandom(T, NULL));
      PetscCall(MatTranspose(T, MAT_INITIAL_MATRIX, &Tt));
      PetscCall(MatAXPY(T, 1.0, Tt, SAME_NONZERO_PATTERN));
      PetscCall(MatDestroy(&Tt));
      PetscCall(MatDenseGetArrayRead(T, &ptr));
      PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &An, &Ai, &Aj, &done));
      PetscCheck(done && An == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Inconsistent sizes");
      PetscCall(MatSeqAIJGetArray(A, &Aa));
      PetscCall(MatCreate(PETSC_COMM_WORLD, &B));
      PetscCall(MatSetType(B, MATSEQBAIJ));
      PetscCall(MatSetSizes(B, bs[j] * An, bs[j] * An, PETSC_DECIDE, PETSC_DECIDE));
      PetscCall(PetscMalloc1(Ai[An] * bs[j] * bs[j], &val));
      for (i = 0; i < Ai[An]; ++i)
        for (k = 0; k < bs[j] * bs[j]; ++k) val[i * bs[j] * bs[j] + k] = Aa[i] * ptr[k];
      PetscCall(MatSetOption(B, MAT_ROW_ORIENTED, PETSC_FALSE));
      PetscCall(MatSeqBAIJSetPreallocationCSR(B, bs[j], Ai, Aj, val));
      PetscCall(PetscFree(val));
      PetscCall(MatSeqAIJRestoreArray(A, &Aa));
      PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &An, &Ai, &Aj, &done));
      PetscCall(MatDenseRestoreArrayRead(T, &ptr));
      PetscCall(MatDestroy(&T));
      PetscCall(MatDestroy(&A));
      A = B;
    }
    /* reconvert back to SeqAIJ before converting to the desired type later */
    if (!convert) E = A;
    PetscCall(MatConvert(A, MATSEQAIJ, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &E));
    PetscCall(MatSetOption(E, MAT_SYMMETRIC, PETSC_TRUE));
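    /* inner loop: one pass per requested matrix type; E keeps the reference SeqAIJ copy from which each benchmarked type is converted */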
    for (PetscInt i = 0; i < ntype; ++i) {
      char        *tmp = NULL;
      PetscInt    *ia_ptr, *ja_ptr, k;
      PetscScalar *a_ptr;
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
      struct matrix_descr descr;
      sparse_matrix_t     spr;
      descr.type = SPARSE_MATRIX_TYPE_GENERAL;
      descr.diag = SPARSE_DIAG_NON_UNIT;
#endif
      if (convert) PetscCall(MatDestroy(&A));
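      /* a type name containing "mkl" (e.g., "baijmkl") selects the MKL sparse BLAS path: the suffix is stripped to recover the PETSc base type, and an MKL handle is created below */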
      maij = PETSC_FALSE; /* reset in case a previous type in the list was "maij" */
      PetscCall(PetscStrstr(type[i], "mkl", &tmp));
      if (tmp) {
        size_t mlen, tlen;
        char   base[256];

        mkl = PETSC_TRUE;
        PetscCall(PetscStrlen(tmp, &mlen));
        PetscCall(PetscStrlen(type[i], &tlen));
        PetscCall(PetscStrncpy(base, type[i], tlen - mlen + 1));
        PetscCall(MatConvert(E, base, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A));
      } else {
        mkl = PETSC_FALSE;
        PetscCall(PetscStrstr(type[i], "maij", &tmp));
        if (!tmp) {
          PetscCall(MatConvert(E, type[i], convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A));
        } else {
          PetscCall(MatConvert(E, MATAIJ, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A));
          maij = PETSC_TRUE;
        }
      }
      PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &cuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
      if (mkl) {
        const PetscInt *Ai, *Aj;
        PetscInt        An;
        PetscBool       done;
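
        /* hand PETSc's compressed row storage to MKL: SeqAIJ as 0-based CSR, (S)BAIJ as 1-based column-major BSR; the index arrays are copied because the MKL handle may keep referencing them until mkl_sparse_destroy() */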
        PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATSEQBAIJ, MATSEQSBAIJ, ""));
        PetscCheck(flg, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Not implemented");
        PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &flg));
        PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, flg ? PETSC_FALSE : PETSC_TRUE, &An, &Ai, &Aj, &done));
        PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Inconsistent sizes");
        PetscCall(PetscMalloc1(An + 1, &ia_ptr));
        PetscCall(PetscMalloc1(Ai[An], &ja_ptr));
        if (flg) { /* SeqAIJ */
          for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k];
          for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k];
          PetscCall(MatSeqAIJGetArray(A, &a_ptr));
          PetscCallMKLSparse(mkl_sparse_d_create_csr, (&spr, SPARSE_INDEX_BASE_ZERO, An, An, ia_ptr, ia_ptr + 1, ja_ptr, a_ptr));
        } else {
          PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &flg));
          if (flg) {
            for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */
            for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */
            PetscCall(MatSeqBAIJGetArray(A, &a_ptr));
            PetscCallMKLSparse(mkl_sparse_d_create_bsr, (&spr, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, An, An, bs[j], ia_ptr, ia_ptr + 1, ja_ptr, a_ptr));
          } else {
            PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &flg));
            if (flg) {
              for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */
              for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */
              PetscCall(MatSeqSBAIJGetArray(A, &a_ptr));
              PetscCallMKLSparse(mkl_sparse_d_create_bsr, (&spr, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, An, An, bs[j], ia_ptr, ia_ptr + 1, ja_ptr, a_ptr));
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
              descr.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
              descr.mode = SPARSE_FILL_MODE_UPPER;
              descr.diag = SPARSE_DIAG_NON_UNIT;
#endif
            }
          }
        }
        PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &flg));
        PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, flg ? PETSC_FALSE : PETSC_TRUE, &An, &Ai, &Aj, &done));
      }

      PetscCall(MatViewFromOptions(A, NULL, "-A_view"));
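
      /* benchmark loop: for each requested number of columns N, create a random dense multiplicand C (printed as B) and a result matrix D, then time repeated products in a dedicated log stage (inspect with -log_view) */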
      for (k = 0; k < nN; ++k) {
        MatType       Atype, Ctype;
        PetscInt      AM, AN, CM, CN, t;
        PetscBool     set = PETSC_FALSE;
        PetscLogStage ab_stage, abt_stage, atb_stage;
        char          stage_s[256];

        PetscCall(MatCreateDense(PETSC_COMM_WORLD, bs[j] * m, PETSC_DECIDE, bs[j] * M, N[k], NULL, &C));
        PetscCall(MatCreateDense(PETSC_COMM_WORLD, bs[j] * m, PETSC_DECIDE, bs[j] * M, N[k], NULL, &D));
        PetscCall(MatSetRandom(C, NULL));
        if (cuda) { /* convert to GPU if needed */
          PetscCall(MatConvert(C, MATDENSECUDA, MAT_INPLACE_MATRIX, &C));
          PetscCall(MatConvert(D, MATDENSECUDA, MAT_INPLACE_MATRIX, &D));
        }
        if (mkl) {
          if (N[k] > 1) PetscCallMKLSparse(mkl_sparse_set_mm_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, SPARSE_LAYOUT_COLUMN_MAJOR, N[k], 1 + trial));
          else PetscCallMKLSparse(mkl_sparse_set_mv_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, 1 + trial));
          PetscCallMKLSparse(mkl_sparse_set_memory_hint, (spr, SPARSE_MEMORY_AGGRESSIVE));
          PetscCallMKLSparse(mkl_sparse_optimize, (spr));
        }
        PetscCall(MatGetType(A, &Atype));
        PetscCall(MatGetType(C, &Ctype));
        PetscCall(MatGetSize(A, &AM, &AN));
        PetscCall(MatGetSize(C, &CM, &CN));

        if (!maij || N[k] > 1) {
          PetscCall(PetscSNPrintf(stage_s, sizeof(stage_s), "ab_type_%s-bs_%" PetscInt_FMT "-N_%02d", type[i], bs[j], (int)N[k]));
          PetscCall(PetscLogStageRegister(stage_s, &ab_stage));
        }
        if (abt && N[k] > 1) {
          PetscCall(PetscSNPrintf(stage_s, sizeof(stage_s), "abt_type_%s-bs_%" PetscInt_FMT "-N_%02d", type[i], bs[j], (int)N[k]));
          PetscCall(PetscLogStageRegister(stage_s, &abt_stage));
        }
        if (atb) {
          PetscCall(PetscSNPrintf(stage_s, sizeof(stage_s), "atb_type_%s-bs_%" PetscInt_FMT "-N_%02d", type[i], bs[j], (int)N[k]));
          PetscCall(PetscLogStageRegister(stage_s, &atb_stage));
        }
        /* A*B */
        if (N[k] > 1) {
          if (!maij) {
            PetscCall(MatProductCreateWithMat(A, C, NULL, D));
            PetscCall(MatProductSetType(D, MATPRODUCT_AB));
            PetscCall(MatProductSetFromOptions(D));
            PetscCall(MatProductSymbolic(D));
          }

          if (!mkl) {
            if (!maij) {
              PetscCall(MatProductNumeric(D));
              PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MatProductTypes[MATPRODUCT_AB], Atype, AM, AN, Ctype, CM, CN));
              PetscCall(PetscLogStagePush(ab_stage));
              for (t = 0; t < trial; ++t) PetscCall(MatProductNumeric(D));
              PetscCall(PetscLogStagePop());
            } else {
              Mat                E, Ct, Dt;
              Vec                cC, cD;
              const PetscScalar *c_ptr;
              PetscScalar       *d_ptr;
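
              /* MAIJ path: an N-component MAIJ wrapper of A acts on vectors with interleaved components, so the local dense matrices of C and D are transposed in place and their arrays re-wrapped as vectors of length AM*N */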
              PetscCall(MatCreateMAIJ(A, N[k], &E));
              PetscCall(MatDenseGetLocalMatrix(C, &Ct));
              PetscCall(MatDenseGetLocalMatrix(D, &Dt));
              PetscCall(MatTranspose(Ct, MAT_INPLACE_MATRIX, &Ct));
              PetscCall(MatTranspose(Dt, MAT_INPLACE_MATRIX, &Dt));
              PetscCall(MatDenseGetArrayRead(Ct, &c_ptr));
              PetscCall(MatDenseGetArrayWrite(Dt, &d_ptr));
              PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, AM * N[k], PETSC_DECIDE, c_ptr, &cC));
              PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, AM * N[k], PETSC_DECIDE, d_ptr, &cD));
              PetscCall(MatMult(E, cC, cD));
              PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMult: with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MATMAIJ, AM, AN, VECMPI, AM * N[k], 1));
              PetscCall(PetscLogStagePush(ab_stage));
              for (t = 0; t < trial; ++t) PetscCall(MatMult(E, cC, cD));
              PetscCall(PetscLogStagePop());
              PetscCall(VecDestroy(&cD));
              PetscCall(VecDestroy(&cC));
              PetscCall(MatDestroy(&E));
              PetscCall(MatDenseRestoreArrayWrite(Dt, &d_ptr));
              PetscCall(MatDenseRestoreArrayRead(Ct, &c_ptr));
              PetscCall(MatTranspose(Ct, MAT_INPLACE_MATRIX, &Ct));
              PetscCall(MatTranspose(Dt, MAT_INPLACE_MATRIX, &Dt));
            }
          } else {
            const PetscScalar *c_ptr;
            PetscScalar       *d_ptr;

            PetscCall(MatDenseGetArrayRead(C, &c_ptr));
            PetscCall(MatDenseGetArrayWrite(D, &d_ptr));
            PetscCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_COLUMN_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM));
            PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mm (COLUMN_MAJOR): with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN, Ctype, CM, CN));
            PetscCall(PetscLogStagePush(ab_stage));
            for (t = 0; t < trial; ++t) PetscCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_COLUMN_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM));
            PetscCall(PetscLogStagePop());
            PetscCall(MatDenseRestoreArrayWrite(D, &d_ptr));
            PetscCall(MatDenseRestoreArrayRead(C, &c_ptr));
          }
        } else if (maij) {
          PetscCall(MatDestroy(&C));
          PetscCall(MatDestroy(&D));
          continue;
        } else if (!mkl) {
          Vec cC, cD;

          PetscCall(MatDenseGetColumnVecRead(C, 0, &cC));
          PetscCall(MatDenseGetColumnVecWrite(D, 0, &cD));
          PetscCall(MatMult(A, cC, cD));
          PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMult: with A %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN));
          PetscCall(PetscLogStagePush(ab_stage));
          for (t = 0; t < trial; ++t) PetscCall(MatMult(A, cC, cD));
          PetscCall(PetscLogStagePop());
          PetscCall(MatDenseRestoreColumnVecRead(C, 0, &cC));
          PetscCall(MatDenseRestoreColumnVecWrite(D, 0, &cD));
        } else {
          const PetscScalar *c_ptr;
          PetscScalar       *d_ptr;

          PetscCall(MatDenseGetArrayRead(C, &c_ptr));
          PetscCall(MatDenseGetArrayWrite(D, &d_ptr));
          PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mv: with A %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN));
          PetscCallMKLSparse(mkl_sparse_d_mv, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, c_ptr, 0.0, d_ptr));
          PetscCall(PetscLogStagePush(ab_stage));
          for (t = 0; t < trial; ++t) PetscCallMKLSparse(mkl_sparse_d_mv, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, c_ptr, 0.0, d_ptr));
          PetscCall(PetscLogStagePop());
          PetscCall(MatDenseRestoreArrayWrite(D, &d_ptr));
          PetscCall(MatDenseRestoreArrayRead(C, &c_ptr));
        }

        if (check) {
          PetscCall(MatMatMultEqual(A, C, D, 10, &flg));
          if (!flg) {
            MatType Dtype;

            PetscCall(MatGetType(D, &Dtype));
            PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %" PetscInt_FMT "\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k]));
          }
        }

        /* MKL implementation seems buggy for ABt */
        /* A*Bt */
        if (!mkl && abt && N[k] > 1) {
          PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATMPIAIJ, ""));
          if (flg) {
            PetscCall(MatTranspose(C, MAT_INPLACE_MATRIX, &C));
            PetscCall(MatGetType(C, &Ctype));
            if (!mkl) {
              PetscCall(MatProductCreateWithMat(A, C, NULL, D));
              PetscCall(MatProductSetType(D, MATPRODUCT_ABt));
              PetscCall(MatProductSetFromOptions(D));
              PetscCall(MatProductSymbolic(D));
              PetscCall(MatProductNumeric(D));
              PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and Bt %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MatProductTypes[MATPRODUCT_ABt], Atype, AM, AN, Ctype, CM, CN));
              PetscCall(PetscLogStagePush(abt_stage));
              for (t = 0; t < trial; ++t) PetscCall(MatProductNumeric(D));
              PetscCall(PetscLogStagePop());
            } else {
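              /* note: this branch is unreachable while mkl is excluded by the enclosing guard above; it is kept as a reference for the row-major mkl_sparse_d_mm() call */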
              const PetscScalar *c_ptr;
              PetscScalar       *d_ptr;

              PetscCallMKLSparse(mkl_sparse_set_mm_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, SPARSE_LAYOUT_ROW_MAJOR, N[k], 1 + trial));
              PetscCallMKLSparse(mkl_sparse_optimize, (spr));
              PetscCall(MatDenseGetArrayRead(C, &c_ptr));
              PetscCall(MatDenseGetArrayWrite(D, &d_ptr));
              PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mm (ROW_MAJOR): with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN, Ctype, CM, CN));
              PetscCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_ROW_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM));
              PetscCall(PetscLogStagePush(abt_stage));
              for (t = 0; t < trial; ++t) PetscCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_ROW_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM));
              PetscCall(PetscLogStagePop());
              PetscCall(MatDenseRestoreArrayWrite(D, &d_ptr));
              PetscCall(MatDenseRestoreArrayRead(C, &c_ptr));
            }
          }
        }

        if (!mkl && abt && N[k] > 1 && flg && check) {
          PetscCall(MatMatTransposeMultEqual(A, C, D, 10, &flg));
          if (!flg) {
            MatType Dtype;
            PetscCall(MatGetType(D, &Dtype));
            PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %" PetscInt_FMT "\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k]));
          }
        }

        /* At*B */
        if (!mkl && atb) {
          PetscCall(MatIsSymmetricKnown(A, &set, &flg));
          set = (PetscBool)(set && flg);
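          /* temporarily clear the symmetric flag (restored below), presumably so the A^T*B implementation is exercised rather than being reduced to A*B for a matrix known to be symmetric */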
          PetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_FALSE));
          if (N[k] > 1) {
            PetscCall(MatProductCreateWithMat(A, C, NULL, D));
            PetscCall(MatProductSetType(D, MATPRODUCT_AtB));
            PetscCall(MatProductSetFromOptions(D));
            PetscCall(MatProductSymbolic(D));
            PetscCall(MatProductNumeric(D));
            PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with At %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MatProductTypes[MATPRODUCT_AtB], Atype, AM, AN, Ctype, CM, CN));
            PetscCall(PetscLogStagePush(atb_stage));
            for (t = 0; t < trial; ++t) PetscCall(MatProductNumeric(D));
            PetscCall(PetscLogStagePop());
          } else {
            Vec cC, cD;

            PetscCall(MatDenseGetColumnVecRead(C, 0, &cC));
            PetscCall(MatDenseGetColumnVecWrite(D, 0, &cD));
            PetscCall(MatMultTranspose(A, cC, cD));
            PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMultTranspose: with A %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN));
            PetscCall(PetscLogStagePush(atb_stage));
            for (t = 0; t < trial; ++t) PetscCall(MatMultTranspose(A, cC, cD));
            PetscCall(PetscLogStagePop());
            PetscCall(MatDenseRestoreColumnVecRead(C, 0, &cC));
            PetscCall(MatDenseRestoreColumnVecWrite(D, 0, &cD));
          }
        }

        if (!mkl && atb && N[k] > 1 && check) {
          PetscCall(MatTransposeMatMultEqual(A, C, D, 10, &flg));
          if (!flg) {
            MatType Dtype;
            PetscCall(MatGetType(D, &Dtype));
            PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %" PetscInt_FMT "\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k]));
          }
        }
        if (set) PetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE));
        PetscCall(MatDestroy(&C));
        PetscCall(MatDestroy(&D));
      }
      if (mkl) {
        PetscCallMKLSparse(mkl_sparse_destroy, (spr));
        PetscCall(PetscFree(ia_ptr));
        PetscCall(PetscFree(ja_ptr));
      }
      if (cuda && i != ntype - 1) {
        PetscCall(PetscPrintf(PETSC_COMM_WORLD, "AIJCUSPARSE must be last, otherwise MatConvert() to another MatType is too slow\n"));
        break;
      }
    }
    if (E != A) PetscCall(MatDestroy(&E));
    PetscCall(MatDestroy(&A));
  }
  for (m = 0; m < ntype; ++m) PetscCall(PetscFree(type[m]));
  PetscCall(PetscFinalize());
  return 0;
}

/*TEST
   build:
      requires: double !complex !defined(PETSC_USE_64BIT_INDICES)

   testset:
      nsize: 1
      filter: sed "/Benchmarking/d"
      args: -f ${wPETSC_DIR}/share/petsc/datafiles/matrices/spd-real-int32-float64 -bs 1,2,3 -N 1,2,18 -check -ABt -convert_aij {{false true}shared output}
      output_file: output/empty.out
      test:
         suffix: basic
         args: -type aij,sbaij,baij
      test:
         suffix: maij
         args: -type aij,maij
      test:
         suffix: cuda
         requires: cuda
         args: -type aij,aijcusparse
      test:
         suffix: mkl
         requires: mkl_sparse_optimize
         args: -type aij,aijmkl,baijmkl,sbaijmkl

   test:
      nsize: 1
      filter: sed "/Benchmarking/d"
      args: -f ${wPETSC_DIR}/share/petsc/datafiles/matrices/spd-real-int32-float64 -bs 1,2,3,4,5,6 -N 1,8 -check -AtB -type baij
      output_file: output/empty.out

TEST*/