Actual source code: ex237.c

  1: static char help[] = "Mini-app to benchmark matrix--matrix multiplication\n\n";

  3: /*
  4:   See the paper below for more information

  6:    "KSPHPDDM and PCHPDDM: Extending PETSc with Robust Overlapping Schwarz Preconditioners and Advanced Krylov Methods",
  7:    P. Jolivet, J. E. Roman, and S. Zampini (2020).
  8: */

 10: #include <petsc.h>

 12: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
 13:   #include <mkl.h>
 14:   #define PetscCallMKLSparse(func, args) \
 15:     do { \
 16:       sparse_status_t __ierr; \
 17:       PetscStackPushExternal(#func); \
 18:       __ierr = func args; \
 19:       PetscStackPop; \
 20:       PetscCheck(__ierr == SPARSE_STATUS_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in %s(): error code %d", #func, (int)__ierr); \
 21:     } while (0)
 22: #else
 23:   #define PetscCallMKLSparse(func, args) \
 24:     do { \
 25:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No MKL support"); \
 26:     } while (0)
 27: #endif

 29: int main(int argc, char **argv)
 30: {
 31:   Mat         A, C, D, E;
 32:   PetscInt    nbs = 10, ntype = 10, nN = 8, m, M, trial = 5;
 33:   PetscViewer viewer;
 34:   PetscInt    bs[10], N[8];
 35:   char       *type[10];
 36:   PetscMPIInt size;
 37:   PetscBool   flg, cuda, maij = PETSC_FALSE, check = PETSC_FALSE, abt = PETSC_FALSE, atb = PETSC_FALSE, convert = PETSC_FALSE, mkl;
 38:   char        file[PETSC_MAX_PATH_LEN];

 40:   PetscFunctionBeginUser;
 41:   PetscCall(PetscInitialize(&argc, &argv, NULL, help));
 42:   PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
 43:   PetscCheck(size == 1, PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "This is a uniprocessor example only");
 44:   PetscCall(PetscOptionsGetString(NULL, NULL, "-f", file, PETSC_MAX_PATH_LEN, &flg));
 45:   PetscCheck(flg, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate binary file with the -f option");
 46:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-trial", &trial, NULL));
 47:   PetscCall(PetscOptionsGetIntArray(NULL, NULL, "-bs", bs, &nbs, &flg));
 48:   if (!flg) {
 49:     nbs   = 1;
 50:     bs[0] = 1;
 51:   }
 52:   PetscCall(PetscOptionsGetIntArray(NULL, NULL, "-N", N, &nN, &flg));
 53:   if (!flg) {
 54:     nN   = 8;
 55:     N[0] = 1;
 56:     N[1] = 2;
 57:     N[2] = 4;
 58:     N[3] = 8;
 59:     N[4] = 16;
 60:     N[5] = 32;
 61:     N[6] = 64;
 62:     N[7] = 128;
 63:   }
 64:   PetscCall(PetscOptionsGetStringArray(NULL, NULL, "-type", type, &ntype, &flg));
 65:   if (!flg) {
 66:     ntype = 1;
 67:     PetscCall(PetscStrallocpy(MATSEQAIJ, &type[0]));
 68:   }
 69:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-check", &check, NULL));
 70:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-ABt", &abt, NULL));
 71:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-AtB", &atb, NULL));
 72:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-convert_aij", &convert, NULL));
 73:   for (PetscInt j = 0; j < nbs; ++j) {
 74:     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, file, FILE_MODE_READ, &viewer));
 75:     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
 76:     PetscCall(MatSetFromOptions(A));
 77:     PetscCall(MatLoad(A, viewer));
 78:     PetscCall(PetscViewerDestroy(&viewer));
 79:     PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATMPIAIJ, ""));
 80:     PetscCheck(flg, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate a MatAIJ input matrix");
 81:     PetscCall(MatGetSize(A, &m, &M));
 82:     if (m == M) {
 83:       Mat oA;
 84:       PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &oA));
 85:       PetscCall(MatAXPY(A, 1.0, oA, DIFFERENT_NONZERO_PATTERN));
 86:       PetscCall(MatDestroy(&oA));
 87:     }
 88:     PetscCall(MatGetLocalSize(A, &m, NULL));
 89:     PetscCall(MatGetSize(A, &M, NULL));
 90:     if (bs[j] > 1) {
 91:       Mat                T, Tt, B;
 92:       const PetscScalar *ptr;
 93:       PetscScalar       *val, *Aa;
 94:       const PetscInt    *Ai, *Aj;
 95:       PetscInt           An, i, k;
 96:       PetscBool          done;

 98:       PetscCall(MatCreateDense(PETSC_COMM_SELF, bs[j], bs[j], bs[j], bs[j], NULL, &T));
 99:       PetscCall(MatSetRandom(T, NULL));
100:       PetscCall(MatTranspose(T, MAT_INITIAL_MATRIX, &Tt));
101:       PetscCall(MatAXPY(T, 1.0, Tt, SAME_NONZERO_PATTERN));
102:       PetscCall(MatDestroy(&Tt));
103:       PetscCall(MatDenseGetArrayRead(T, &ptr));
104:       PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &An, &Ai, &Aj, &done));
105:       PetscCheck(done && An == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Inconsistent sizes");
106:       PetscCall(MatSeqAIJGetArray(A, &Aa));
107:       PetscCall(MatCreate(PETSC_COMM_WORLD, &B));
108:       PetscCall(MatSetType(B, MATSEQBAIJ));
109:       PetscCall(MatSetSizes(B, bs[j] * An, bs[j] * An, PETSC_DECIDE, PETSC_DECIDE));
110:       PetscCall(PetscMalloc1(Ai[An] * bs[j] * bs[j], &val));
111:       for (i = 0; i < Ai[An]; ++i)
112:         for (k = 0; k < bs[j] * bs[j]; ++k) val[i * bs[j] * bs[j] + k] = Aa[i] * ptr[k];
113:       PetscCall(MatSetOption(B, MAT_ROW_ORIENTED, PETSC_FALSE));
114:       PetscCall(MatSeqBAIJSetPreallocationCSR(B, bs[j], Ai, Aj, val));
115:       PetscCall(PetscFree(val));
116:       PetscCall(MatSeqAIJRestoreArray(A, &Aa));
117:       PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &An, &Ai, &Aj, &done));
118:       PetscCall(MatDenseRestoreArrayRead(T, &ptr));
119:       PetscCall(MatDestroy(&T));
120:       PetscCall(MatDestroy(&A));
121:       A = B;
122:     }
123:     /* reconvert back to SeqAIJ before converting to the desired type later */
124:     if (!convert) E = A;
125:     PetscCall(MatConvert(A, MATSEQAIJ, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &E));
126:     PetscCall(MatSetOption(E, MAT_SYMMETRIC, PETSC_TRUE));
127:     for (PetscInt i = 0; i < ntype; ++i) {
128:       char        *tmp = NULL;
129:       PetscInt    *ia_ptr, *ja_ptr, k;
130:       PetscScalar *a_ptr;
131: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
132:       struct matrix_descr descr;
133:       sparse_matrix_t     spr;
134:       descr.type = SPARSE_MATRIX_TYPE_GENERAL;
135:       descr.diag = SPARSE_DIAG_NON_UNIT;
136: #endif
137:       if (convert) PetscCall(MatDestroy(&A));
138:       PetscCall(PetscStrstr(type[i], "mkl", &tmp));
139:       if (tmp) {
140:         size_t mlen, tlen;
141:         char   base[256];

143:         mkl = PETSC_TRUE;
144:         PetscCall(PetscStrlen(tmp, &mlen));
145:         PetscCall(PetscStrlen(type[i], &tlen));
146:         PetscCall(PetscStrncpy(base, type[i], tlen - mlen + 1));
147:         PetscCall(MatConvert(E, base, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A));
148:       } else {
149:         mkl = PETSC_FALSE;
150:         PetscCall(PetscStrstr(type[i], "maij", &tmp));
151:         if (!tmp) {
152:           PetscCall(MatConvert(E, type[i], convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A));
153:         } else {
154:           PetscCall(MatConvert(E, MATAIJ, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A));
155:           maij = PETSC_TRUE;
156:         }
157:       }
158:       PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &cuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
159:       if (mkl) {
160:         const PetscInt *Ai, *Aj;
161:         PetscInt        An;
162:         PetscBool       done;

164:         PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATSEQBAIJ, MATSEQSBAIJ, ""));
165:         PetscCheck(flg, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Not implemented");
166:         PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &flg));
167:         PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, flg ? PETSC_FALSE : PETSC_TRUE, &An, &Ai, &Aj, &done));
168:         PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Inconsistent sizes");
169:         PetscCall(PetscMalloc1(An + 1, &ia_ptr));
170:         PetscCall(PetscMalloc1(Ai[An], &ja_ptr));
171:         if (flg) { /* SeqAIJ */
172:           for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k];
173:           for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k];
174:           PetscCall(MatSeqAIJGetArray(A, &a_ptr));
175:           PetscCallMKLSparse(mkl_sparse_d_create_csr, (&spr, SPARSE_INDEX_BASE_ZERO, An, An, ia_ptr, ia_ptr + 1, ja_ptr, a_ptr));
176:         } else {
177:           PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &flg));
178:           if (flg) {
179:             for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */
180:             for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */
181:             PetscCall(MatSeqBAIJGetArray(A, &a_ptr));
182:             PetscCallMKLSparse(mkl_sparse_d_create_bsr, (&spr, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, An, An, bs[j], ia_ptr, ia_ptr + 1, ja_ptr, a_ptr));
183:           } else {
184:             PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &flg));
185:             if (flg) {
186:               for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */
187:               for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */
188:               PetscCall(MatSeqSBAIJGetArray(A, &a_ptr));
189:               PetscCallMKLSparse(mkl_sparse_d_create_bsr, (&spr, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, An, An, bs[j], ia_ptr, ia_ptr + 1, ja_ptr, a_ptr));
190: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
191:               descr.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
192:               descr.mode = SPARSE_FILL_MODE_UPPER;
193:               descr.diag = SPARSE_DIAG_NON_UNIT;
194: #endif
195:             }
196:           }
197:         }
198:         PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &flg));
199:         PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, flg ? PETSC_FALSE : PETSC_TRUE, &An, &Ai, &Aj, &done));
200:       }

202:       PetscCall(MatViewFromOptions(A, NULL, "-A_view"));

204:       for (k = 0; k < nN; ++k) {
205:         MatType       Atype, Ctype;
206:         PetscInt      AM, AN, CM, CN, t;
207:         PetscBool     set = PETSC_FALSE;
208:         PetscLogStage ab_stage, abt_stage, atb_stage;
209:         char          stage_s[256];

211:         PetscCall(MatCreateDense(PETSC_COMM_WORLD, bs[j] * m, PETSC_DECIDE, bs[j] * M, N[k], NULL, &C));
212:         PetscCall(MatCreateDense(PETSC_COMM_WORLD, bs[j] * m, PETSC_DECIDE, bs[j] * M, N[k], NULL, &D));
213:         PetscCall(MatSetRandom(C, NULL));
214:         if (cuda) { /* convert to GPU if needed */
215:           PetscCall(MatConvert(C, MATDENSECUDA, MAT_INPLACE_MATRIX, &C));
216:           PetscCall(MatConvert(D, MATDENSECUDA, MAT_INPLACE_MATRIX, &D));
217:         }
218:         if (mkl) {
219:           if (N[k] > 1) PetscCallMKLSparse(mkl_sparse_set_mm_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, SPARSE_LAYOUT_COLUMN_MAJOR, N[k], 1 + trial));
220:           else PetscCallMKLSparse(mkl_sparse_set_mv_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, 1 + trial));
221:           PetscCallMKLSparse(mkl_sparse_set_memory_hint, (spr, SPARSE_MEMORY_AGGRESSIVE));
222:           PetscCallMKLSparse(mkl_sparse_optimize, (spr));
223:         }
224:         PetscCall(MatGetType(A, &Atype));
225:         PetscCall(MatGetType(C, &Ctype));
226:         PetscCall(MatGetSize(A, &AM, &AN));
227:         PetscCall(MatGetSize(C, &CM, &CN));

229:         if (!maij || N[k] > 1) {
230:           PetscCall(PetscSNPrintf(stage_s, sizeof(stage_s), "ab_type_%s-bs_%" PetscInt_FMT "-N_%02d", type[i], bs[j], (int)N[k]));
231:           PetscCall(PetscLogStageRegister(stage_s, &ab_stage));
232:         }
233:         if (abt && N[k] > 1) {
234:           PetscCall(PetscSNPrintf(stage_s, sizeof(stage_s), "abt_type_%s-bs_%" PetscInt_FMT "-N_%02d", type[i], bs[j], (int)N[k]));
235:           PetscCall(PetscLogStageRegister(stage_s, &abt_stage));
236:         }
237:         if (atb) {
238:           PetscCall(PetscSNPrintf(stage_s, sizeof(stage_s), "atb_type_%s-bs_%" PetscInt_FMT "-N_%02d", type[i], bs[j], (int)N[k]));
239:           PetscCall(PetscLogStageRegister(stage_s, &atb_stage));
240:         }
241:         /* A*B */
242:         if (N[k] > 1) {
243:           if (!maij) {
244:             PetscCall(MatProductCreateWithMat(A, C, NULL, D));
245:             PetscCall(MatProductSetType(D, MATPRODUCT_AB));
246:             PetscCall(MatProductSetFromOptions(D));
247:             PetscCall(MatProductSymbolic(D));
248:           }

250:           if (!mkl) {
251:             if (!maij) {
252:               PetscCall(MatProductNumeric(D));
253:               PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MatProductTypes[MATPRODUCT_AB], Atype, AM, AN, Ctype, CM, CN));
254:               PetscCall(PetscLogStagePush(ab_stage));
255:               for (t = 0; t < trial; ++t) PetscCall(MatProductNumeric(D));
256:               PetscCall(PetscLogStagePop());
257:             } else {
258:               Mat                E, Ct, Dt;
259:               Vec                cC, cD;
260:               const PetscScalar *c_ptr;
261:               PetscScalar       *d_ptr;
262:               PetscCall(MatCreateMAIJ(A, N[k], &E));
263:               PetscCall(MatDenseGetLocalMatrix(C, &Ct));
264:               PetscCall(MatDenseGetLocalMatrix(D, &Dt));
265:               PetscCall(MatTranspose(Ct, MAT_INPLACE_MATRIX, &Ct));
266:               PetscCall(MatTranspose(Dt, MAT_INPLACE_MATRIX, &Dt));
267:               PetscCall(MatDenseGetArrayRead(Ct, &c_ptr));
268:               PetscCall(MatDenseGetArrayWrite(Dt, &d_ptr));
269:               PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, AM * N[k], PETSC_DECIDE, c_ptr, &cC));
270:               PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, AM * N[k], PETSC_DECIDE, d_ptr, &cD));
271:               PetscCall(MatMult(E, cC, cD));
272:               PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMult: with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MATMAIJ, AM, AN, VECMPI, AM * N[k], 1));
273:               PetscCall(PetscLogStagePush(ab_stage));
274:               for (t = 0; t < trial; ++t) PetscCall(MatMult(E, cC, cD));
275:               PetscCall(PetscLogStagePop());
276:               PetscCall(VecDestroy(&cD));
277:               PetscCall(VecDestroy(&cC));
278:               PetscCall(MatDestroy(&E));
279:               PetscCall(MatDenseRestoreArrayWrite(Dt, &d_ptr));
280:               PetscCall(MatDenseRestoreArrayRead(Ct, &c_ptr));
281:               PetscCall(MatTranspose(Ct, MAT_INPLACE_MATRIX, &Ct));
282:               PetscCall(MatTranspose(Dt, MAT_INPLACE_MATRIX, &Dt));
283:             }
284:           } else {
285:             const PetscScalar *c_ptr;
286:             PetscScalar       *d_ptr;

288:             PetscCall(MatDenseGetArrayRead(C, &c_ptr));
289:             PetscCall(MatDenseGetArrayWrite(D, &d_ptr));
290:             PetscCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_COLUMN_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM));
291:             PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mm (COLUMN_MAJOR): with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN, Ctype, CM, CN));
292:             PetscCall(PetscLogStagePush(ab_stage));
293:             for (t = 0; t < trial; ++t) PetscCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_COLUMN_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM));
294:             PetscCall(PetscLogStagePop());
295:             PetscCall(MatDenseRestoreArrayWrite(D, &d_ptr));
296:             PetscCall(MatDenseRestoreArrayRead(C, &c_ptr));
297:           }
298:         } else if (maij) {
299:           PetscCall(MatDestroy(&C));
300:           PetscCall(MatDestroy(&D));
301:           continue;
302:         } else if (!mkl) {
303:           Vec cC, cD;

305:           PetscCall(MatDenseGetColumnVecRead(C, 0, &cC));
306:           PetscCall(MatDenseGetColumnVecWrite(D, 0, &cD));
307:           PetscCall(MatMult(A, cC, cD));
308:           PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMult: with A %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN));
309:           PetscCall(PetscLogStagePush(ab_stage));
310:           for (t = 0; t < trial; ++t) PetscCall(MatMult(A, cC, cD));
311:           PetscCall(PetscLogStagePop());
312:           PetscCall(MatDenseRestoreColumnVecRead(C, 0, &cC));
313:           PetscCall(MatDenseRestoreColumnVecWrite(D, 0, &cD));
314:         } else {
315:           const PetscScalar *c_ptr;
316:           PetscScalar       *d_ptr;

318:           PetscCall(MatDenseGetArrayRead(C, &c_ptr));
319:           PetscCall(MatDenseGetArrayWrite(D, &d_ptr));
320:           PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mv: with A %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN));
321:           PetscCallMKLSparse(mkl_sparse_d_mv, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, c_ptr, 0.0, d_ptr));
322:           PetscCall(PetscLogStagePush(ab_stage));
323:           for (t = 0; t < trial; ++t) PetscCallMKLSparse(mkl_sparse_d_mv, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, c_ptr, 0.0, d_ptr));
324:           PetscCall(PetscLogStagePop());
325:           PetscCall(MatDenseRestoreArrayWrite(D, &d_ptr));
326:           PetscCall(MatDenseRestoreArrayRead(C, &c_ptr));
327:         }

329:         if (check) {
330:           PetscCall(MatMatMultEqual(A, C, D, 10, &flg));
331:           if (!flg) {
332:             MatType Dtype;

334:             PetscCall(MatGetType(D, &Dtype));
335:             PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %" PetscInt_FMT "\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k]));
336:           }
337:         }

339:         /* MKL implementation seems buggy for ABt */
340:         /* A*Bt */
341:         if (!mkl && abt && N[k] > 1) {
342:           PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATMPIAIJ, ""));
343:           if (flg) {
344:             PetscCall(MatTranspose(C, MAT_INPLACE_MATRIX, &C));
345:             PetscCall(MatGetType(C, &Ctype));
346:             if (!mkl) {
347:               PetscCall(MatProductCreateWithMat(A, C, NULL, D));
348:               PetscCall(MatProductSetType(D, MATPRODUCT_ABt));
349:               PetscCall(MatProductSetFromOptions(D));
350:               PetscCall(MatProductSymbolic(D));
351:               PetscCall(MatProductNumeric(D));
352:               PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and Bt %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MatProductTypes[MATPRODUCT_ABt], Atype, AM, AN, Ctype, CM, CN));
353:               PetscCall(PetscLogStagePush(abt_stage));
354:               for (t = 0; t < trial; ++t) PetscCall(MatProductNumeric(D));
355:               PetscCall(PetscLogStagePop());
356:             } else {
357:               const PetscScalar *c_ptr;
358:               PetscScalar       *d_ptr;

360:               PetscCallMKLSparse(mkl_sparse_set_mm_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, SPARSE_LAYOUT_ROW_MAJOR, N[k], 1 + trial));
361:               PetscCallMKLSparse(mkl_sparse_optimize, (spr));
362:               PetscCall(MatDenseGetArrayRead(C, &c_ptr));
363:               PetscCall(MatDenseGetArrayWrite(D, &d_ptr));
364:               PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mm (ROW_MAJOR): with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN, Ctype, CM, CN));
365:               PetscCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_ROW_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM));
366:               PetscCall(PetscLogStagePush(ab_stage));
367:               for (t = 0; t < trial; ++t) PetscCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_ROW_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM));
368:               PetscCall(PetscLogStagePop());
369:               PetscCall(MatDenseRestoreArrayWrite(D, &d_ptr));
370:               PetscCall(MatDenseRestoreArrayRead(C, &c_ptr));
371:             }
372:           }
373:         }

375:         if (!mkl && abt && N[k] > 1 && flg && check) {
376:           PetscCall(MatMatTransposeMultEqual(A, C, D, 10, &flg));
377:           if (!flg) {
378:             MatType Dtype;
379:             PetscCall(MatGetType(D, &Dtype));
380:             PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %" PetscInt_FMT "\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k]));
381:           }
382:         }

384:         /* At*B */
385:         if (!mkl && atb) {
386:           PetscCall(MatIsSymmetricKnown(A, &set, &flg));
387:           set = (PetscBool)(set && flg);
388:           PetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_FALSE));
389:           if (N[k] > 1) {
390:             PetscCall(MatProductCreateWithMat(A, C, NULL, D));
391:             PetscCall(MatProductSetType(D, MATPRODUCT_AtB));
392:             PetscCall(MatProductSetFromOptions(D));
393:             PetscCall(MatProductSymbolic(D));
394:             PetscCall(MatProductNumeric(D));
395:             PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with At %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MatProductTypes[MATPRODUCT_AtB], Atype, AM, AN, Ctype, CM, CN));
396:             PetscCall(PetscLogStagePush(atb_stage));
397:             for (t = 0; t < trial; ++t) PetscCall(MatProductNumeric(D));
398:             PetscCall(PetscLogStagePop());
399:           } else {
400:             Vec cC, cD;

402:             PetscCall(MatDenseGetColumnVecRead(C, 0, &cC));
403:             PetscCall(MatDenseGetColumnVecWrite(D, 0, &cD));
404:             PetscCall(MatMultTranspose(A, cC, cD));
405:             PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMultTranspose: with A %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN));
406:             PetscCall(PetscLogStagePush(atb_stage));
407:             for (t = 0; t < trial; ++t) PetscCall(MatMultTranspose(A, cC, cD));
408:             PetscCall(PetscLogStagePop());
409:             PetscCall(MatDenseRestoreColumnVecRead(C, 0, &cC));
410:             PetscCall(MatDenseRestoreColumnVecWrite(D, 0, &cD));
411:           }
412:         }

414:         if (!mkl && atb && N[k] > 1 && check) {
415:           PetscCall(MatTransposeMatMultEqual(A, C, D, 10, &flg));
416:           if (!flg) {
417:             MatType Dtype;
418:             PetscCall(MatGetType(D, &Dtype));
419:             PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %" PetscInt_FMT "\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k]));
420:           }
421:         }
422:         if (set) PetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE));
423:         PetscCall(MatDestroy(&C));
424:         PetscCall(MatDestroy(&D));
425:       }
426:       if (mkl) {
427:         PetscCallMKLSparse(mkl_sparse_destroy, (spr));
428:         PetscCall(PetscFree(ia_ptr));
429:         PetscCall(PetscFree(ja_ptr));
430:       }
431:       if (cuda && i != ntype - 1) {
432:         PetscCall(PetscPrintf(PETSC_COMM_WORLD, "AIJCUSPARSE must be last, otherwise MatConvert() to another MatType is too slow\n"));
433:         break;
434:       }
435:     }
436:     if (E != A) PetscCall(MatDestroy(&E));
437:     PetscCall(MatDestroy(&A));
438:   }
439:   for (m = 0; m < ntype; ++m) PetscCall(PetscFree(type[m]));
440:   PetscCall(PetscFinalize());
441:   return 0;
442: }

444: /*TEST
445:    build:
446:      requires: double !complex !defined(PETSC_USE_64BIT_INDICES)

448:    testset:
449:      nsize: 1
450:      filter: sed "/Benchmarking/d"
451:      args: -f ${wPETSC_DIR}/share/petsc/datafiles/matrices/spd-real-int32-float64 -bs 1,2,3 -N 1,2,18 -check -ABt -convert_aij {{false true}shared output}
452:      output_file: output/empty.out
453:      test:
454:        suffix: basic
455:        args: -type aij,sbaij,baij
456:      test:
457:        suffix: maij
458:        args: -type aij,maij
459:      test:
460:        suffix: cuda
461:        requires: cuda
462:        args: -type aij,aijcusparse
463:      test:
464:        suffix: mkl
465:        requires: mkl_sparse_optimize
466:        args: -type aij,aijmkl,baijmkl,sbaijmkl

468:    test:
469:      nsize: 1
470:      filter: sed "/Benchmarking/d"
471:      args: -f ${wPETSC_DIR}/share/petsc/datafiles/matrices/spd-real-int32-float64 -bs 1,2,3,4,5,6 -N 1,8 -check -AtB -type baij
472:      output_file: output/empty.out

474: TEST*/