Actual source code: bench_kspsolve.c
1: /*
2: Poisson in 3D. Modeled by the PDE:
4: - delta u(x,y,z) = f(x,y,z)
6: With exact solution:
8: u(x,y,z) = 1.0
10: Example usage:
12: Run on GPU (requires respective backends installed):
13: ./bench_kspsolve -mat_type aijcusparse
14: ./bench_kspsolve -mat_type aijhipsparse
15: ./bench_kspsolve -mat_type aijkokkos
17: Test only MatMult:
18: ./bench_kspsolve -matmult
20: Test MatMult over 1000 iterations:
21: ./bench_kspsolve -matmult -its 1000
23: Change size of problem (e.g., use a 128x128x128 grid):
24: ./bench_kspsolve -n 128
25: */
26: static char help[] = "Solves 3D Laplacian with 27-point finite difference stencil.\n";
28: #include <petscksp.h>
30: typedef struct {
31: PetscMPIInt rank, size;
32: PetscInt n; /* global size in each dimension */
33: PetscInt dim; /* global size */
34: PetscInt nnz; /* local nonzeros */
35: PetscBool matmult; /* Do MatMult() only */
36: PetscBool debug; /* Debug flag for PreallocateCOO() */
37: PetscBool splitksp; /* Split KSPSolve and PCSetUp */
38: PetscInt its; /* No of matmult_iterations */
39: PetscInt Istart, Iend;
40: } AppCtx;
42: static PetscErrorCode PreallocateCOO(Mat A, void *ctx)
43: {
44: AppCtx *user = (AppCtx *)ctx;
45: PetscInt n = user->n, n2 = n * n, n1 = n - 1;
46: PetscInt xstart, ystart, zstart, xend, yend, zend, nm2 = n - 2, idx;
47: PetscInt nnz[] = {8, 12, 18, 27}; /* nnz for corner, edge, face, and center. */
49: PetscFunctionBeginUser;
50: xstart = user->Istart % n;
51: ystart = (user->Istart / n) % n;
52: zstart = user->Istart / n2;
53: xend = (user->Iend - 1) % n;
54: yend = ((user->Iend - 1) / n) % n;
55: zend = (user->Iend - 1) / n2;
57: /* bottom xy-plane */
58: user->nnz = 0;
59: idx = !zstart ? 0 : 1;
60: if (zstart == zend && (!zstart || zstart == n1)) idx = 0;
61: if (zstart == zend && (zstart && zstart < n1)) idx = 1;
62: if (!xstart && !ystart) // bottom left
63: user->nnz += 4 * nnz[idx] + 4 * nm2 * nnz[idx + 1] + nm2 * nm2 * nnz[idx + 2];
64: else if (xstart && xstart < n1 && !ystart) // bottom
65: user->nnz += 3 * nnz[idx] + (3 * nm2 + n1 - xstart) * nnz[idx + 1] + nm2 * nm2 * nnz[idx + 2];
66: else if (xstart == n1 && !ystart) // bottom right
67: user->nnz += 3 * nnz[idx] + (3 * nm2) * nnz[idx + 1] + nm2 * nm2 * nnz[idx + 2];
68: else if (!xstart && ystart && ystart < n1) // left
69: user->nnz += 2 * nnz[idx] + (nm2 + 2 * (n1 - ystart)) * nnz[idx + 1] + nm2 * (n1 - ystart) * nnz[idx + 2];
70: else if (xstart && xstart < n1 && ystart && ystart < n1) // center
71: user->nnz += 2 * nnz[idx] + (nm2 + (n1 - ystart) + (nm2 - ystart)) * nnz[idx + 1] + (nm2 * (nm2 - ystart) + (n1 - xstart)) * nnz[idx + 2];
72: else if (xstart == n1 && ystart && ystart < n1) // right
73: user->nnz += 2 * nnz[idx] + (nm2 + n1 - ystart + nm2 - ystart) * nnz[idx + 1] + nm2 * (nm2 - ystart) * nnz[idx + 2];
74: else if (ystart == n1 && !xstart) // top left
75: user->nnz += 2 * nnz[idx] + nm2 * nnz[idx + 1];
76: else if (ystart == n1 && xstart && xstart < n1) // top
77: user->nnz += nnz[idx] + (n1 - xstart) * nnz[idx + 1];
78: else if (xstart == n1 && ystart == n1) // top right
79: user->nnz += nnz[idx];
81: /* top and middle plane the same */
82: if (zend == zstart) user->nnz = user->nnz - 4 * nnz[idx] - 4 * nm2 * nnz[idx + 1] - nm2 * nm2 * nnz[idx + 2];
84: /* middle xy-planes */
85: if (zend - zstart > 1) user->nnz = user->nnz + (zend - zstart - 1) * (4 * nnz[1] + 4 * nnz[2] * nm2 + nnz[3] * nm2 * nm2);
87: /* top xy-plane */
88: idx = zend == n1 ? 0 : 1;
89: if (zstart == zend && (!zend || zend == n1)) idx = 0;
90: if (zstart == zend && (zend && zend < n1)) idx = 1;
91: if (!xend && !yend) // bottom left
92: user->nnz += nnz[idx];
93: else if (xend && xend < n1 && !yend) // bottom
94: user->nnz += nnz[idx] + xend * nnz[idx + 1];
95: else if (xend == n1 && !yend) // bottom right
96: user->nnz += 2 * nnz[idx] + nm2 * nnz[idx + 1];
97: else if (!xend && yend && yend < n1) // left
98: user->nnz += 2 * nnz[idx] + (nm2 + yend + yend - 1) * nnz[idx + 1] + nm2 * (yend - 1) * nnz[idx + 2];
99: else if (xend && xend < n1 && yend && yend < n1) // center
100: user->nnz += 2 * nnz[idx] + (nm2 + yend + yend - 1) * nnz[idx + 1] + (nm2 * (yend - 1) + xend) * nnz[idx + 2];
101: else if (xend == n1 && yend && yend < n1) // right
102: user->nnz += 2 * nnz[idx] + (nm2 + 2 * yend) * nnz[idx + 1] + (nm2 * yend) * nnz[idx + 2];
103: else if (!xend && yend == n1) // top left
104: user->nnz += 3 * nnz[idx] + (3 * nm2) * nnz[idx + 1] + (nm2 * nm2) * nnz[idx + 2];
105: else if (xend && xend < n1 && yend == n1) // top
106: user->nnz += 3 * nnz[idx] + (3 * nm2 + xend) * nnz[idx + 1] + (nm2 * nm2) * nnz[idx + 2];
107: else if (xend == n1 && yend == n1) // top right
108: user->nnz += 4 * nnz[idx] + (4 * nm2) * nnz[idx + 1] + (nm2 * nm2) * nnz[idx + 2];
109: if (user->debug)
110: PetscCall(PetscPrintf(PETSC_COMM_SELF, "rank %d: xyzstart = %" PetscInt_FMT ",%" PetscInt_FMT ",%" PetscInt_FMT ", xvzend = %" PetscInt_FMT ",%" PetscInt_FMT ",%" PetscInt_FMT ", nnz = %" PetscInt_FMT "\n", user->rank, xstart, ystart, zstart, xend, yend, zend,
111: user->nnz));
112: PetscFunctionReturn(PETSC_SUCCESS);
113: }
115: PetscErrorCode FillCOO(Mat A, void *ctx)
116: {
117: AppCtx *user = (AppCtx *)ctx;
118: PetscInt Ii, x, y, z, n = user->n, n2 = n * n, n1 = n - 1;
119: PetscInt count = 0;
120: PetscInt *coo_i, *coo_j;
121: PetscScalar *coo_v;
122: PetscScalar h = 1.0 / (n - 1);
123: PetscScalar vcorn = -1.0 / 13 * h; //-1.0/12.0*h;
124: PetscScalar vedge = -3.0 / 26 * h; //-1.0/6.0*h;
125: PetscScalar vface = -3.0 / 13 * h; //-1.0e-9*h;
126: PetscScalar vcent = 44.0 / 13 * h; //8.0/3.0*h;
128: PetscFunctionBeginUser;
129: PetscCall(PetscCalloc3(user->nnz, &coo_i, user->nnz, &coo_j, user->nnz, &coo_v));
130: /* Fill COO arrays */
131: for (Ii = user->Istart; Ii < user->Iend; Ii++) {
132: x = Ii % n;
133: y = (Ii / n) % n;
134: z = Ii / n2;
135: if (x > 0 && y > 0 && z > 0) {
136: coo_i[count] = Ii;
137: coo_j[count] = Ii - 1 - n - n2;
138: coo_v[count] = vcorn;
139: count++;
140: }
141: if (x > 0 && y < n1 && z > 0) {
142: coo_i[count] = Ii;
143: coo_j[count] = Ii - 1 + n - n2;
144: coo_v[count] = vcorn;
145: count++;
146: }
147: if (x < n1 && y > 0 && z > 0) {
148: coo_i[count] = Ii;
149: coo_j[count] = Ii + 1 - n - n2;
150: coo_v[count] = vcorn;
151: count++;
152: }
153: if (x < n1 && y < n1 && z > 0) {
154: coo_i[count] = Ii;
155: coo_j[count] = Ii + 1 + n - n2;
156: coo_v[count] = vcorn;
157: count++;
158: }
159: if (x > 0 && y > 0 && z < n1) {
160: coo_i[count] = Ii;
161: coo_j[count] = Ii - 1 - n + n2;
162: coo_v[count] = vcorn;
163: count++;
164: }
165: if (x > 0 && y < n1 && z < n1) {
166: coo_i[count] = Ii;
167: coo_j[count] = Ii - 1 + n + n2;
168: coo_v[count] = vcorn;
169: count++;
170: }
171: if (x < n1 && y > 0 && z < n1) {
172: coo_i[count] = Ii;
173: coo_j[count] = Ii + 1 - n + n2;
174: coo_v[count] = vcorn;
175: count++;
176: }
177: if (x < n1 && y < n1 && z < n1) {
178: coo_i[count] = Ii;
179: coo_j[count] = Ii + 1 + n + n2;
180: coo_v[count] = vcorn;
181: count++;
182: }
183: /* 12 edges */
184: if (x > 0 && y > 0) {
185: coo_i[count] = Ii;
186: coo_j[count] = Ii - 1 - n;
187: coo_v[count] = vedge;
188: count++;
189: }
190: if (x > 0 && y < n1) {
191: coo_i[count] = Ii;
192: coo_j[count] = Ii - 1 + n;
193: coo_v[count] = vedge;
194: count++;
195: }
196: if (x < n1 && y > 0) {
197: coo_i[count] = Ii;
198: coo_j[count] = Ii + 1 - n;
199: coo_v[count] = vedge;
200: count++;
201: }
202: if (x < n1 && y < n1) {
203: coo_i[count] = Ii;
204: coo_j[count] = Ii + 1 + n;
205: coo_v[count] = vedge;
206: count++;
207: }
208: if (x > 0 && z > 0) {
209: coo_i[count] = Ii;
210: coo_j[count] = Ii - 1 - n2;
211: coo_v[count] = vedge;
212: count++;
213: }
214: if (x > 0 && z < n1) {
215: coo_i[count] = Ii;
216: coo_j[count] = Ii - 1 + n2;
217: coo_v[count] = vedge;
218: count++;
219: }
220: if (x < n1 && z > 0) {
221: coo_i[count] = Ii;
222: coo_j[count] = Ii + 1 - n2;
223: coo_v[count] = vedge;
224: count++;
225: }
226: if (x < n1 && z < n1) {
227: coo_i[count] = Ii;
228: coo_j[count] = Ii + 1 + n2;
229: coo_v[count] = vedge;
230: count++;
231: }
232: if (y > 0 && z > 0) {
233: coo_i[count] = Ii;
234: coo_j[count] = Ii - n - n2;
235: coo_v[count] = vedge;
236: count++;
237: }
238: if (y > 0 && z < n1) {
239: coo_i[count] = Ii;
240: coo_j[count] = Ii - n + n2;
241: coo_v[count] = vedge;
242: count++;
243: }
244: if (y < n1 && z > 0) {
245: coo_i[count] = Ii;
246: coo_j[count] = Ii + n - n2;
247: coo_v[count] = vedge;
248: count++;
249: }
250: if (y < n1 && z < n1) {
251: coo_i[count] = Ii;
252: coo_j[count] = Ii + n + n2;
253: coo_v[count] = vedge;
254: count++;
255: }
256: /* 6 faces */
257: if (x > 0) {
258: coo_i[count] = Ii;
259: coo_j[count] = Ii - 1;
260: coo_v[count] = vface;
261: count++;
262: }
263: if (x < n1) {
264: coo_i[count] = Ii;
265: coo_j[count] = Ii + 1;
266: coo_v[count] = vface;
267: count++;
268: }
269: if (y > 0) {
270: coo_i[count] = Ii;
271: coo_j[count] = Ii - n;
272: coo_v[count] = vface;
273: count++;
274: }
275: if (y < n1) {
276: coo_i[count] = Ii;
277: coo_j[count] = Ii + n;
278: coo_v[count] = vface;
279: count++;
280: }
281: if (z > 0) {
282: coo_i[count] = Ii;
283: coo_j[count] = Ii - n2;
284: coo_v[count] = vface;
285: count++;
286: }
287: if (z < n1) {
288: coo_i[count] = Ii;
289: coo_j[count] = Ii + n2;
290: coo_v[count] = vface;
291: count++;
292: }
293: /* Center */
294: coo_i[count] = Ii;
295: coo_j[count] = Ii;
296: coo_v[count] = vcent;
297: count++;
298: }
299: PetscCheck(count == user->nnz, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "Expected %" PetscInt_FMT " nonzeros but got %" PetscInt_FMT " nonzeros in COO format", user->nnz, count);
300: PetscCall(MatSetPreallocationCOO(A, user->nnz, coo_i, coo_j));
301: PetscCall(MatSetValuesCOO(A, coo_v, INSERT_VALUES));
302: PetscCall(PetscFree3(coo_i, coo_j, coo_v));
303: PetscFunctionReturn(PETSC_SUCCESS);
304: }
306: int main(int argc, char **argv)
307: {
308: AppCtx user; /* Application context */
309: Vec x, b, u; /* approx solution, RHS, and exact solution */
310: Mat A; /* linear system matrix */
311: KSP ksp; /* linear solver context */
312: PC pc; /* Preconditioner */
313: PetscReal norm; /* Error norm */
314: PetscInt nlocal = PETSC_DECIDE, ksp_its; /* Number of KSP iterations */
315: PetscInt global_nnz = 0; /* Total number of nonzeros */
316: PetscLogDouble time_start, time_mid1 = 0.0, time_mid2 = 0.0, time_end, time_avg, floprate;
317: PetscBool printTiming = PETSC_TRUE; /* If run in CI, do not print timing result */
318: PETSC_UNUSED PetscLogStage stage;
320: PetscCall(PetscInitialize(&argc, &argv, NULL, help));
321: PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &user.size));
322: PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &user.rank));
324: user.n = 100; /* Default grid points per dimension */
325: user.matmult = PETSC_FALSE; /* Test MatMult only */
326: user.its = 100; /* Default no. of iterations for MatMult test */
327: user.debug = PETSC_FALSE; /* Debug PreallocateCOO() */
328: user.splitksp = PETSC_FALSE; /* Split KSPSolve and PCSetUp */
329: PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &user.n, NULL));
330: PetscCall(PetscOptionsGetInt(NULL, NULL, "-its", &user.its, NULL));
331: PetscCall(PetscOptionsGetBool(NULL, NULL, "-matmult", &user.matmult, NULL));
332: PetscCall(PetscOptionsGetBool(NULL, NULL, "-debug", &user.debug, NULL));
333: PetscCall(PetscOptionsGetBool(NULL, NULL, "-print_timing", &printTiming, NULL));
334: PetscCall(PetscOptionsGetBool(NULL, NULL, "-split_ksp", &user.splitksp, NULL));
336: user.dim = user.n * user.n * user.n;
337: global_nnz = 64 + 27 * (user.n - 2) * (user.n - 2) * (user.n - 2) + 108 * (user.n - 2) * (user.n - 2) + 144 * (user.n - 2);
338: PetscCheck(user.n >= 2, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Requires at least 2 grid points (-n 2), you specified -n %" PetscInt_FMT, user.n);
339: PetscCheck(user.dim >= user.size, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "MPI size (%d) exceeds the grid size %" PetscInt_FMT " (-n %" PetscInt_FMT ")", user.size, user.dim, user.n);
340: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "===========================================\n"));
341: if (user.matmult) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Test: MatMult performance - Poisson\n"));
342: else PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Test: KSP performance - Poisson\n"));
343: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\tInput matrix: 27-pt finite difference stencil\n"));
344: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\t-n %" PetscInt_FMT "\n", user.n));
345: if (user.matmult) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\t-its %" PetscInt_FMT "\n", user.its));
346: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\tDoFs = %" PetscInt_FMT "\n", user.dim));
347: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\tNumber of nonzeros = %" PetscInt_FMT "\n", global_nnz));
349: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
350: /* Create the Vecs and Mat */
351: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
352: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\nStep1 - creating Vecs and Mat...\n"));
353: PetscCall(PetscLogStageRegister("Step1 - Vecs and Mat", &stage));
354: PetscCall(PetscLogStagePush(stage));
355: PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
356: PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, user.dim, user.dim));
357: PetscCall(MatSetFromOptions(A));
359: /* cannot use MatGetOwnershipRange() because the matrix has yet to be preallocated; that happens in MatSetPreallocateCOO() */
360: PetscCall(PetscSplitOwnership(PetscObjectComm((PetscObject)A), &nlocal, &user.dim));
361: PetscCallMPI(MPI_Scan(&nlocal, &user.Istart, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)A)));
362: user.Istart -= nlocal;
363: user.Iend = user.Istart + nlocal;
365: PetscCall(PreallocateCOO(A, &user)); /* Determine local number of nonzeros */
366: PetscCall(FillCOO(A, &user)); /* Fill COO Matrix */
367: #if !defined(PETSC_HAVE_HIP) /* Due to errors in MatSolve_SeqAIJHIPSPARSE_ICC0() */
368: PetscCall(MatSetOption(A, MAT_SPD, PETSC_TRUE));
369: PetscCall(MatSetOption(A, MAT_SPD_ETERNAL, PETSC_TRUE));
370: #endif
371: PetscCall(MatCreateVecs(A, &u, &b));
372: if (!user.matmult) PetscCall(VecDuplicate(b, &x));
373: PetscCall(VecSet(u, 1.0)); /* Exact solution */
374: PetscCall(MatMult(A, u, b)); /* Compute RHS based on exact solution */
375: PetscCall(PetscLogStagePop());
377: if (user.matmult) {
378: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
379: /* MatMult */
380: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
381: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Step2 - running MatMult() %" PetscInt_FMT " times...\n", user.its));
382: PetscCall(PetscLogStageRegister("Step2 - MatMult", &stage));
383: PetscCall(PetscLogStagePush(stage));
384: PetscCall(PetscTime(&time_start));
385: for (int i = 0; i < user.its; i++) PetscCall(MatMult(A, u, b));
386: PetscCall(PetscTime(&time_end));
387: PetscCall(PetscLogStagePop());
388: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
389: /* Calculate Performance metrics */
390: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
391: time_avg = (time_end - time_start) / ((PetscLogDouble)user.its);
392: floprate = 2 * global_nnz / time_avg * 1e-9;
393: if (printTiming) {
394: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\n%-15s%-7.5f seconds\n", "Average time:", time_avg));
395: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%-15s%-9.3e Gflops/sec\n", "FOM:", floprate)); /* figure of merit */
396: }
397: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "===========================================\n"));
398: } else {
399: if (!user.splitksp) {
400: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
401: /* Solve the linear system of equations */
402: /* Measure only time of PCSetUp() and KSPSolve() */
403: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
404: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Step2 - running KSPSolve()...\n"));
405: PetscCall(PetscLogStageRegister("Step2 - KSPSolve", &stage));
406: PetscCall(PetscLogStagePush(stage));
407: PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
408: PetscCall(KSPSetOperators(ksp, A, A));
409: PetscCall(KSPSetFromOptions(ksp));
410: PetscCall(PetscTime(&time_start));
411: PetscCall(KSPSolve(ksp, b, x));
412: PetscCall(PetscTime(&time_end));
413: PetscCall(PetscLogStagePop());
414: } else {
415: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
416: /* Solve the linear system of equations */
417: /* Measure only time of PCSetUp() and KSPSolve() */
418: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
419: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Step2a - running PCSetUp()...\n"));
420: PetscCall(PetscLogStageRegister("Step2a - PCSetUp", &stage));
421: PetscCall(PetscLogStagePush(stage));
422: PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
423: PetscCall(KSPSetOperators(ksp, A, A));
424: PetscCall(KSPSetFromOptions(ksp));
425: PetscCall(KSPGetPC(ksp, &pc));
426: PetscCall(PetscTime(&time_start));
427: PetscCall(PCSetUp(pc));
428: PetscCall(PetscTime(&time_mid1));
429: PetscCall(PetscLogStagePop());
430: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Step2b - running KSPSolve()...\n"));
431: PetscCall(PetscLogStageRegister("Step2b - KSPSolve", &stage));
432: PetscCall(PetscLogStagePush(stage));
433: PetscCall(PetscTime(&time_mid2));
434: PetscCall(KSPSolve(ksp, b, x));
435: PetscCall(PetscTime(&time_end));
436: PetscCall(PetscLogStagePop());
437: }
439: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
440: /* Calculate error norm */
441: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
442: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Step3 - calculating error norm...\n"));
443: PetscCall(PetscLogStageRegister("Step3 - Error norm", &stage));
444: PetscCall(PetscLogStagePush(stage));
445: PetscCall(VecAXPY(x, -1.0, u));
446: PetscCall(VecNorm(x, NORM_2, &norm));
447: PetscCall(PetscLogStagePop());
449: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
450: /* Summary */
451: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
452: PetscCall(KSPGetIterationNumber(ksp, &ksp_its));
453: if (printTiming) {
454: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\n%-15s%-1.3e\n", "Error norm:", (double)norm));
455: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%-15s%-3" PetscInt_FMT "\n", "KSP iters:", ksp_its));
456: if (user.splitksp) {
457: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%-15s%-7.5f seconds\n", "PCSetUp:", time_mid1 - time_start));
458: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%-15s%-7.5f seconds\n", "KSPSolve:", time_end - time_mid2));
459: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%-15s%-7.5f seconds\n", "Total Solve:", time_end - time_start));
460: } else {
461: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%-15s%-7.5f seconds\n", "KSPSolve:", time_end - time_start));
462: }
463: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%-15s%-1.3e DoFs/sec\n", "FOM:", user.dim / (time_end - time_start))); /* figure of merit */
464: }
465: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "===========================================\n"));
466: }
467: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
468: /* Free up memory */
469: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
470: if (!user.matmult) {
471: PetscCall(KSPDestroy(&ksp));
472: PetscCall(VecDestroy(&x));
473: }
474: PetscCall(VecDestroy(&u));
475: PetscCall(VecDestroy(&b));
476: PetscCall(MatDestroy(&A));
477: PetscCall(PetscFinalize());
478: return 0;
479: }
481: /*TEST
483: testset:
484: args: -print_timing false -matmult -its 10 -n 8
485: nsize: {{1 3}}
486: output_file: output/bench_kspsolve_matmult.out
488: test:
489: suffix: matmult
491: test:
492: suffix: hip_matmult
493: requires: hip
494: args: -mat_type aijhipsparse
496: test:
497: suffix: cuda_matmult
498: requires: cuda
499: args: -mat_type aijcusparse
501: test:
502: suffix: kok_matmult
503: requires: kokkos_kernels
504: args: -mat_type aijkokkos
506: testset:
507: args: -print_timing false -its 10 -n 8
508: nsize: {{3 5}}
509: output_file: output/bench_kspsolve_ksp.out
511: test:
512: suffix: ksp
514: test:
515: suffix: nbr
516: requires: defined(PETSC_HAVE_MPI_PERSISTENT_NEIGHBORHOOD_COLLECTIVES)
517: args: -sf_type neighbor -sf_neighbor_persistent {{0 1}}
519: test:
520: suffix: hip_ksp
521: requires: hip
522: args: -mat_type aijhipsparse
524: test:
525: suffix: cuda_ksp
526: requires: cuda
527: args: -mat_type aijcusparse
529: test:
530: suffix: kok_ksp
531: requires: kokkos_kernels
532: args: -mat_type aijkokkos
534: test:
535: suffix: kok_nbr
536: requires: kokkos_kernels defined(PETSC_HAVE_MPI_PERSISTENT_NEIGHBORHOOD_COLLECTIVES)
537: args: -mat_type aijkokkos -sf_type neighbor -sf_neighbor_persistent {{0 1}}
539: TEST*/