Actual source code: bench_kspsolve.c

  1: /*
  2: Poisson in 3D. Modeled by the PDE:

  4:   - delta u(x,y,z) = f(x,y,z)

  6: With exact solution:

  8:    u(x,y,z) = 1.0

 10: Example usage:

 12:   Run on GPU (requires respective backends installed):
 13:     ./bench_kspsolve -mat_type aijcusparse
 14:     ./bench_kspsolve -mat_type aijhipsparse
 15:     ./bench_kspsolve -mat_type aijkokkos

 17:   Test only MatMult:
 18:     ./bench_kspsolve -matmult

 20:   Test MatMult over 1000 iterations:
 21:     ./bench_kspsolve -matmult -its 1000

 23:   Change size of problem (e.g., use a 128x128x128 grid):
 24:     ./bench_kspsolve -n 128
 25: */
 26: static char help[] = "Solves 3D Laplacian with 27-point finite difference stencil.\n";

 28: #include <petscksp.h>

 30: typedef struct {
 31:   PetscMPIInt rank, size;
 32:   PetscInt    n;        /* global size in each dimension */
 33:   PetscInt    dim;      /* global size */
 34:   PetscInt    nnz;      /* local nonzeros */
 35:   PetscBool   matmult;  /* Do MatMult() only */
 36:   PetscBool   debug;    /* Debug flag for PreallocateCOO() */
 37:   PetscBool   splitksp; /* Split KSPSolve and PCSetUp */
 38:   PetscInt    its;      /* No of matmult_iterations */
 39:   PetscInt    Istart, Iend;
 40: } AppCtx;

 42: static PetscErrorCode PreallocateCOO(Mat A, void *ctx)
 43: {
 44:   AppCtx  *user = (AppCtx *)ctx;
 45:   PetscInt n = user->n, n2 = n * n, n1 = n - 1;
 46:   PetscInt xstart, ystart, zstart, xend, yend, zend, nm2 = n - 2, idx;
 47:   PetscInt nnz[] = {8, 12, 18, 27}; /* nnz for corner, edge, face, and center. */

 49:   PetscFunctionBeginUser;
 50:   xstart = user->Istart % n;
 51:   ystart = (user->Istart / n) % n;
 52:   zstart = user->Istart / n2;
 53:   xend   = (user->Iend - 1) % n;
 54:   yend   = ((user->Iend - 1) / n) % n;
 55:   zend   = (user->Iend - 1) / n2;

 57:   /* bottom xy-plane */
 58:   user->nnz = 0;
 59:   idx       = !zstart ? 0 : 1;
 60:   if (zstart == zend && (!zstart || zstart == n1)) idx = 0;
 61:   if (zstart == zend && (zstart && zstart < n1)) idx = 1;
 62:   if (!xstart && !ystart) // bottom left
 63:     user->nnz += 4 * nnz[idx] + 4 * nm2 * nnz[idx + 1] + nm2 * nm2 * nnz[idx + 2];
 64:   else if (xstart && xstart < n1 && !ystart) // bottom
 65:     user->nnz += 3 * nnz[idx] + (3 * nm2 + n1 - xstart) * nnz[idx + 1] + nm2 * nm2 * nnz[idx + 2];
 66:   else if (xstart == n1 && !ystart) // bottom right
 67:     user->nnz += 3 * nnz[idx] + (3 * nm2) * nnz[idx + 1] + nm2 * nm2 * nnz[idx + 2];
 68:   else if (!xstart && ystart && ystart < n1) // left
 69:     user->nnz += 2 * nnz[idx] + (nm2 + 2 * (n1 - ystart)) * nnz[idx + 1] + nm2 * (n1 - ystart) * nnz[idx + 2];
 70:   else if (xstart && xstart < n1 && ystart && ystart < n1) // center
 71:     user->nnz += 2 * nnz[idx] + (nm2 + (n1 - ystart) + (nm2 - ystart)) * nnz[idx + 1] + (nm2 * (nm2 - ystart) + (n1 - xstart)) * nnz[idx + 2];
 72:   else if (xstart == n1 && ystart && ystart < n1) // right
 73:     user->nnz += 2 * nnz[idx] + (nm2 + n1 - ystart + nm2 - ystart) * nnz[idx + 1] + nm2 * (nm2 - ystart) * nnz[idx + 2];
 74:   else if (ystart == n1 && !xstart) // top left
 75:     user->nnz += 2 * nnz[idx] + nm2 * nnz[idx + 1];
 76:   else if (ystart == n1 && xstart && xstart < n1) // top
 77:     user->nnz += nnz[idx] + (n1 - xstart) * nnz[idx + 1];
 78:   else if (xstart == n1 && ystart == n1) // top right
 79:     user->nnz += nnz[idx];

 81:   /* top and middle plane the same */
 82:   if (zend == zstart) user->nnz = user->nnz - 4 * nnz[idx] - 4 * nm2 * nnz[idx + 1] - nm2 * nm2 * nnz[idx + 2];

 84:   /* middle xy-planes */
 85:   if (zend - zstart > 1) user->nnz = user->nnz + (zend - zstart - 1) * (4 * nnz[1] + 4 * nnz[2] * nm2 + nnz[3] * nm2 * nm2);

 87:   /* top xy-plane */
 88:   idx = zend == n1 ? 0 : 1;
 89:   if (zstart == zend && (!zend || zend == n1)) idx = 0;
 90:   if (zstart == zend && (zend && zend < n1)) idx = 1;
 91:   if (!xend && !yend) // bottom left
 92:     user->nnz += nnz[idx];
 93:   else if (xend && xend < n1 && !yend) // bottom
 94:     user->nnz += nnz[idx] + xend * nnz[idx + 1];
 95:   else if (xend == n1 && !yend) // bottom right
 96:     user->nnz += 2 * nnz[idx] + nm2 * nnz[idx + 1];
 97:   else if (!xend && yend && yend < n1) // left
 98:     user->nnz += 2 * nnz[idx] + (nm2 + yend + yend - 1) * nnz[idx + 1] + nm2 * (yend - 1) * nnz[idx + 2];
 99:   else if (xend && xend < n1 && yend && yend < n1) // center
100:     user->nnz += 2 * nnz[idx] + (nm2 + yend + yend - 1) * nnz[idx + 1] + (nm2 * (yend - 1) + xend) * nnz[idx + 2];
101:   else if (xend == n1 && yend && yend < n1) // right
102:     user->nnz += 2 * nnz[idx] + (nm2 + 2 * yend) * nnz[idx + 1] + (nm2 * yend) * nnz[idx + 2];
103:   else if (!xend && yend == n1) // top left
104:     user->nnz += 3 * nnz[idx] + (3 * nm2) * nnz[idx + 1] + (nm2 * nm2) * nnz[idx + 2];
105:   else if (xend && xend < n1 && yend == n1) // top
106:     user->nnz += 3 * nnz[idx] + (3 * nm2 + xend) * nnz[idx + 1] + (nm2 * nm2) * nnz[idx + 2];
107:   else if (xend == n1 && yend == n1) // top right
108:     user->nnz += 4 * nnz[idx] + (4 * nm2) * nnz[idx + 1] + (nm2 * nm2) * nnz[idx + 2];
109:   if (user->debug)
110:     PetscCall(PetscPrintf(PETSC_COMM_SELF, "rank %d: xyzstart = %" PetscInt_FMT ",%" PetscInt_FMT ",%" PetscInt_FMT ", xvzend = %" PetscInt_FMT ",%" PetscInt_FMT ",%" PetscInt_FMT ", nnz = %" PetscInt_FMT "\n", user->rank, xstart, ystart, zstart, xend, yend, zend,
111:                           user->nnz));
112:   PetscFunctionReturn(PETSC_SUCCESS);
113: }

115: PetscErrorCode FillCOO(Mat A, void *ctx)
116: {
117:   AppCtx      *user = (AppCtx *)ctx;
118:   PetscInt     Ii, x, y, z, n = user->n, n2 = n * n, n1 = n - 1;
119:   PetscInt     count = 0;
120:   PetscInt    *coo_i, *coo_j;
121:   PetscScalar *coo_v;
122:   PetscScalar  h     = 1.0 / (n - 1);
123:   PetscScalar  vcorn = -1.0 / 13 * h; //-1.0/12.0*h;
124:   PetscScalar  vedge = -3.0 / 26 * h; //-1.0/6.0*h;
125:   PetscScalar  vface = -3.0 / 13 * h; //-1.0e-9*h;
126:   PetscScalar  vcent = 44.0 / 13 * h; //8.0/3.0*h;

128:   PetscFunctionBeginUser;
129:   PetscCall(PetscCalloc3(user->nnz, &coo_i, user->nnz, &coo_j, user->nnz, &coo_v));
130:   /* Fill COO arrays */
131:   for (Ii = user->Istart; Ii < user->Iend; Ii++) {
132:     x = Ii % n;
133:     y = (Ii / n) % n;
134:     z = Ii / n2;
135:     if (x > 0 && y > 0 && z > 0) {
136:       coo_i[count] = Ii;
137:       coo_j[count] = Ii - 1 - n - n2;
138:       coo_v[count] = vcorn;
139:       count++;
140:     }
141:     if (x > 0 && y < n1 && z > 0) {
142:       coo_i[count] = Ii;
143:       coo_j[count] = Ii - 1 + n - n2;
144:       coo_v[count] = vcorn;
145:       count++;
146:     }
147:     if (x < n1 && y > 0 && z > 0) {
148:       coo_i[count] = Ii;
149:       coo_j[count] = Ii + 1 - n - n2;
150:       coo_v[count] = vcorn;
151:       count++;
152:     }
153:     if (x < n1 && y < n1 && z > 0) {
154:       coo_i[count] = Ii;
155:       coo_j[count] = Ii + 1 + n - n2;
156:       coo_v[count] = vcorn;
157:       count++;
158:     }
159:     if (x > 0 && y > 0 && z < n1) {
160:       coo_i[count] = Ii;
161:       coo_j[count] = Ii - 1 - n + n2;
162:       coo_v[count] = vcorn;
163:       count++;
164:     }
165:     if (x > 0 && y < n1 && z < n1) {
166:       coo_i[count] = Ii;
167:       coo_j[count] = Ii - 1 + n + n2;
168:       coo_v[count] = vcorn;
169:       count++;
170:     }
171:     if (x < n1 && y > 0 && z < n1) {
172:       coo_i[count] = Ii;
173:       coo_j[count] = Ii + 1 - n + n2;
174:       coo_v[count] = vcorn;
175:       count++;
176:     }
177:     if (x < n1 && y < n1 && z < n1) {
178:       coo_i[count] = Ii;
179:       coo_j[count] = Ii + 1 + n + n2;
180:       coo_v[count] = vcorn;
181:       count++;
182:     }
183:     /* 12 edges */
184:     if (x > 0 && y > 0) {
185:       coo_i[count] = Ii;
186:       coo_j[count] = Ii - 1 - n;
187:       coo_v[count] = vedge;
188:       count++;
189:     }
190:     if (x > 0 && y < n1) {
191:       coo_i[count] = Ii;
192:       coo_j[count] = Ii - 1 + n;
193:       coo_v[count] = vedge;
194:       count++;
195:     }
196:     if (x < n1 && y > 0) {
197:       coo_i[count] = Ii;
198:       coo_j[count] = Ii + 1 - n;
199:       coo_v[count] = vedge;
200:       count++;
201:     }
202:     if (x < n1 && y < n1) {
203:       coo_i[count] = Ii;
204:       coo_j[count] = Ii + 1 + n;
205:       coo_v[count] = vedge;
206:       count++;
207:     }
208:     if (x > 0 && z > 0) {
209:       coo_i[count] = Ii;
210:       coo_j[count] = Ii - 1 - n2;
211:       coo_v[count] = vedge;
212:       count++;
213:     }
214:     if (x > 0 && z < n1) {
215:       coo_i[count] = Ii;
216:       coo_j[count] = Ii - 1 + n2;
217:       coo_v[count] = vedge;
218:       count++;
219:     }
220:     if (x < n1 && z > 0) {
221:       coo_i[count] = Ii;
222:       coo_j[count] = Ii + 1 - n2;
223:       coo_v[count] = vedge;
224:       count++;
225:     }
226:     if (x < n1 && z < n1) {
227:       coo_i[count] = Ii;
228:       coo_j[count] = Ii + 1 + n2;
229:       coo_v[count] = vedge;
230:       count++;
231:     }
232:     if (y > 0 && z > 0) {
233:       coo_i[count] = Ii;
234:       coo_j[count] = Ii - n - n2;
235:       coo_v[count] = vedge;
236:       count++;
237:     }
238:     if (y > 0 && z < n1) {
239:       coo_i[count] = Ii;
240:       coo_j[count] = Ii - n + n2;
241:       coo_v[count] = vedge;
242:       count++;
243:     }
244:     if (y < n1 && z > 0) {
245:       coo_i[count] = Ii;
246:       coo_j[count] = Ii + n - n2;
247:       coo_v[count] = vedge;
248:       count++;
249:     }
250:     if (y < n1 && z < n1) {
251:       coo_i[count] = Ii;
252:       coo_j[count] = Ii + n + n2;
253:       coo_v[count] = vedge;
254:       count++;
255:     }
256:     /* 6 faces */
257:     if (x > 0) {
258:       coo_i[count] = Ii;
259:       coo_j[count] = Ii - 1;
260:       coo_v[count] = vface;
261:       count++;
262:     }
263:     if (x < n1) {
264:       coo_i[count] = Ii;
265:       coo_j[count] = Ii + 1;
266:       coo_v[count] = vface;
267:       count++;
268:     }
269:     if (y > 0) {
270:       coo_i[count] = Ii;
271:       coo_j[count] = Ii - n;
272:       coo_v[count] = vface;
273:       count++;
274:     }
275:     if (y < n1) {
276:       coo_i[count] = Ii;
277:       coo_j[count] = Ii + n;
278:       coo_v[count] = vface;
279:       count++;
280:     }
281:     if (z > 0) {
282:       coo_i[count] = Ii;
283:       coo_j[count] = Ii - n2;
284:       coo_v[count] = vface;
285:       count++;
286:     }
287:     if (z < n1) {
288:       coo_i[count] = Ii;
289:       coo_j[count] = Ii + n2;
290:       coo_v[count] = vface;
291:       count++;
292:     }
293:     /* Center */
294:     coo_i[count] = Ii;
295:     coo_j[count] = Ii;
296:     coo_v[count] = vcent;
297:     count++;
298:   }
299:   PetscCheck(count == user->nnz, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "Expected %" PetscInt_FMT " nonzeros but got %" PetscInt_FMT " nonzeros in COO format", user->nnz, count);
300:   PetscCall(MatSetPreallocationCOO(A, user->nnz, coo_i, coo_j));
301:   PetscCall(MatSetValuesCOO(A, coo_v, INSERT_VALUES));
302:   PetscCall(PetscFree3(coo_i, coo_j, coo_v));
303:   PetscFunctionReturn(PETSC_SUCCESS);
304: }

306: int main(int argc, char **argv)
307: {
308:   AppCtx                     user;                               /* Application context */
309:   Vec                        x, b, u;                            /* approx solution, RHS, and exact solution */
310:   Mat                        A;                                  /* linear system matrix */
311:   KSP                        ksp;                                /* linear solver context */
312:   PC                         pc;                                 /* Preconditioner */
313:   PetscReal                  norm;                               /* Error norm */
314:   PetscInt                   nlocal     = PETSC_DECIDE, ksp_its; /* Number of KSP iterations */
315:   PetscInt                   global_nnz = 0;                     /* Total number of nonzeros */
316:   PetscLogDouble             time_start, time_mid1 = 0.0, time_mid2 = 0.0, time_end, time_avg, floprate;
317:   PetscBool                  printTiming = PETSC_TRUE; /* If run in CI, do not print timing result */
318:   PETSC_UNUSED PetscLogStage stage;

320:   PetscCall(PetscInitialize(&argc, &argv, (char *)0, help));
321:   PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &user.size));
322:   PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &user.rank));

324:   user.n        = 100;         /* Default grid points per dimension */
325:   user.matmult  = PETSC_FALSE; /* Test MatMult only */
326:   user.its      = 100;         /* Default no. of iterations for MatMult test */
327:   user.debug    = PETSC_FALSE; /* Debug PreallocateCOO() */
328:   user.splitksp = PETSC_FALSE; /* Split KSPSolve and PCSetUp */
329:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &user.n, NULL));
330:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-its", &user.its, NULL));
331:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-matmult", &user.matmult, NULL));
332:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-debug", &user.debug, NULL));
333:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-print_timing", &printTiming, NULL));
334:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-split_ksp", &user.splitksp, NULL));

336:   user.dim   = user.n * user.n * user.n;
337:   global_nnz = 64 + 27 * (user.n - 2) * (user.n - 2) * (user.n - 2) + 108 * (user.n - 2) * (user.n - 2) + 144 * (user.n - 2);
338:   PetscCheck(user.n >= 2, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Requires at least 2 grid points (-n 2), you specified -n %" PetscInt_FMT, user.n);
339:   PetscCheck(user.dim >= user.size, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "MPI size (%d) exceeds the grid size %" PetscInt_FMT " (-n %" PetscInt_FMT ")", user.size, user.dim, user.n);
340:   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "===========================================\n"));
341:   if (user.matmult) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Test: MatMult performance - Poisson\n"));
342:   else PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Test: KSP performance - Poisson\n"));
343:   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\tInput matrix: 27-pt finite difference stencil\n"));
344:   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\t-n %" PetscInt_FMT "\n", user.n));
345:   if (user.matmult) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\t-its %" PetscInt_FMT "\n", user.its));
346:   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\tDoFs = %" PetscInt_FMT "\n", user.dim));
347:   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\tNumber of nonzeros = %" PetscInt_FMT "\n", global_nnz));

349:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
350:   /*  Create the Vecs and Mat                                            */
351:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
352:   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\nStep1  - creating Vecs and Mat...\n"));
353:   PetscCall(PetscLogStageRegister("Step1  - Vecs and Mat", &stage));
354:   PetscCall(PetscLogStagePush(stage));
355:   PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
356:   PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, user.dim, user.dim));
357:   PetscCall(MatSetFromOptions(A));

359:   /* cannot use MatGetOwnershipRange() because the matrix has yet to be preallocated; that happens in MatSetPreallocateCOO() */
360:   PetscCall(PetscSplitOwnership(PetscObjectComm((PetscObject)A), &nlocal, &user.dim));
361:   PetscCallMPI(MPI_Scan(&nlocal, &user.Istart, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)A)));
362:   user.Istart -= nlocal;
363:   user.Iend = user.Istart + nlocal;

365:   PetscCall(PreallocateCOO(A, &user)); /* Determine local number of nonzeros */
366:   PetscCall(FillCOO(A, &user));        /* Fill COO Matrix */
367: #if !defined(PETSC_HAVE_HIP)           /* Due to errors in MatSolve_SeqAIJHIPSPARSE_ICC0() */
368:   PetscCall(MatSetOption(A, MAT_SPD, PETSC_TRUE));
369:   PetscCall(MatSetOption(A, MAT_SPD_ETERNAL, PETSC_TRUE));
370: #endif
371:   PetscCall(MatCreateVecs(A, &u, &b));
372:   if (!user.matmult) PetscCall(VecDuplicate(b, &x));
373:   PetscCall(VecSet(u, 1.0));   /* Exact solution */
374:   PetscCall(MatMult(A, u, b)); /* Compute RHS based on exact solution */
375:   PetscCall(PetscLogStagePop());

377:   if (user.matmult) {
378:     /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
379:     /*  MatMult                                                            */
380:     /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
381:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Step2  - running MatMult() %" PetscInt_FMT " times...\n", user.its));
382:     PetscCall(PetscLogStageRegister("Step2  - MatMult", &stage));
383:     PetscCall(PetscLogStagePush(stage));
384:     PetscCall(PetscTime(&time_start));
385:     for (int i = 0; i < user.its; i++) PetscCall(MatMult(A, u, b));
386:     PetscCall(PetscTime(&time_end));
387:     PetscCall(PetscLogStagePop());
388:     /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
389:     /*  Calculate Performance metrics                                      */
390:     /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
391:     time_avg = (time_end - time_start) / ((PetscLogDouble)user.its);
392:     floprate = 2 * global_nnz / time_avg * 1e-9;
393:     if (printTiming) {
394:       PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\n%-15s%-7.5f seconds\n", "Average time:", time_avg));
395:       PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%-15s%-9.3e Gflops/sec\n", "FOM:", floprate)); /* figure of merit */
396:     }
397:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "===========================================\n"));
398:   } else {
399:     if (!user.splitksp) {
400:       /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
401:       /*  Solve the linear system of equations                               */
402:       /*  Measure only time of PCSetUp() and KSPSolve()                      */
403:       /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
404:       PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Step2  - running KSPSolve()...\n"));
405:       PetscCall(PetscLogStageRegister("Step2  - KSPSolve", &stage));
406:       PetscCall(PetscLogStagePush(stage));
407:       PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
408:       PetscCall(KSPSetOperators(ksp, A, A));
409:       PetscCall(KSPSetFromOptions(ksp));
410:       PetscCall(PetscTime(&time_start));
411:       PetscCall(KSPSolve(ksp, b, x));
412:       PetscCall(PetscTime(&time_end));
413:       PetscCall(PetscLogStagePop());
414:     } else {
415:       /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
416:       /*  Solve the linear system of equations                               */
417:       /*  Measure only time of PCSetUp() and KSPSolve()                      */
418:       /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
419:       PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Step2a - running PCSetUp()...\n"));
420:       PetscCall(PetscLogStageRegister("Step2a - PCSetUp", &stage));
421:       PetscCall(PetscLogStagePush(stage));
422:       PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
423:       PetscCall(KSPSetOperators(ksp, A, A));
424:       PetscCall(KSPSetFromOptions(ksp));
425:       PetscCall(KSPGetPC(ksp, &pc));
426:       PetscCall(PetscTime(&time_start));
427:       PetscCall(PCSetUp(pc));
428:       PetscCall(PetscTime(&time_mid1));
429:       PetscCall(PetscLogStagePop());
430:       PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Step2b - running KSPSolve()...\n"));
431:       PetscCall(PetscLogStageRegister("Step2b - KSPSolve", &stage));
432:       PetscCall(PetscLogStagePush(stage));
433:       PetscCall(PetscTime(&time_mid2));
434:       PetscCall(KSPSolve(ksp, b, x));
435:       PetscCall(PetscTime(&time_end));
436:       PetscCall(PetscLogStagePop());
437:     }

439:     /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
440:     /*  Calculate error norm                                               */
441:     /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
442:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Step3  - calculating error norm...\n"));
443:     PetscCall(PetscLogStageRegister("Step3  - Error norm", &stage));
444:     PetscCall(PetscLogStagePush(stage));
445:     PetscCall(VecAXPY(x, -1.0, u));
446:     PetscCall(VecNorm(x, NORM_2, &norm));
447:     PetscCall(PetscLogStagePop());

449:     /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
450:     /*  Summary                                                            */
451:     /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
452:     PetscCall(KSPGetIterationNumber(ksp, &ksp_its));
453:     if (printTiming) {
454:       PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\n%-15s%-1.3e\n", "Error norm:", (double)norm));
455:       PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%-15s%-3" PetscInt_FMT "\n", "KSP iters:", ksp_its));
456:       if (user.splitksp) {
457:         PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%-15s%-7.5f seconds\n", "PCSetUp:", time_mid1 - time_start));
458:         PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%-15s%-7.5f seconds\n", "KSPSolve:", time_end - time_mid2));
459:         PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%-15s%-7.5f seconds\n", "Total Solve:", time_end - time_start));
460:       } else {
461:         PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%-15s%-7.5f seconds\n", "KSPSolve:", time_end - time_start));
462:       }
463:       PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%-15s%-1.3e DoFs/sec\n", "FOM:", user.dim / (time_end - time_start))); /* figure of merit */
464:     }
465:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "===========================================\n"));
466:   }
467:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
468:   /*  Free up memory                                                     */
469:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
470:   if (!user.matmult) {
471:     PetscCall(KSPDestroy(&ksp));
472:     PetscCall(VecDestroy(&x));
473:   }
474:   PetscCall(VecDestroy(&u));
475:   PetscCall(VecDestroy(&b));
476:   PetscCall(MatDestroy(&A));
477:   PetscCall(PetscFinalize());
478:   return 0;
479: }

481: /*TEST

483:   testset:
484:     args: -print_timing false -matmult -its 10 -n 8
485:     nsize: {{1 3}}
486:     output_file: output/bench_kspsolve_matmult.out

488:     test:
489:       suffix: matmult

491:     test:
492:       suffix: hip_matmult
493:       requires: hip
494:       args: -mat_type aijhipsparse

496:     test:
497:       suffix: cuda_matmult
498:       requires: cuda
499:       args: -mat_type aijcusparse

501:     test:
502:       suffix: kok_matmult
503:       requires: kokkos_kernels
504:       args: -mat_type aijkokkos

506:   testset:
507:     args: -print_timing false -its 10 -n 8
508:     nsize: {{3 5}}
509:     output_file: output/bench_kspsolve_ksp.out

511:     test:
512:       suffix: ksp

514:     test:
515:       suffix: nbr
516:       requires: defined(PETSC_HAVE_MPI_PERSISTENT_NEIGHBORHOOD_COLLECTIVES)
517:       args: -sf_type neighbor -sf_neighbor_persistent {{0 1}}

519:     test:
520:       suffix: hip_ksp
521:       requires: hip
522:       args: -mat_type aijhipsparse

524:     test:
525:       suffix: cuda_ksp
526:       requires: cuda
527:       args: -mat_type aijcusparse

529:     test:
530:       suffix: kok_ksp
531:       requires: kokkos_kernels
532:       args: -mat_type aijkokkos

534:     test:
535:       suffix: kok_nbr
536:       requires: kokkos_kernels defined(PETSC_HAVE_MPI_PERSISTENT_NEIGHBORHOOD_COLLECTIVES)
537:       args: -mat_type aijkokkos -sf_type neighbor -sf_neighbor_persistent {{0 1}}

539: TEST*/