Actual source code: performance.c
/* One-line program description; main() hands this string to PetscInitialize() as the help text. */
static char help[] = "Time vector operations on GPU\n";
2: /* This program produces the results for Argonne Technical Report ANL-19/41.
3: The technical report and resources for generating data can be found in the
4: repository: */
6: #include <petscvec.h>
8: int main(int argc, char **argv)
9: {
10: Vec v, w, x;
11: PetscInt n = 15;
12: PetscScalar val;
13: PetscReal norm1, norm2;
14: PetscRandom rctx;
15: PetscLogStage stage;
17: PetscFunctionBeginUser;
18: PetscCall(PetscInitialize(&argc, &argv, NULL, help));
19: PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL));
20: PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rctx));
21: PetscCall(PetscRandomSetFromOptions(rctx));
22: PetscCall(VecCreate(PETSC_COMM_WORLD, &v));
23: PetscCall(VecSetSizes(v, PETSC_DECIDE, n));
24: PetscCall(VecSetFromOptions(v));
25: PetscCall(VecDuplicate(v, &w));
26: PetscCall(VecSetRandom(v, rctx));
27: PetscCall(VecSetRandom(w, rctx));
29: /* create dummy vector to clear cache */
30: PetscCall(VecCreate(PETSC_COMM_WORLD, &x));
31: PetscCall(VecSetSizes(x, PETSC_DECIDE, 10000000));
32: PetscCall(VecSetFromOptions(x));
33: PetscCall(VecSetRandom(x, rctx));
35: /* send v to GPU */
36: PetscCall(PetscBarrier(NULL));
37: PetscCall(VecNorm(v, NORM_1, &norm1));
39: /* register a stage work on GPU */
40: PetscCall(PetscLogStageRegister("Work on GPU", &stage));
41: PetscCall(PetscLogStagePush(stage));
42: PetscCall(VecNorm(w, NORM_1, &norm1)); /* send w to GPU */
43: PetscCall(VecNorm(x, NORM_1, &norm1)); /* clear cache */
44: PetscCall(PetscBarrier(NULL));
45: PetscCall(VecAXPY(w, 1.0, v));
46: PetscCall(VecNorm(x, NORM_INFINITY, &norm1));
47: PetscCall(PetscBarrier(NULL));
48: PetscCall(VecDot(w, v, &val));
49: PetscCall(VecNorm(x, NORM_1, &norm1));
50: PetscCall(PetscBarrier(NULL));
51: PetscCall(VecSet(v, 0.0));
52: PetscCall(VecNorm(x, NORM_2, &norm2));
53: PetscCall(PetscBarrier(NULL));
54: PetscCall(VecCopy(v, w));
55: PetscCall(PetscLogStagePop());
57: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Test completed successfully!\n"));
58: PetscCall(VecDestroy(&v));
59: PetscCall(VecDestroy(&w));
60: PetscCall(VecDestroy(&x));
61: PetscCall(PetscRandomDestroy(&rctx));
62: PetscCall(PetscFinalize());
63: return 0;
64: }
66: /*TEST
68: testset:
69: nsize: 2
70: output_file: output/performance_cuda.out
72: test:
73: suffix: cuda
74: args: -vec_type mpicuda
75: requires: cuda
77: test:
78: suffix: hip
79: args: -vec_type mpihip
80: requires: hip
82: TEST*/