Actual source code: performance.c

  /* Help text for this program; main() hands it to PetscInitialize(). */
  1: static char help[] = "Time vector operations on GPU\n";
  2: /* This program produces the results for Argonne Technical Report ANL-19/41.
  3:    The technical report and resources for generating data can be found in the
  4:    repository:  https://gitlab.com/hannah_mairs/summit-performance */

  6: #include <petscvec.h>

  /* Entry point of the benchmark.
   *
   * Starts PETSc, builds two work vectors (v, w) of global size n (default 15,
   * overridable with -n) plus a dummy vector x of global size 10000000, fills
   * all three with random values, and then runs VecAXPY, VecDot, VecSet and
   * VecCopy inside the "Work on GPU" log stage. Consecutive operations are
   * separated by a norm over x followed by PetscBarrier().
   *
   * The norms and the dot product (norm1, norm2, val) are never read back in
   * this file, and nothing here prints timings; they are presumably collected
   * through PETSc's logging (e.g. -log_view) -- TODO confirm against the
   * technical report referenced at the top of the file.
   *
   * Returns 0 after PetscFinalize(). PetscCall() is expected to return early
   * with the PETSc error code if a call fails (see the PETSc documentation).
   */
  8: int main(int argc, char **argv)
  9: {
 10:   Vec           v, w, x;        /* v, w: operands of the vector operations; x: large dummy vector */
 11:   PetscInt      n = 15;         /* global size of v and w; can be overridden with -n */
 12:   PetscScalar   val;            /* receives the VecDot() result; not read afterwards */
 13:   PetscReal     norm1, norm2;   /* receive VecNorm() results; not read afterwards */
 14:   PetscRandom   rctx;           /* random context used to fill v, w and x */
 15:   PetscLogStage stage;          /* handle of the "Work on GPU" log stage */

 17:   PetscFunctionBeginUser;
       /* start PETSc and pick up the optional -n override */
 18:   PetscCall(PetscInitialize(&argc, &argv, (char *)0, help));
 19:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL));
 20:   PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rctx));
 21:   PetscCall(PetscRandomSetFromOptions(rctx));
       /* v: global size n, local size left to PETSc (PETSC_DECIDE), configured
          from run-time options (the tests at the end of the file pass
          -vec_type mpicuda or -vec_type mpihip); w is created from v with
          VecDuplicate(); both are then filled with random values */
 22:   PetscCall(VecCreate(PETSC_COMM_WORLD, &v));
 23:   PetscCall(VecSetSizes(v, PETSC_DECIDE, n));
 24:   PetscCall(VecSetFromOptions(v));
 25:   PetscCall(VecDuplicate(v, &w));
 26:   PetscCall(VecSetRandom(v, rctx));
 27:   PetscCall(VecSetRandom(w, rctx));

 29:   /* create a large dummy vector; norms over it are used below to clear the cache */
 30:   PetscCall(VecCreate(PETSC_COMM_WORLD, &x));
 31:   PetscCall(VecSetSizes(x, PETSC_DECIDE, 10000000));
 32:   PetscCall(VecSetFromOptions(x));
 33:   PetscCall(VecSetRandom(x, rctx));

 35:   /* send v to GPU (done before the log stage below is entered) */
 36:   PetscCall(PetscBarrier(NULL));
 37:   PetscCall(VecNorm(v, NORM_1, &norm1));

 39:   /* register and enter the log stage that covers the work on the GPU */
 40:   PetscCall(PetscLogStageRegister("Work on GPU", &stage));
 41:   PetscCall(PetscLogStagePush(stage));
       /* note: unlike v, the first norm of w happens inside the stage */
 42:   PetscCall(VecNorm(w, NORM_1, &norm1)); /* send w to GPU */
 43:   PetscCall(VecNorm(x, NORM_1, &norm1)); /* clear cache */
 44:   PetscCall(PetscBarrier(NULL));
 45:   PetscCall(VecAXPY(w, 1.0, v));                 /* operation 1: w = w + 1.0 * v */
 46:   PetscCall(VecNorm(x, NORM_INFINITY, &norm1));  /* norm over x again, presumably to clear the cache as above */
 47:   PetscCall(PetscBarrier(NULL));
 48:   PetscCall(VecDot(w, v, &val));                 /* operation 2: dot product of w and v */
 49:   PetscCall(VecNorm(x, NORM_1, &norm1));         /* norm over x again, presumably to clear the cache */
 50:   PetscCall(PetscBarrier(NULL));
 51:   PetscCall(VecSet(v, 0.0));                     /* operation 3: set every entry of v to 0.0 */
 52:   PetscCall(VecNorm(x, NORM_2, &norm2));         /* norm over x again, presumably to clear the cache */
 53:   PetscCall(PetscBarrier(NULL));
 54:   PetscCall(VecCopy(v, w));                      /* operation 4: copy v into w */
       /* no norm/barrier follows the last operation; the stage is popped directly */
 55:   PetscCall(PetscLogStagePop());

       /* report completion, then release the vectors and the random context
          before shutting PETSc down */
 57:   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Test completed successfully!\n"));
 58:   PetscCall(VecDestroy(&v));
 59:   PetscCall(VecDestroy(&w));
 60:   PetscCall(VecDestroy(&x));
 61:   PetscCall(PetscRandomDestroy(&rctx));
 62:   PetscCall(PetscFinalize());
 63:   return 0;
 64: }

 66: /*TEST

 68:    testset:
 69:       nsize: 2
 70:       output_file: output/performance_cuda.out

 72:       test:
 73:         suffix: cuda
 74:         args: -vec_type mpicuda
 75:         requires: cuda

 77:       test:
 78:         suffix: hip
 79:         args: -vec_type mpihip
 80:         requires: hip

 82: TEST*/