Actual source code: lognvtx.c
1: #include <petsc/private/logimpl.h>
2: #include <petsc/private/loghandlerimpl.h>
3: #include <petscdevice.h>
4: #include <nvToolsExt.h>
6: static PetscErrorCode PetscLogHandlerEventBegin_NVTX(PetscLogHandler handler, PetscLogEvent event, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4)
7: {
8: PetscLogState state;
9: PetscLogEventInfo info;
11: PetscFunctionBegin;
12: if (PetscDeviceInitialized(PETSC_DEVICE_CUDA)) {
13: PetscCall(PetscLogHandlerGetState(handler, &state));
14: PetscCall(PetscLogStateEventGetInfo(state, event, &info));
15: (void)nvtxRangePushA(info.name);
16: }
17: PetscFunctionReturn(PETSC_SUCCESS);
18: }
20: static PetscErrorCode PetscLogHandlerEventEnd_NVTX(PetscLogHandler handler, PetscLogEvent event, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4)
21: {
22: PetscFunctionBegin;
23: if (PetscDeviceInitialized(PETSC_DEVICE_CUDA)) (void)nvtxRangePop();
24: PetscFunctionReturn(PETSC_SUCCESS);
25: }
27: /*MC
28: PETSCLOGHANDLERNVTX - PETSCLOGHANDLERNVTX = "nvtx" - A
29: `PetscLogHandler` that creates an NVTX range (which appears in Nvidia Nsight
30: profiling) for each PETSc event.
32: Options Database Keys:
33: + -log_nvtx - start an nvtx log handler manually
34: - -log_nvtx 0 - stop the nvtx log handler from starting automatically in `PetscInitialize()` in a program run within an nsys profiling session (see Note)
36: Level: developer
38: Note:
If `PetscInitialize()` detects the environment variable `NSYS_PROFILING_SESSION_ID` (which is defined by
`nsys profile`) or `NVPROF_ID` (which is defined by `nvprof`), an instance of this log handler will automatically
be started.
43: .seealso: [](ch_profiling), `PetscLogHandler`
44: M*/
46: PETSC_INTERN PetscErrorCode PetscLogHandlerCreate_NVTX(PetscLogHandler handler)
47: {
48: PetscFunctionBegin;
49: handler->ops->eventbegin = PetscLogHandlerEventBegin_NVTX;
50: handler->ops->eventend = PetscLogHandlerEventEnd_NVTX;
51: PetscCall(PetscInfo(handler, "nvtx log handler created\n"));
52: PetscFunctionReturn(PETSC_SUCCESS);
53: }