Actual source code: plog.c


  2: /*
  3:       PETSc code to log object creation and destruction and PETSc events.

  5:       This provides the public API used by the rest of PETSc and by users.

  7:       These routines use a private API that is not used elsewhere in PETSc and is not
  8:       accessible to users. The private API is defined in logimpl.h and the utils directory.

 10: */
 11: #include <petsc/private/logimpl.h>
 12: #include <petsctime.h>
 13: #include <petscviewer.h>
 14: #include <petscdevice.h>
 15: #include <petsc/private/deviceimpl.h>

 17: PetscLogEvent PETSC_LARGEST_EVENT = PETSC_EVENT;

 19: #if defined(PETSC_USE_LOG)
 20:   #include <petscmachineinfo.h>
 21:   #include <petscconfiginfo.h>

 23: /* used in the MPI_XXX() count macros in petsclog.h */

 25: /* Action and object logging variables */
 26: Action   *petsc_actions    = NULL;
 27: Object   *petsc_objects    = NULL;
 28: PetscBool petsc_logActions = PETSC_FALSE;
 29: PetscBool petsc_logObjects = PETSC_FALSE;
 30: int       petsc_numActions = 0, petsc_maxActions = 100;
 31: int       petsc_numObjects = 0, petsc_maxObjects = 100;
 32: int       petsc_numObjectsDestroyed = 0;

 34: /* Global counters */
 35: PetscLogDouble petsc_BaseTime        = 0.0;
 36: PetscLogDouble petsc_TotalFlops      = 0.0; /* The number of flops */
 37: PetscLogDouble petsc_tmp_flops       = 0.0; /* The incremental number of flops */
 38: PetscLogDouble petsc_send_ct         = 0.0; /* The number of sends */
 39: PetscLogDouble petsc_recv_ct         = 0.0; /* The number of receives */
 40: PetscLogDouble petsc_send_len        = 0.0; /* The total length of all sent messages */
 41: PetscLogDouble petsc_recv_len        = 0.0; /* The total length of all received messages */
 42: PetscLogDouble petsc_isend_ct        = 0.0; /* The number of immediate sends */
 43: PetscLogDouble petsc_irecv_ct        = 0.0; /* The number of immediate receives */
 44: PetscLogDouble petsc_isend_len       = 0.0; /* The total length of all immediate send messages */
 45: PetscLogDouble petsc_irecv_len       = 0.0; /* The total length of all immediate receive messages */
 46: PetscLogDouble petsc_wait_ct         = 0.0; /* The number of waits */
 47: PetscLogDouble petsc_wait_any_ct     = 0.0; /* The number of anywaits */
 48: PetscLogDouble petsc_wait_all_ct     = 0.0; /* The number of waitalls */
 49: PetscLogDouble petsc_sum_of_waits_ct = 0.0; /* The total number of waits */
 50: PetscLogDouble petsc_allreduce_ct    = 0.0; /* The number of reductions */
 51: PetscLogDouble petsc_gather_ct       = 0.0; /* The number of gathers and gathervs */
 52: PetscLogDouble petsc_scatter_ct      = 0.0; /* The number of scatters and scattervs */
 53:   #if defined(PETSC_HAVE_DEVICE)
 54: PetscLogDouble petsc_ctog_ct        = 0.0; /* The total number of CPU to GPU copies */
 55: PetscLogDouble petsc_gtoc_ct        = 0.0; /* The total number of GPU to CPU copies */
 56: PetscLogDouble petsc_ctog_sz        = 0.0; /* The total size of CPU to GPU copies */
 57: PetscLogDouble petsc_gtoc_sz        = 0.0; /* The total size of GPU to CPU copies */
 58: PetscLogDouble petsc_ctog_ct_scalar = 0.0; /* The total number of CPU to GPU copies */
 59: PetscLogDouble petsc_gtoc_ct_scalar = 0.0; /* The total number of GPU to CPU copies */
 60: PetscLogDouble petsc_ctog_sz_scalar = 0.0; /* The total size of CPU to GPU copies */
 61: PetscLogDouble petsc_gtoc_sz_scalar = 0.0; /* The total size of GPU to CPU copies */
 62: PetscLogDouble petsc_gflops         = 0.0; /* The flops done on a GPU */
 63: PetscLogDouble petsc_gtime          = 0.0; /* The time spent on a GPU */
 64:   #endif

 66: /* Logging functions */
 67: PetscErrorCode (*PetscLogPHC)(PetscObject)                                                            = NULL;
 68: PetscErrorCode (*PetscLogPHD)(PetscObject)                                                            = NULL;
 69: PetscErrorCode (*PetscLogPLB)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = NULL;
 70: PetscErrorCode (*PetscLogPLE)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = NULL;

 72: /* Tracing event logging variables */
 73: FILE            *petsc_tracefile          = NULL;
 74: int              petsc_tracelevel         = 0;
 75: const char      *petsc_traceblanks        = "                                                                                                    ";
 76: char             petsc_tracespace[128]    = " ";
 77: PetscLogDouble   petsc_tracetime          = 0.0;
 78: static PetscBool PetscLogInitializeCalled = PETSC_FALSE;

 80: static PetscIntStack current_log_event_stack = NULL;

 82: PETSC_INTERN PetscErrorCode PetscLogInitialize(void)
 83: {
 84:   int       stage;
 85:   PetscBool opt;

 87:   if (PetscLogInitializeCalled) return 0;
 88:   PetscLogInitializeCalled = PETSC_TRUE;

 90:   PetscIntStackCreate(&current_log_event_stack);
 91:   PetscOptionsHasName(NULL, NULL, "-log_exclude_actions", &opt);
 92:   if (opt) petsc_logActions = PETSC_FALSE;
 93:   PetscOptionsHasName(NULL, NULL, "-log_exclude_objects", &opt);
 94:   if (opt) petsc_logObjects = PETSC_FALSE;
 95:   if (petsc_logActions) PetscMalloc1(petsc_maxActions, &petsc_actions);
 96:   if (petsc_logObjects) PetscMalloc1(petsc_maxObjects, &petsc_objects);
 97:   PetscLogPHC = PetscLogObjCreateDefault;
 98:   PetscLogPHD = PetscLogObjDestroyDefault;
 99:   /* Setup default logging structures */
100:   PetscStageLogCreate(&petsc_stageLog);
101:   PetscStageLogRegister(petsc_stageLog, "Main Stage", &stage);

103:   /* All processors sync here for more consistent logging */
104:   MPI_Barrier(PETSC_COMM_WORLD);
105:   PetscTime(&petsc_BaseTime);
106:   PetscLogStagePush(stage);
107:   return 0;
108: }

110: PETSC_INTERN PetscErrorCode PetscLogFinalize(void)
111: {
112:   PetscStageLog stageLog;

114:   PetscFree(petsc_actions);
115:   PetscFree(petsc_objects);
116:   PetscLogNestedEnd();
117:   PetscLogSet(NULL, NULL);

119:   /* Resetting phase */
120:   PetscLogGetStageLog(&stageLog);
121:   PetscStageLogDestroy(stageLog);
122:   PetscIntStackDestroy(current_log_event_stack);
123:   current_log_event_stack = NULL;

125:   petsc_TotalFlops          = 0.0;
126:   petsc_numActions          = 0;
127:   petsc_numObjects          = 0;
128:   petsc_numObjectsDestroyed = 0;
129:   petsc_maxActions          = 100;
130:   petsc_maxObjects          = 100;
131:   petsc_actions             = NULL;
132:   petsc_objects             = NULL;
133:   petsc_logActions          = PETSC_FALSE;
134:   petsc_logObjects          = PETSC_FALSE;
135:   petsc_BaseTime            = 0.0;
136:   petsc_TotalFlops          = 0.0;
137:   petsc_tmp_flops           = 0.0;
138:   petsc_send_ct             = 0.0;
139:   petsc_recv_ct             = 0.0;
140:   petsc_send_len            = 0.0;
141:   petsc_recv_len            = 0.0;
142:   petsc_isend_ct            = 0.0;
143:   petsc_irecv_ct            = 0.0;
144:   petsc_isend_len           = 0.0;
145:   petsc_irecv_len           = 0.0;
146:   petsc_wait_ct             = 0.0;
147:   petsc_wait_any_ct         = 0.0;
148:   petsc_wait_all_ct         = 0.0;
149:   petsc_sum_of_waits_ct     = 0.0;
150:   petsc_allreduce_ct        = 0.0;
151:   petsc_gather_ct           = 0.0;
152:   petsc_scatter_ct          = 0.0;
153:   #if defined(PETSC_HAVE_DEVICE)
154:   petsc_ctog_ct = 0.0;
155:   petsc_gtoc_ct = 0.0;
156:   petsc_ctog_sz = 0.0;
157:   petsc_gtoc_sz = 0.0;
158:   petsc_gflops  = 0.0;
159:   petsc_gtime   = 0.0;
160:   #endif
161:   PETSC_LARGEST_EVENT      = PETSC_EVENT;
162:   PetscLogPHC              = NULL;
163:   PetscLogPHD              = NULL;
164:   petsc_tracefile          = NULL;
165:   petsc_tracelevel         = 0;
166:   petsc_traceblanks        = "                                                                                                    ";
167:   petsc_tracespace[0]      = ' ';
168:   petsc_tracespace[1]      = 0;
169:   petsc_tracetime          = 0.0;
170:   PETSC_LARGEST_CLASSID    = PETSC_SMALLEST_CLASSID;
171:   PETSC_OBJECT_CLASSID     = 0;
172:   petsc_stageLog           = NULL;
173:   PetscLogInitializeCalled = PETSC_FALSE;
174:   return 0;
175: }

177: /*@C
178:   PetscLogSet - Sets the logging functions called at the beginning and ending of every event.

180:   Not Collective

182:   Input Parameters:
183: + b - The function called at beginning of event
184: - e - The function called at end of event

186:   Level: developer

188:   Developer Note:
189:   The default loggers are `PetscLogEventBeginDefault()` and `PetscLogEventEndDefault()`.

191: .seealso: `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogTraceBegin()`, `PetscLogEventBeginDefault()`, `PetscLogEventEndDefault()`
192: @*/
193: PetscErrorCode PetscLogSet(PetscErrorCode (*b)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject), PetscErrorCode (*e)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject))
194: {
195:   PetscLogPLB = b;
196:   PetscLogPLE = e;
197:   return 0;
198: }

200: /*@C
201:   PetscLogIsActive - Check if logging is currently in progress.

203:   Not Collective

205:   Output Parameter:
206: . isActive - `PETSC_TRUE` if logging is in progress, `PETSC_FALSE` otherwise

208:   Level: beginner

210: .seealso: `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogSet()`
211: @*/
212: PetscErrorCode PetscLogIsActive(PetscBool *isActive)
213: {
214:   *isActive = (PetscLogPLB && PetscLogPLE) ? PETSC_TRUE : PETSC_FALSE;
215:   return 0;
216: }

218: /*@C
219:   PetscLogDefaultBegin - Turns on logging of objects and events using the default logging functions `PetscLogEventBeginDefault()` and `PetscLogEventEndDefault()`. This logs flop
220:   rates and object creation and should not slow programs down too much.
221:   This routine may be called more than once.

223:   Logically Collective over `PETSC_COMM_WORLD`

225:   Options Database Key:
226: . -log_view [viewertype:filename:viewerformat] - Prints summary of flop and timing information to the
227:                   screen (for code configured with --with-log=1 (which is the default))

229:   Usage:
230: .vb
231:       PetscInitialize(...);
232:       PetscLogDefaultBegin();
233:        ... code ...
234:       PetscLogView(viewer); or PetscLogDump();
235:       PetscFinalize();
236: .ve

238:   Note:
239:   `PetscLogView()` or `PetscLogDump()` actually cause the printing of
240:   the logging information.

242:   Level: advanced

244: .seealso: `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogView()`, `PetscLogTraceBegin()`
245: @*/
246: PetscErrorCode PetscLogDefaultBegin(void)
247: {
248:   PetscLogSet(PetscLogEventBeginDefault, PetscLogEventEndDefault);
249:   return 0;
250: }

252: /*@C
253:   PetscLogAllBegin - Turns on extensive logging of objects and events. Logs
254:   all events. This creates large log files and slows the program down.

256:   Logically Collective on `PETSC_COMM_WORLD`

258:   Options Database Key:
259: . -log_all - Prints extensive log information

261:   Usage:
262: .vb
263:      PetscInitialize(...);
264:      PetscLogAllBegin();
265:      ... code ...
266:      PetscLogDump(filename);
267:      PetscFinalize();
268: .ve

270:   Note:
271:   A related routine is `PetscLogDefaultBegin()` (with the options key -log_view), which is
272:   intended for production runs since it logs only flop rates and object
273:   creation (and shouldn't significantly slow the programs).

275:   Level: advanced

277: .seealso: `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogTraceBegin()`
278: @*/
279: PetscErrorCode PetscLogAllBegin(void)
280: {
281:   PetscLogSet(PetscLogEventBeginComplete, PetscLogEventEndComplete);
282:   return 0;
283: }

285: /*@C
286:   PetscLogTraceBegin - Activates trace logging.  Every time a PETSc event
287:   begins or ends, the event name is printed.

289:   Logically Collective on `PETSC_COMM_WORLD`

291:   Input Parameter:
292: . file - The file to print trace in (e.g. stdout)

294:   Options Database Key:
295: . -log_trace [filename] - Activates `PetscLogTraceBegin()`

297:   Notes:
298:   `PetscLogTraceBegin()` prints the processor number, the execution time (sec),
299:   then "Event begin:" or "Event end:" followed by the event name.

301:   `PetscLogTraceBegin()` allows tracing of all PETSc calls, which is useful
302:   to determine where a program is hanging without running in the
303:   debugger.  Can be used in conjunction with the -info option.

305:   Level: intermediate

307: .seealso: `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogView()`, `PetscLogDefaultBegin()`
308: @*/
309: PetscErrorCode PetscLogTraceBegin(FILE *file)
310: {
311:   petsc_tracefile = file;

313:   PetscLogSet(PetscLogEventBeginTrace, PetscLogEventEndTrace);
314:   return 0;
315: }

317: /*@
318:   PetscLogActions - Determines whether actions are logged for the graphical viewer.

320:   Not Collective

322:   Input Parameter:
323: . flag - `PETSC_TRUE` if actions are to be logged

325:   Options Database Key:
326: . -log_exclude_actions - Turns off actions logging

328:   Level: intermediate

330:   Note:
331:   Logging of actions continues to consume more memory as the program
332:   runs. Long running programs should consider turning this feature off.
333: .seealso: `PetscLogStagePush()`, `PetscLogStagePop()`
334: @*/
335: PetscErrorCode PetscLogActions(PetscBool flag)
336: {
337:   petsc_logActions = flag;
338:   return 0;
339: }

341: /*@
342:   PetscLogObjects - Determines whether objects are logged for the graphical viewer.

344:   Not Collective

346:   Input Parameter:
347: . flag - `PETSC_TRUE` if objects are to be logged

349:   Options Database Key:
350: . -log_exclude_objects - Turns off objects logging

352:   Level: intermediate

354:   Note:
355:   Logging of objects continues to consume more memory as the program
356:   runs. Long running programs should consider turning this feature off.

358: .seealso: `PetscLogStagePush()`, `PetscLogStagePop()`
359: @*/
360: PetscErrorCode PetscLogObjects(PetscBool flag)
361: {
362:   petsc_logObjects = flag;
363:   return 0;
364: }

366: /*------------------------------------------------ Stage Functions --------------------------------------------------*/
367: /*@C
368:   PetscLogStageRegister - Attaches a character string name to a logging stage.

370:   Not Collective

372:   Input Parameter:
373: . sname - The name to associate with that stage

375:   Output Parameter:
376: . stage - The stage number

378:   Level: intermediate

380: .seealso: `PetscLogStagePush()`, `PetscLogStagePop()`
381: @*/
382: PetscErrorCode PetscLogStageRegister(const char sname[], PetscLogStage *stage)
383: {
384:   PetscStageLog stageLog;
385:   PetscLogEvent event;

387:   PetscLogGetStageLog(&stageLog);
388:   PetscStageLogRegister(stageLog, sname, stage);
389:   /* Copy events already changed in the main stage, this sucks */
390:   PetscEventPerfLogEnsureSize(stageLog->stageInfo[*stage].eventLog, stageLog->eventLog->numEvents);
391:   for (event = 0; event < stageLog->eventLog->numEvents; event++) PetscEventPerfInfoCopy(&stageLog->stageInfo[0].eventLog->eventInfo[event], &stageLog->stageInfo[*stage].eventLog->eventInfo[event]);
392:   PetscClassPerfLogEnsureSize(stageLog->stageInfo[*stage].classLog, stageLog->classLog->numClasses);
393:   return 0;
394: }

396: /*@C
397:   PetscLogStagePush - This function pushes a stage on the logging stack. Events started and stopped until `PetscLogStagePop()` will be associated with the stage

399:   Not Collective

401:   Input Parameter:
402: . stage - The stage on which to log

404:   Usage:
405:   If the option -log_view is used to run the program containing the
406:   following code, then 2 sets of summary data will be printed during
407:   PetscFinalize().
408: .vb
409:       PetscInitialize(int *argc,char ***args,0,0);
410:       [stage 0 of code]
411:       PetscLogStagePush(1);
412:       [stage 1 of code]
413:       PetscLogStagePop();
414:       PetscBarrier(...);
415:       [more stage 0 of code]
416:       PetscFinalize();
417: .ve

419:   Note:
420:   Use `PetscLogStageRegister()` to register a stage.

422:   Level: intermediate

424: .seealso: `PetscLogStagePop()`, `PetscLogStageRegister()`, `PetscBarrier()`
425: @*/
426: PetscErrorCode PetscLogStagePush(PetscLogStage stage)
427: {
428:   PetscStageLog stageLog;

430:   PetscLogGetStageLog(&stageLog);
431:   PetscStageLogPush(stageLog, stage);
432:   return 0;
433: }

435: /*@C
436:   PetscLogStagePop - This function pops a stage from the logging stack that was pushed with `PetscLogStagePush()`

438:   Not Collective

440:   Usage:
441:   If the option -log_view is used to run the program containing the
442:   following code, then 2 sets of summary data will be printed during
443:   PetscFinalize().
444: .vb
445:       PetscInitialize(int *argc,char ***args,0,0);
446:       [stage 0 of code]
447:       PetscLogStagePush(1);
448:       [stage 1 of code]
449:       PetscLogStagePop();
450:       PetscBarrier(...);
451:       [more stage 0 of code]
452:       PetscFinalize();
453: .ve

455:   Level: intermediate

457: .seealso: `PetscLogStagePush()`, `PetscLogStageRegister()`, `PetscBarrier()`
458: @*/
459: PetscErrorCode PetscLogStagePop(void)
460: {
461:   PetscStageLog stageLog;

463:   PetscLogGetStageLog(&stageLog);
464:   PetscStageLogPop(stageLog);
465:   return 0;
466: }

468: /*@
469:   PetscLogStageSetActive - Sets if a stage is used for `PetscLogEventBegin()` and `PetscLogEventEnd()`.

471:   Not Collective

473:   Input Parameters:
474: + stage    - The stage
475: - isActive - The activity flag, `PETSC_TRUE` for logging, else `PETSC_FALSE` (defaults to `PETSC_TRUE`)

477:   Level: intermediate

479:   Note:
480:   If this is set to `PETSC_FALSE` the logging acts as if the stage did not exist

482: .seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()`
483: @*/
484: PetscErrorCode PetscLogStageSetActive(PetscLogStage stage, PetscBool isActive)
485: {
486:   PetscStageLog stageLog;

488:   PetscLogGetStageLog(&stageLog);
489:   PetscStageLogSetActive(stageLog, stage, isActive);
490:   return 0;
491: }

493: /*@
494:   PetscLogStageGetActive - Checks if a stage is used for `PetscLogEventBegin()` and `PetscLogEventEnd()`.

496:   Not Collective

498:   Input Parameter:
499: . stage    - The stage

501:   Output Parameter:
502: . isActive - The activity flag, `PETSC_TRUE` for logging, else `PETSC_FALSE` (defaults to `PETSC_TRUE`)

504:   Level: intermediate

506: .seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()`
507: @*/
508: PetscErrorCode PetscLogStageGetActive(PetscLogStage stage, PetscBool *isActive)
509: {
510:   PetscStageLog stageLog;

512:   PetscLogGetStageLog(&stageLog);
513:   PetscStageLogGetActive(stageLog, stage, isActive);
514:   return 0;
515: }

517: /*@
518:   PetscLogStageSetVisible - Determines stage visibility in `PetscLogView()`

520:   Not Collective

522:   Input Parameters:
523: + stage     - The stage
524: - isVisible - The visibility flag, `PETSC_TRUE` to print, else `PETSC_FALSE` (defaults to `PETSC_TRUE`)

526:   Level: intermediate

528:   Developer Note:
529:   What does visible mean, needs to be documented.

531: .seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogView()`
532: @*/
533: PetscErrorCode PetscLogStageSetVisible(PetscLogStage stage, PetscBool isVisible)
534: {
535:   PetscStageLog stageLog;

537:   PetscLogGetStageLog(&stageLog);
538:   PetscStageLogSetVisible(stageLog, stage, isVisible);
539:   return 0;
540: }

542: /*@
543:   PetscLogStageGetVisible - Returns stage visibility in `PetscLogView()`

545:   Not Collective

547:   Input Parameter:
548: . stage     - The stage

550:   Output Parameter:
551: . isVisible - The visibility flag, `PETSC_TRUE` to print, else `PETSC_FALSE` (defaults to `PETSC_TRUE`)

553:   Level: intermediate

555: .seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogView()`
556: @*/
557: PetscErrorCode PetscLogStageGetVisible(PetscLogStage stage, PetscBool *isVisible)
558: {
559:   PetscStageLog stageLog;

561:   PetscLogGetStageLog(&stageLog);
562:   PetscStageLogGetVisible(stageLog, stage, isVisible);
563:   return 0;
564: }

566: /*@C
567:   PetscLogStageGetId - Returns the stage id when given the stage name.

569:   Not Collective

571:   Input Parameter:
572: . name  - The stage name

574:   Output Parameter:
575: . stage - The stage, , or -1 if no stage with that name exists

577:   Level: intermediate

579: .seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()`
580: @*/
581: PetscErrorCode PetscLogStageGetId(const char name[], PetscLogStage *stage)
582: {
583:   PetscStageLog stageLog;

585:   PetscLogGetStageLog(&stageLog);
586:   PetscStageLogGetStage(stageLog, name, stage);
587:   return 0;
588: }

590: /*------------------------------------------------ Event Functions --------------------------------------------------*/

592: /*@C
593:   PetscLogEventRegister - Registers an event name for logging operations

595:   Not Collective

597:   Input Parameters:
598: + name   - The name associated with the event
599: - classid - The classid associated to the class for this event, obtain either with
600:            `PetscClassIdRegister()` or use a predefined one such as `KSP_CLASSID`, `SNES_CLASSID`, the predefined ones
601:            are only available in C code

603:   Output Parameter:
604: . event - The event id for use with `PetscLogEventBegin()` and `PetscLogEventEnd()`.

606:   Example of Usage:
607: .vb
608:       PetscLogEvent USER_EVENT;
609:       PetscClassId classid;
610:       PetscLogDouble user_event_flops;
611:       PetscClassIdRegister("class name",&classid);
612:       PetscLogEventRegister("User event name",classid,&USER_EVENT);
613:       PetscLogEventBegin(USER_EVENT,0,0,0,0);
614:          [code segment to monitor]
615:          PetscLogFlops(user_event_flops);
616:       PetscLogEventEnd(USER_EVENT,0,0,0,0);
617: .ve

619:   Notes:
620:   PETSc automatically logs library events if the code has been
621:   configured with --with-log (which is the default) and
622:   -log_view or -log_all is specified.  `PetscLogEventRegister()` is
623:   intended for logging user events to supplement this PETSc
624:   information.

626:   PETSc can gather data for use with the utilities Jumpshot
627:   (part of the MPICH distribution).  If PETSc has been compiled
628:   with flag -DPETSC_HAVE_MPE (MPE is an additional utility within
629:   MPICH), the user can employ another command line option, -log_mpe,
630:   to create a logfile, "mpe.log", which can be visualized
631:   Jumpshot.

633:   The classid is associated with each event so that classes of events
634:   can be disabled simultaneously, such as all matrix events. The user
635:   can either use an existing classid, such as `MAT_CLASSID`, or create
636:   their own as shown in the example.

638:   If an existing event with the same name exists, its event handle is
639:   returned instead of creating a new event.

641:   Level: intermediate

643: .seealso: `PetscLogStageRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogFlops()`,
644:           `PetscLogEventActivate()`, `PetscLogEventDeactivate()`, `PetscClassIdRegister()`
645: @*/
646: PetscErrorCode PetscLogEventRegister(const char name[], PetscClassId classid, PetscLogEvent *event)
647: {
648:   PetscStageLog stageLog;
649:   int           stage;

651:   *event = PETSC_DECIDE;
652:   PetscLogGetStageLog(&stageLog);
653:   PetscEventRegLogGetEvent(stageLog->eventLog, name, event);
654:   if (*event > 0) return 0;
655:   PetscEventRegLogRegister(stageLog->eventLog, name, classid, event);
656:   for (stage = 0; stage < stageLog->numStages; stage++) {
657:     PetscEventPerfLogEnsureSize(stageLog->stageInfo[stage].eventLog, stageLog->eventLog->numEvents);
658:     PetscClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses);
659:   }
660:   return 0;
661: }

663: /*@
664:   PetscLogEventSetCollective - Indicates that a particular event is collective.

666:   Not Collective

668:   Input Parameters:
669: + event - The event id
670: - collective - Boolean flag indicating whether a particular event is collective

672:   Notes:
673:   New events returned from `PetscLogEventRegister()` are collective by default.

675:   Collective events are handled specially if the -log_sync is used. In that case the logging saves information about
676:   two parts of the event; the time for all the MPI ranks to synchronize and then the time for the actual computation/communication
677:   to be performed. This option is useful to debug imbalance within the computations or communications

679:   Level: developer

681: .seealso: `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogEventRegister()`
682: @*/
683: PetscErrorCode PetscLogEventSetCollective(PetscLogEvent event, PetscBool collective)
684: {
685:   PetscStageLog    stageLog;
686:   PetscEventRegLog eventRegLog;

688:   PetscLogGetStageLog(&stageLog);
689:   PetscStageLogGetEventRegLog(stageLog, &eventRegLog);
691:   eventRegLog->eventInfo[event].collective = collective;
692:   return 0;
693: }

695: /*@
696:   PetscLogEventIncludeClass - Activates event logging for a PETSc object class in every stage.

698:   Not Collective

700:   Input Parameter:
701: . classid - The object class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc.

703:   Level: developer

705: .seealso: `PetscLogEventActivateClass()`, `PetscLogEventDeactivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()`
706: @*/
707: PetscErrorCode PetscLogEventIncludeClass(PetscClassId classid)
708: {
709:   PetscStageLog stageLog;
710:   int           stage;

712:   PetscLogGetStageLog(&stageLog);
713:   for (stage = 0; stage < stageLog->numStages; stage++) PetscEventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid);
714:   return 0;
715: }

717: /*@
718:   PetscLogEventExcludeClass - Deactivates event logging for a PETSc object class in every stage.

720:   Not Collective

722:   Input Parameter:
723: . classid - The object class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc.

725:   Level: developer

727:   Note:
728:   If a class is excluded then events associated with that class are not logged.

730: .seealso: `PetscLogEventDeactivateClass()`, `PetscLogEventActivateClass()`, `PetscLogEventDeactivate()`, `PetscLogEventActivate()`
731: @*/
732: PetscErrorCode PetscLogEventExcludeClass(PetscClassId classid)
733: {
734:   PetscStageLog stageLog;
735:   int           stage;

737:   PetscLogGetStageLog(&stageLog);
738:   for (stage = 0; stage < stageLog->numStages; stage++) PetscEventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid);
739:   return 0;
740: }

742: /*@
743:   PetscLogEventActivate - Indicates that a particular event should be logged.

745:   Not Collective

747:   Input Parameter:
748: . event - The event id

750:   Usage:
751: .vb
752:       PetscLogEventDeactivate(VEC_SetValues);
753:         [code where you do not want to log VecSetValues()]
754:       PetscLogEventActivate(VEC_SetValues);
755:         [code where you do want to log VecSetValues()]
756: .ve

758:   Note:
759:   The event may be either a pre-defined PETSc event (found in include/petsclog.h)
760:   or an event number obtained with `PetscLogEventRegister()`.

762:   Level: advanced

764: .seealso: `PlogEventDeactivate()`, `PlogEventDeactivatePush()`, `PetscLogEventDeactivatePop()`
765: @*/
766: PetscErrorCode PetscLogEventActivate(PetscLogEvent event)
767: {
768:   PetscStageLog stageLog;
769:   int           stage;

771:   PetscLogGetStageLog(&stageLog);
772:   PetscStageLogGetCurrent(stageLog, &stage);
773:   PetscEventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event);
774:   return 0;
775: }

777: /*@
778:   PetscLogEventDeactivate - Indicates that a particular event should not be logged.

780:   Not Collective

782:   Input Parameter:
783: . event - The event id

785:   Usage:
786: .vb
787:       PetscLogEventDeactivate(VEC_SetValues);
788:         [code where you do not want to log VecSetValues()]
789:       PetscLogEventActivate(VEC_SetValues);
790:         [code where you do want to log VecSetValues()]
791: .ve

793:   Note:
794:   The event may be either a pre-defined PETSc event (found in
795:   include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`).

797:   Level: advanced

799: .seealso: `PetscLogEventActivate()`, `PetscLogEventDeactivatePush()`, `PetscLogEventDeactivatePop()`
800: @*/
801: PetscErrorCode PetscLogEventDeactivate(PetscLogEvent event)
802: {
803:   PetscStageLog stageLog;
804:   int           stage;

806:   PetscLogGetStageLog(&stageLog);
807:   PetscStageLogGetCurrent(stageLog, &stage);
808:   PetscEventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event);
809:   return 0;
810: }

812: /*@
813:   PetscLogEventDeactivatePush - Indicates that a particular event should not be logged until `PetscLogEventDeactivatePop()` is called

815:   Not Collective

817:   Input Parameter:
818: . event - The event id

820:   Usage:
821: .vb
822:       PetscLogEventDeactivatePush(VEC_SetValues);
823:         [code where you do not want to log VecSetValues()]
824:       PetscLogEventDeactivatePop(VEC_SetValues);
825:         [code where you do want to log VecSetValues()]
826: .ve

828:   Note:
829:   The event may be either a pre-defined PETSc event (found in
830:   include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`).

832:   Level: advanced

834: .seealso: `PetscLogEventActivate()`, `PetscLogEventDeactivatePop()`, `PetscLogEventDeactivate()`
835: @*/
836: PetscErrorCode PetscLogEventDeactivatePush(PetscLogEvent event)
837: {
838:   PetscStageLog stageLog;
839:   int           stage;

841:   PetscLogGetStageLog(&stageLog);
842:   PetscStageLogGetCurrent(stageLog, &stage);
843:   PetscEventPerfLogDeactivatePush(stageLog->stageInfo[stage].eventLog, event);
844:   return 0;
845: }

847: /*@
848:   PetscLogEventDeactivatePop - Indicates that a particular event should again be logged after the logging was turned off with `PetscLogEventDeactivatePush()`

850:   Not Collective

852:   Input Parameter:
853: . event - The event id

855:   Usage:
856: .vb
857:       PetscLogEventDeactivatePush(VEC_SetValues);
858:         [code where you do not want to log VecSetValues()]
859:       PetscLogEventDeactivatePop(VEC_SetValues);
860:         [code where you do want to log VecSetValues()]
861: .ve

863:   Note:
864:   The event may be either a pre-defined PETSc event (found in
865:   include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`).

867:   Level: advanced

869: .seealso: `PetscLogEventActivate()`, `PetscLogEventDeactivatePush()`
870: @*/
871: PetscErrorCode PetscLogEventDeactivatePop(PetscLogEvent event)
872: {
873:   PetscStageLog stageLog;
874:   int           stage;

876:   PetscLogGetStageLog(&stageLog);
877:   PetscStageLogGetCurrent(stageLog, &stage);
878:   PetscEventPerfLogDeactivatePop(stageLog->stageInfo[stage].eventLog, event);
879:   return 0;
880: }

882: /*@
883:   PetscLogEventSetActiveAll - Turns on logging of all events

885:   Not Collective

887:   Input Parameters:
888: + event    - The event id
889: - isActive - The activity flag determining whether the event is logged

891:   Level: advanced

893: .seealso: `PlogEventActivate()`, `PlogEventDeactivate()`
894: @*/
895: PetscErrorCode PetscLogEventSetActiveAll(PetscLogEvent event, PetscBool isActive)
896: {
897:   PetscStageLog stageLog;
898:   int           stage;

900:   PetscLogGetStageLog(&stageLog);
901:   for (stage = 0; stage < stageLog->numStages; stage++) {
902:     if (isActive) {
903:       PetscEventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event);
904:     } else {
905:       PetscEventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event);
906:     }
907:   }
908:   return 0;
909: }

911: /*@
912:   PetscLogEventActivateClass - Activates event logging for a PETSc object class for the current stage

914:   Not Collective

916:   Input Parameter:
917: . classid - The event class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc.

919:   Level: developer

921: .seealso: `PetscLogEventIncludeClass()`, `PetscLogEventExcludeClass()`, `PetscLogEventDeactivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()`
922: @*/
923: PetscErrorCode PetscLogEventActivateClass(PetscClassId classid)
924: {
925:   PetscStageLog stageLog;
926:   int           stage;

928:   PetscLogGetStageLog(&stageLog);
929:   PetscStageLogGetCurrent(stageLog, &stage);
930:   PetscEventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid);
931:   return 0;
932: }

934: /*@
935:   PetscLogEventDeactivateClass - Deactivates event logging for a PETSc object class for the current stage

937:   Not Collective

939:   Input Parameter:
940: . classid - The event class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc.

942:   Level: developer

944: .seealso: `PetscLogEventIncludeClass()`, `PetscLogEventExcludeClass()`, `PetscLogEventActivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()`
945: @*/
946: PetscErrorCode PetscLogEventDeactivateClass(PetscClassId classid)
947: {
948:   PetscStageLog stageLog;
949:   int           stage;

951:   PetscLogGetStageLog(&stageLog);
952:   PetscStageLogGetCurrent(stageLog, &stage);
953:   PetscEventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid);
954:   return 0;
955: }

957: /*MC
958:    PetscLogEventSync - Synchronizes the beginning of a user event.

960:    Synopsis:
961: #include <petsclog.h>
962:    PetscErrorCode PetscLogEventSync(int e,MPI_Comm comm)

964:    Collective

966:    Input Parameters:
967: +  e - integer associated with the event obtained from PetscLogEventRegister()
968: -  comm - an MPI communicator

970:    Usage:
971: .vb
972:      PetscLogEvent USER_EVENT;
973:      PetscLogEventRegister("User event",0,&USER_EVENT);
974:      PetscLogEventSync(USER_EVENT,PETSC_COMM_WORLD);
975:      PetscLogEventBegin(USER_EVENT,0,0,0,0);
976:         [code segment to monitor]
977:      PetscLogEventEnd(USER_EVENT,0,0,0,0);
978: .ve

980:    Note:
981:    This routine should be called only if there is not a
982:    `PetscObject` available to pass to `PetscLogEventBegin()`.

984:    Level: developer

986: .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`
987: M*/

989: /*MC
990:    PetscLogEventBegin - Logs the beginning of a user event.

992:    Synopsis:
993: #include <petsclog.h>
994:    PetscErrorCode PetscLogEventBegin(int e,PetscObject o1,PetscObject o2,PetscObject o3,PetscObject o4)

996:    Not Collective

998:    Input Parameters:
999: +  e - integer associated with the event obtained from PetscLogEventRegister()
1000: -  o1,o2,o3,o4 - objects associated with the event, or 0

1002:    Fortran Synopsis:
1003:    void PetscLogEventBegin(int e,PetscErrorCode ierr)

1005:    Usage:
1006: .vb
1007:      PetscLogEvent USER_EVENT;
1008:      PetscLogDouble user_event_flops;
1009:      PetscLogEventRegister("User event",0,&USER_EVENT);
1010:      PetscLogEventBegin(USER_EVENT,0,0,0,0);
1011:         [code segment to monitor]
1012:         PetscLogFlops(user_event_flops);
1013:      PetscLogEventEnd(USER_EVENT,0,0,0,0);
1014: .ve

1016:    Developer Note:
1017:      `PetscLogEventBegin()` and `PetscLogEventBegin()` return error codes instead of explicitly handling the
1018:      errors that occur in the macro directly because other packages that use this macros have used them in their
1019:      own functions or methods that do not return error codes and it would be disruptive to change the current
1020:      behavior.

1022:    Level: intermediate

1024: .seealso: `PetscLogEventRegister()`, `PetscLogEventEnd()`, `PetscLogFlops()`
1025: M*/

1027: /*MC
1028:    PetscLogEventEnd - Log the end of a user event.

1030:    Synopsis:
1031: #include <petsclog.h>
1032:    PetscErrorCode PetscLogEventEnd(int e,PetscObject o1,PetscObject o2,PetscObject o3,PetscObject o4)

1034:    Not Collective

1036:    Input Parameters:
1037: +  e - integer associated with the event obtained with PetscLogEventRegister()
1038: -  o1,o2,o3,o4 - objects associated with the event, or 0

1040:    Fortran Synopsis:
1041:    void PetscLogEventEnd(int e,PetscErrorCode ierr)

1043:    Usage:
1044: .vb
1045:      PetscLogEvent USER_EVENT;
1046:      PetscLogDouble user_event_flops;
1047:      PetscLogEventRegister("User event",0,&USER_EVENT,);
1048:      PetscLogEventBegin(USER_EVENT,0,0,0,0);
1049:         [code segment to monitor]
1050:         PetscLogFlops(user_event_flops);
1051:      PetscLogEventEnd(USER_EVENT,0,0,0,0);
1052: .ve

1054:    Level: intermediate

1056: .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogFlops()`
1057: M*/

1059: /*@C
1060:   PetscLogEventGetId - Returns the event id when given the event name.

1062:   Not Collective

1064:   Input Parameter:
1065: . name  - The event name

1067:   Output Parameter:
1068: . event - The event, or -1 if no event with that name exists

1070:   Level: intermediate

1072: .seealso: `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogStageGetId()`
1073: @*/
1074: PetscErrorCode PetscLogEventGetId(const char name[], PetscLogEvent *event)
1075: {
1076:   PetscStageLog stageLog;

1078:   PetscLogGetStageLog(&stageLog);
1079:   PetscEventRegLogGetEvent(stageLog->eventLog, name, event);
1080:   return 0;
1081: }

1083: PetscErrorCode PetscLogPushCurrentEvent_Internal(PetscLogEvent event)
1084: {
1085:   PetscIntStackPush(current_log_event_stack, event);
1086:   return 0;
1087: }

1089: PetscErrorCode PetscLogPopCurrentEvent_Internal(void)
1090: {
1091:   PetscIntStackPop(current_log_event_stack, NULL);
1092:   return 0;
1093: }

1095: PetscErrorCode PetscLogGetCurrentEvent_Internal(PetscLogEvent *event)
1096: {
1097:   PetscBool empty;

1100:   *event = PETSC_DECIDE;
1101:   PetscIntStackEmpty(current_log_event_stack, &empty);
1102:   if (!empty) PetscIntStackTop(current_log_event_stack, event);
1103:   return 0;
1104: }

1106: PetscErrorCode PetscLogEventPause_Internal(PetscLogEvent event)
1107: {
1108:   if (event != PETSC_DECIDE) PetscLogEventEnd(event, NULL, NULL, NULL, NULL);
1109:   return 0;
1110: }

1112: PetscErrorCode PetscLogEventResume_Internal(PetscLogEvent event)
1113: {
1114:   PetscStageLog     stageLog;
1115:   PetscEventPerfLog eventLog;
1116:   int               stage;

1118:   if (event == PETSC_DECIDE) return 0;
1119:   PetscLogEventBegin(event, NULL, NULL, NULL, NULL);
1120:   PetscLogGetStageLog(&stageLog);
1121:   PetscStageLogGetCurrent(stageLog, &stage);
1122:   PetscStageLogGetEventPerfLog(stageLog, stage, &eventLog);
1123:   eventLog->eventInfo[event].count--;
1124:   return 0;
1125: }

1127: /*------------------------------------------------ Output Functions -------------------------------------------------*/
1128: /*@C
1129:   PetscLogDump - Dumps logs of objects to a file. This file is intended to
1130:   be read by bin/petscview. This program no longer exists.

1132:   Collective on `PETSC_COMM_WORLD`

1134:   Input Parameter:
1135: . name - an optional file name

1137:   Usage:
1138: .vb
1139:      PetscInitialize(...);
1140:      PetscLogDefaultBegin(); or PetscLogAllBegin();
1141:      ... code ...
1142:      PetscLogDump(filename);
1143:      PetscFinalize();
1144: .ve

1146:   Note:
1147:   The default file name is
1148: $    Log.<rank>
1149:   where <rank> is the processor number. If no name is specified,
1150:   this file will be used.

1152:   Level: advanced

1154: .seealso: `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogView()`
1155: @*/
1156: PetscErrorCode PetscLogDump(const char sname[])
1157: {
1158:   PetscStageLog       stageLog;
1159:   PetscEventPerfInfo *eventInfo;
1160:   FILE               *fd;
1161:   char                file[PETSC_MAX_PATH_LEN], fname[PETSC_MAX_PATH_LEN];
1162:   PetscLogDouble      flops, _TotalTime;
1163:   PetscMPIInt         rank;
1164:   int                 action, object, curStage;
1165:   PetscLogEvent       event;

1167:   /* Calculate the total elapsed time */
1168:   PetscTime(&_TotalTime);
1169:   _TotalTime -= petsc_BaseTime;
1170:   /* Open log file */
1171:   MPI_Comm_rank(PETSC_COMM_WORLD, &rank);
1172:   if (sname && sname[0]) sprintf(file, "%s.%d", sname, rank);
1173:   else sprintf(file, "Log.%d", rank);
1174:   PetscFixFilename(file, fname);
1175:   PetscFOpen(PETSC_COMM_WORLD, fname, "w", &fd);
1177:   /* Output totals */
1178:   PetscFPrintf(PETSC_COMM_WORLD, fd, "Total Flop %14e %16.8e\n", petsc_TotalFlops, _TotalTime);
1179:   PetscFPrintf(PETSC_COMM_WORLD, fd, "Clock Resolution %g\n", 0.0);
1180:   /* Output actions */
1181:   if (petsc_logActions) {
1182:     PetscFPrintf(PETSC_COMM_WORLD, fd, "Actions accomplished %d\n", petsc_numActions);
1183:     for (action = 0; action < petsc_numActions; action++) {
1184:       PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "%g %d %d %d %d %d %d %g %g %g\n", petsc_actions[action].time, petsc_actions[action].action, (int)petsc_actions[action].event, (int)petsc_actions[action].classid, petsc_actions[action].id1,
1185:                              petsc_actions[action].id2, petsc_actions[action].id3, petsc_actions[action].flops, petsc_actions[action].mem, petsc_actions[action].maxmem));
1186:     }
1187:   }
1188:   /* Output objects */
1189:   if (petsc_logObjects) {
1190:     PetscFPrintf(PETSC_COMM_WORLD, fd, "Objects created %d destroyed %d\n", petsc_numObjects, petsc_numObjectsDestroyed);
1191:     for (object = 0; object < petsc_numObjects; object++) {
1192:       PetscFPrintf(PETSC_COMM_WORLD, fd, "Parent ID: %d Memory: %d\n", petsc_objects[object].parent, (int)petsc_objects[object].mem);
1193:       if (!petsc_objects[object].name[0]) {
1194:         PetscFPrintf(PETSC_COMM_WORLD, fd, "No Name\n");
1195:       } else {
1196:         PetscFPrintf(PETSC_COMM_WORLD, fd, "Name: %s\n", petsc_objects[object].name);
1197:       }
1198:       if (petsc_objects[object].info[0] != 0) {
1199:         PetscFPrintf(PETSC_COMM_WORLD, fd, "No Info\n");
1200:       } else {
1201:         PetscFPrintf(PETSC_COMM_WORLD, fd, "Info: %s\n", petsc_objects[object].info);
1202:       }
1203:     }
1204:   }
1205:   /* Output events */
1206:   PetscFPrintf(PETSC_COMM_WORLD, fd, "Event log:\n");
1207:   PetscLogGetStageLog(&stageLog);
1208:   PetscIntStackTop(stageLog->stack, &curStage);
1209:   eventInfo = stageLog->stageInfo[curStage].eventLog->eventInfo;
1210:   for (event = 0; event < stageLog->stageInfo[curStage].eventLog->numEvents; event++) {
1211:     if (eventInfo[event].time != 0.0) flops = eventInfo[event].flops / eventInfo[event].time;
1212:     else flops = 0.0;
1213:     PetscFPrintf(PETSC_COMM_WORLD, fd, "%d %16d %16g %16g %16g\n", event, eventInfo[event].count, eventInfo[event].flops, eventInfo[event].time, flops);
1214:   }
1215:   PetscFClose(PETSC_COMM_WORLD, fd);
1216:   return 0;
1217: }

1219: /*
1220:   PetscLogView_Detailed - Each process prints the times for its own events

1222: */
1223: PetscErrorCode PetscLogView_Detailed(PetscViewer viewer)
1224: {
1225:   PetscStageLog       stageLog;
1226:   PetscEventPerfInfo *eventInfo = NULL, *stageInfo = NULL;
1227:   PetscLogDouble      locTotalTime, numRed, maxMem;
1228:   int                 numStages, numEvents, stage, event;
1229:   MPI_Comm            comm = PetscObjectComm((PetscObject)viewer);
1230:   PetscMPIInt         rank, size;

1232:   MPI_Comm_size(comm, &size);
1233:   MPI_Comm_rank(comm, &rank);
1234:   /* Must preserve reduction count before we go on */
1235:   numRed = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct;
1236:   /* Get the total elapsed time */
1237:   PetscTime(&locTotalTime);
1238:   locTotalTime -= petsc_BaseTime;
1239:   PetscViewerASCIIPrintf(viewer, "size = %d\n", size);
1240:   PetscViewerASCIIPrintf(viewer, "LocalTimes = {}\n");
1241:   PetscViewerASCIIPrintf(viewer, "LocalMessages = {}\n");
1242:   PetscViewerASCIIPrintf(viewer, "LocalMessageLens = {}\n");
1243:   PetscViewerASCIIPrintf(viewer, "LocalReductions = {}\n");
1244:   PetscViewerASCIIPrintf(viewer, "LocalFlop = {}\n");
1245:   PetscViewerASCIIPrintf(viewer, "LocalObjects = {}\n");
1246:   PetscViewerASCIIPrintf(viewer, "LocalMemory = {}\n");
1247:   PetscLogGetStageLog(&stageLog);
1248:   MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm);
1249:   PetscViewerASCIIPrintf(viewer, "Stages = {}\n");
1250:   for (stage = 0; stage < numStages; stage++) {
1251:     PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"] = {}\n", stageLog->stageInfo[stage].name);
1252:     PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"summary\"] = {}\n", stageLog->stageInfo[stage].name);
1253:     MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm);
1254:     for (event = 0; event < numEvents; event++) PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"%s\"] = {}\n", stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name);
1255:   }
1256:   PetscMallocGetMaximumUsage(&maxMem);
1257:   PetscViewerASCIIPushSynchronized(viewer);
1258:   PetscViewerASCIISynchronizedPrintf(viewer, "LocalTimes[%d] = %g\n", rank, locTotalTime);
1259:   PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessages[%d] = %g\n", rank, (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct));
1260:   PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessageLens[%d] = %g\n", rank, (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len));
1261:   PetscViewerASCIISynchronizedPrintf(viewer, "LocalReductions[%d] = %g\n", rank, numRed);
1262:   PetscViewerASCIISynchronizedPrintf(viewer, "LocalFlop[%d] = %g\n", rank, petsc_TotalFlops);
1263:   PetscViewerASCIISynchronizedPrintf(viewer, "LocalObjects[%d] = %d\n", rank, petsc_numObjects);
1264:   PetscViewerASCIISynchronizedPrintf(viewer, "LocalMemory[%d] = %g\n", rank, maxMem);
1265:   PetscViewerFlush(viewer);
1266:   for (stage = 0; stage < numStages; stage++) {
1267:     stageInfo = &stageLog->stageInfo[stage].perfInfo;
1268:     PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"summary\"][%d] = {\"time\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g}\n", stageLog->stageInfo[stage].name, rank, stageInfo->time,
1269:                                                  stageInfo->numMessages, stageInfo->messageLength, stageInfo->numReductions, stageInfo->flops));
1270:     MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm);
1271:     for (event = 0; event < numEvents; event++) {
1272:       eventInfo = &stageLog->stageInfo[stage].eventLog->eventInfo[event];
1273:       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"%s\"][%d] = {\"count\" : %d, \"time\" : %g, \"syncTime\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g",
1274:                                                    stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name, rank, eventInfo->count, eventInfo->time, eventInfo->syncTime, eventInfo->numMessages, eventInfo->messageLength, eventInfo->numReductions,
1275:                                                    eventInfo->flops));
1276:       if (eventInfo->dof[0] >= 0.) {
1277:         PetscInt d, e;

1279:         PetscViewerASCIISynchronizedPrintf(viewer, ", \"dof\" : [");
1280:         for (d = 0; d < 8; ++d) {
1281:           if (d > 0) PetscViewerASCIISynchronizedPrintf(viewer, ", ");
1282:           PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->dof[d]);
1283:         }
1284:         PetscViewerASCIISynchronizedPrintf(viewer, "]");
1285:         PetscViewerASCIISynchronizedPrintf(viewer, ", \"error\" : [");
1286:         for (e = 0; e < 8; ++e) {
1287:           if (e > 0) PetscViewerASCIISynchronizedPrintf(viewer, ", ");
1288:           PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->errors[e]);
1289:         }
1290:         PetscViewerASCIISynchronizedPrintf(viewer, "]");
1291:       }
1292:       PetscViewerASCIISynchronizedPrintf(viewer, "}\n");
1293:     }
1294:   }
1295:   PetscViewerFlush(viewer);
1296:   PetscViewerASCIIPopSynchronized(viewer);
1297:   return 0;
1298: }

1300: /*
1301:   PetscLogView_CSV - Each process prints the times for its own events in Comma-Separated Value Format
1302: */
1303: PetscErrorCode PetscLogView_CSV(PetscViewer viewer)
1304: {
1305:   PetscStageLog       stageLog;
1306:   PetscEventPerfInfo *eventInfo = NULL;
1307:   PetscLogDouble      locTotalTime, maxMem;
1308:   int                 numStages, numEvents, stage, event;
1309:   MPI_Comm            comm = PetscObjectComm((PetscObject)viewer);
1310:   PetscMPIInt         rank, size;

1312:   MPI_Comm_size(comm, &size);
1313:   MPI_Comm_rank(comm, &rank);
1314:   /* Must preserve reduction count before we go on */
1315:   /* Get the total elapsed time */
1316:   PetscTime(&locTotalTime);
1317:   locTotalTime -= petsc_BaseTime;
1318:   PetscLogGetStageLog(&stageLog);
1319:   MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm);
1320:   PetscMallocGetMaximumUsage(&maxMem);
1321:   PetscViewerASCIIPushSynchronized(viewer);
1322:   PetscViewerASCIIPrintf(viewer, "Stage Name,Event Name,Rank,Count,Time,Num Messages,Message Length,Num Reductions,FLOP,dof0,dof1,dof2,dof3,dof4,dof5,dof6,dof7,e0,e1,e2,e3,e4,e5,e6,e7,%d\n", size);
1323:   PetscViewerFlush(viewer);
1324:   for (stage = 0; stage < numStages; stage++) {
1325:     PetscEventPerfInfo *stageInfo = &stageLog->stageInfo[stage].perfInfo;

1327:     PetscViewerASCIISynchronizedPrintf(viewer, "%s,summary,%d,1,%g,%g,%g,%g,%g\n", stageLog->stageInfo[stage].name, rank, stageInfo->time, stageInfo->numMessages, stageInfo->messageLength, stageInfo->numReductions, stageInfo->flops);
1328:     MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm);
1329:     for (event = 0; event < numEvents; event++) {
1330:       eventInfo = &stageLog->stageInfo[stage].eventLog->eventInfo[event];
1331:       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,%s,%d,%d,%g,%g,%g,%g,%g", stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name, rank, eventInfo->count, eventInfo->time, eventInfo->numMessages, eventInfo->messageLength,
1332:                                                    eventInfo->numReductions, eventInfo->flops));
1333:       if (eventInfo->dof[0] >= 0.) {
1334:         PetscInt d, e;

1336:         for (d = 0; d < 8; ++d) PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->dof[d]);
1337:         for (e = 0; e < 8; ++e) PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->errors[e]);
1338:       }
1339:       PetscViewerASCIISynchronizedPrintf(viewer, "\n");
1340:     }
1341:   }
1342:   PetscViewerFlush(viewer);
1343:   PetscViewerASCIIPopSynchronized(viewer);
1344:   return 0;
1345: }

1347: static PetscErrorCode PetscLogViewWarnSync(MPI_Comm comm, FILE *fd)
1348: {
1349:   if (!PetscLogSyncOn) return 0;
1350:   PetscFPrintf(comm, fd, "\n\n");
1351:   PetscFPrintf(comm, fd, "      ##########################################################\n");
1352:   PetscFPrintf(comm, fd, "      #                                                        #\n");
1353:   PetscFPrintf(comm, fd, "      #                       WARNING!!!                       #\n");
1354:   PetscFPrintf(comm, fd, "      #                                                        #\n");
1355:   PetscFPrintf(comm, fd, "      #   This program was run with logging synchronization.   #\n");
1356:   PetscFPrintf(comm, fd, "      #   This option provides more meaningful imbalance       #\n");
1357:   PetscFPrintf(comm, fd, "      #   figures at the expense of slowing things down and    #\n");
1358:   PetscFPrintf(comm, fd, "      #   providing a distorted view of the overall runtime.   #\n");
1359:   PetscFPrintf(comm, fd, "      #                                                        #\n");
1360:   PetscFPrintf(comm, fd, "      ##########################################################\n\n\n");
1361:   return 0;
1362: }

1364: static PetscErrorCode PetscLogViewWarnDebugging(MPI_Comm comm, FILE *fd)
1365: {
1366:   if (PetscDefined(USE_DEBUG)) {
1367:     PetscFPrintf(comm, fd, "\n\n");
1368:     PetscFPrintf(comm, fd, "      ##########################################################\n");
1369:     PetscFPrintf(comm, fd, "      #                                                        #\n");
1370:     PetscFPrintf(comm, fd, "      #                       WARNING!!!                       #\n");
1371:     PetscFPrintf(comm, fd, "      #                                                        #\n");
1372:     PetscFPrintf(comm, fd, "      #   This code was compiled with a debugging option.      #\n");
1373:     PetscFPrintf(comm, fd, "      #   To get timing results run ./configure                #\n");
1374:     PetscFPrintf(comm, fd, "      #   using --with-debugging=no, the performance will      #\n");
1375:     PetscFPrintf(comm, fd, "      #   be generally two or three times faster.              #\n");
1376:     PetscFPrintf(comm, fd, "      #                                                        #\n");
1377:     PetscFPrintf(comm, fd, "      ##########################################################\n\n\n");
1378:   }
1379:   return 0;
1380: }

1382: static PetscErrorCode PetscLogViewWarnNoGpuAwareMpi(MPI_Comm comm, FILE *fd)
1383: {
1384:   #if defined(PETSC_HAVE_DEVICE)
1385:   PetscMPIInt size;
1386:   PetscBool   deviceInitialized = PETSC_FALSE;

1388:   MPI_Comm_size(comm, &size);
1389:   for (int i = PETSC_DEVICE_HOST + 1; i < PETSC_DEVICE_MAX; ++i) {
1390:     const PetscDeviceType dtype = PetscDeviceTypeCast(i);
1391:     if (PetscDeviceInitialized(dtype)) { /* a non-host device was initialized */
1392:       deviceInitialized = PETSC_TRUE;
1393:       break;
1394:     }
1395:   }
1396:   /* the last condition says petsc is configured with device but it is a pure CPU run, so don't print misleading warnings */
1397:   if (use_gpu_aware_mpi || size == 1 || !deviceInitialized) return 0;
1398:   PetscFPrintf(comm, fd, "\n\n");
1399:   PetscFPrintf(comm, fd, "      ##########################################################\n");
1400:   PetscFPrintf(comm, fd, "      #                                                        #\n");
1401:   PetscFPrintf(comm, fd, "      #                       WARNING!!!                       #\n");
1402:   PetscFPrintf(comm, fd, "      #                                                        #\n");
1403:   PetscFPrintf(comm, fd, "      #   This code was compiled with GPU support and you've   #\n");
1404:   PetscFPrintf(comm, fd, "      #   created PETSc/GPU objects, but you intentionally     #\n");
1405:   PetscFPrintf(comm, fd, "      #   used -use_gpu_aware_mpi 0, requiring PETSc to copy   #\n");
1406:   PetscFPrintf(comm, fd, "      #   additional data between the GPU and CPU. To obtain   #\n");
1407:   PetscFPrintf(comm, fd, "      #   meaningful timing results on multi-rank runs, use    #\n");
1408:   PetscFPrintf(comm, fd, "      #   GPU-aware MPI instead.                               #\n");
1409:   PetscFPrintf(comm, fd, "      #                                                        #\n");
1410:   PetscFPrintf(comm, fd, "      ##########################################################\n\n\n");
1411:   return 0;
1412:   #else
1413:   return 0;
1414:   #endif
1415: }

1417: static PetscErrorCode PetscLogViewWarnGpuTime(MPI_Comm comm, FILE *fd)
1418: {
1419:   #if defined(PETSC_HAVE_DEVICE)

1421:   if (!PetscLogGpuTimeFlag || petsc_gflops == 0) return 0;
1422:   PetscFPrintf(comm, fd, "\n\n");
1423:   PetscFPrintf(comm, fd, "      ##########################################################\n");
1424:   PetscFPrintf(comm, fd, "      #                                                        #\n");
1425:   PetscFPrintf(comm, fd, "      #                       WARNING!!!                       #\n");
1426:   PetscFPrintf(comm, fd, "      #                                                        #\n");
1427:   PetscFPrintf(comm, fd, "      #   This code was run with -log_view_gpu_time            #\n");
1428:   PetscFPrintf(comm, fd, "      #   This provides accurate timing within the GPU kernels #\n");
1429:   PetscFPrintf(comm, fd, "      #   but can slow down the entire computation by a        #\n");
1430:   PetscFPrintf(comm, fd, "      #   measurable amount. For fastest runs we recommend     #\n");
1431:   PetscFPrintf(comm, fd, "      #   not using this option.                               #\n");
1432:   PetscFPrintf(comm, fd, "      #                                                        #\n");
1433:   PetscFPrintf(comm, fd, "      ##########################################################\n\n\n");
1434:   return 0;
1435:   #else
1436:   return 0;
1437:   #endif
1438: }

1440: PetscErrorCode PetscLogView_Default(PetscViewer viewer)
1441: {
1442:   FILE               *fd;
1443:   PetscLogDouble      zero = 0.0;
1444:   PetscStageLog       stageLog;
1445:   PetscStageInfo     *stageInfo = NULL;
1446:   PetscEventPerfInfo *eventInfo = NULL;
1447:   PetscClassPerfInfo *classInfo;
1448:   char                arch[128], hostname[128], username[128], pname[PETSC_MAX_PATH_LEN], date[128];
1449:   const char         *name;
1450:   PetscLogDouble      locTotalTime, TotalTime, TotalFlops;
1451:   PetscLogDouble      numMessages, messageLength, avgMessLen, numReductions;
1452:   PetscLogDouble      stageTime, flops, flopr, mem, mess, messLen, red;
1453:   PetscLogDouble      fracTime, fracFlops, fracMessages, fracLength, fracReductions, fracMess, fracMessLen, fracRed;
1454:   PetscLogDouble      fracStageTime, fracStageFlops, fracStageMess, fracStageMessLen, fracStageRed;
1455:   PetscLogDouble      min, max, tot, ratio, avg, x, y;
1456:   PetscLogDouble      minf, maxf, totf, ratf, mint, maxt, tott, ratt, ratC, totm, totml, totr, mal, malmax, emalmax;
1457:   #if defined(PETSC_HAVE_DEVICE)
1458:   PetscLogEvent  KSP_Solve, SNES_Solve, TS_Step, TAO_Solve; /* These need to be fixed to be some events registered with certain objects */
1459:   PetscLogDouble cct, gct, csz, gsz, gmaxt, gflops, gflopr, fracgflops;
1460:   #endif
1461:   PetscMPIInt    minC, maxC;
1462:   PetscMPIInt    size, rank;
1463:   PetscBool     *localStageUsed, *stageUsed;
1464:   PetscBool     *localStageVisible, *stageVisible;
1465:   int            numStages, localNumEvents, numEvents;
1466:   int            stage, oclass;
1467:   PetscLogEvent  event;
1468:   PetscErrorCode 0;
1469:   char           version[256];
1470:   MPI_Comm       comm;
1471:   #if defined(PETSC_HAVE_DEVICE)
1472:   PetscLogEvent eventid;
1473:   PetscInt64    nas = 0x7FF0000000000002;
1474:   #endif

1476:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1477:   PetscObjectGetComm((PetscObject)viewer, &comm);
1478:   PetscViewerASCIIGetPointer(viewer, &fd);
1479:   MPI_Comm_size(comm, &size);
1480:   MPI_Comm_rank(comm, &rank);
1481:   /* Get the total elapsed time */
1482:   PetscTime(&locTotalTime);
1483:   locTotalTime -= petsc_BaseTime;

1485:   PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n");
1486:   PetscFPrintf(comm, fd, "***                                WIDEN YOUR WINDOW TO 160 CHARACTERS.  Use 'enscript -r -fCourier9' to print this document                                 ***\n");
1487:   PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n");
1488:   PetscFPrintf(comm, fd, "\n------------------------------------------------------------------ PETSc Performance Summary: ------------------------------------------------------------------\n\n");
1489:   PetscLogViewWarnSync(comm, fd);
1490:   PetscLogViewWarnDebugging(comm, fd);
1491:   PetscLogViewWarnNoGpuAwareMpi(comm, fd);
1492:   PetscLogViewWarnGpuTime(comm, fd);
1493:   PetscGetArchType(arch, sizeof(arch));
1494:   PetscGetHostName(hostname, sizeof(hostname));
1495:   PetscGetUserName(username, sizeof(username));
1496:   PetscGetProgramName(pname, sizeof(pname));
1497:   PetscGetDate(date, sizeof(date));
1498:   PetscGetVersion(version, sizeof(version));
1499:   if (size == 1) {
1500:     PetscFPrintf(comm, fd, "%s on a %s named %s with %d processor, by %s %s\n", pname, arch, hostname, size, username, date);
1501:   } else {
1502:     PetscFPrintf(comm, fd, "%s on a %s named %s with %d processors, by %s %s\n", pname, arch, hostname, size, username, date);
1503:   }
1504:   #if defined(PETSC_HAVE_OPENMP)
1505:   PetscFPrintf(comm, fd, "Using %" PetscInt_FMT " OpenMP threads\n", PetscNumOMPThreads);
1506:   #endif
1507:   PetscFPrintf(comm, fd, "Using %s\n", version);

1509:   /* Must preserve reduction count before we go on */
1510:   red = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct;

1512:   /* Calculate summary information */
1513:   PetscFPrintf(comm, fd, "\n                         Max       Max/Min     Avg       Total\n");
1514:   /*   Time */
1515:   MPI_Allreduce(&locTotalTime, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1516:   MPI_Allreduce(&locTotalTime, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1517:   MPI_Allreduce(&locTotalTime, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1518:   avg = tot / ((PetscLogDouble)size);
1519:   if (min != 0.0) ratio = max / min;
1520:   else ratio = 0.0;
1521:   PetscFPrintf(comm, fd, "Time (sec):           %5.3e   %7.3f   %5.3e\n", max, ratio, avg);
1522:   TotalTime = tot;
1523:   /*   Objects */
1524:   avg = (PetscLogDouble)petsc_numObjects;
1525:   MPI_Allreduce(&avg, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1526:   MPI_Allreduce(&avg, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1527:   MPI_Allreduce(&avg, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1528:   avg = tot / ((PetscLogDouble)size);
1529:   if (min != 0.0) ratio = max / min;
1530:   else ratio = 0.0;
1531:   PetscFPrintf(comm, fd, "Objects:              %5.3e   %7.3f   %5.3e\n", max, ratio, avg);
1532:   /*   Flops */
1533:   MPI_Allreduce(&petsc_TotalFlops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1534:   MPI_Allreduce(&petsc_TotalFlops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1535:   MPI_Allreduce(&petsc_TotalFlops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1536:   avg = tot / ((PetscLogDouble)size);
1537:   if (min != 0.0) ratio = max / min;
1538:   else ratio = 0.0;
1539:   PetscFPrintf(comm, fd, "Flops:                %5.3e   %7.3f   %5.3e  %5.3e\n", max, ratio, avg, tot);
1540:   TotalFlops = tot;
1541:   /*   Flops/sec -- Must talk to Barry here */
1542:   if (locTotalTime != 0.0) flops = petsc_TotalFlops / locTotalTime;
1543:   else flops = 0.0;
1544:   MPI_Allreduce(&flops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1545:   MPI_Allreduce(&flops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1546:   MPI_Allreduce(&flops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1547:   avg = tot / ((PetscLogDouble)size);
1548:   if (min != 0.0) ratio = max / min;
1549:   else ratio = 0.0;
1550:   PetscFPrintf(comm, fd, "Flops/sec:            %5.3e   %7.3f   %5.3e  %5.3e\n", max, ratio, avg, tot);
1551:   /*   Memory */
1552:   PetscMallocGetMaximumUsage(&mem);
1553:   if (mem > 0.0) {
1554:     MPI_Allreduce(&mem, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1555:     MPI_Allreduce(&mem, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1556:     MPI_Allreduce(&mem, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1557:     avg = tot / ((PetscLogDouble)size);
1558:     if (min != 0.0) ratio = max / min;
1559:     else ratio = 0.0;
1560:     PetscFPrintf(comm, fd, "Memory (bytes):       %5.3e   %7.3f   %5.3e  %5.3e\n", max, ratio, avg, tot);
1561:   }
1562:   /*   Messages */
1563:   mess = 0.5 * (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct);
1564:   MPI_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1565:   MPI_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1566:   MPI_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1567:   avg = tot / ((PetscLogDouble)size);
1568:   if (min != 0.0) ratio = max / min;
1569:   else ratio = 0.0;
1570:   PetscFPrintf(comm, fd, "MPI Msg Count:        %5.3e   %7.3f   %5.3e  %5.3e\n", max, ratio, avg, tot);
1571:   numMessages = tot;
1572:   /*   Message Lengths */
1573:   mess = 0.5 * (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len);
1574:   MPI_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1575:   MPI_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1576:   MPI_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1577:   if (numMessages != 0) avg = tot / numMessages;
1578:   else avg = 0.0;
1579:   if (min != 0.0) ratio = max / min;
1580:   else ratio = 0.0;
1581:   PetscFPrintf(comm, fd, "MPI Msg Len (bytes):  %5.3e   %7.3f   %5.3e  %5.3e\n", max, ratio, avg, tot);
1582:   messageLength = tot;
1583:   /*   Reductions */
1584:   MPI_Allreduce(&red, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1585:   MPI_Allreduce(&red, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1586:   MPI_Allreduce(&red, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1587:   if (min != 0.0) ratio = max / min;
1588:   else ratio = 0.0;
1589:   PetscFPrintf(comm, fd, "MPI Reductions:       %5.3e   %7.3f\n", max, ratio);
1590:   numReductions = red; /* wrong because uses count from process zero */
1591:   PetscFPrintf(comm, fd, "\nFlop counting convention: 1 flop = 1 real number operation of type (multiply/divide/add/subtract)\n");
1592:   PetscFPrintf(comm, fd, "                            e.g., VecAXPY() for real vectors of length N --> 2N flops\n");
1593:   PetscFPrintf(comm, fd, "                            and VecAXPY() for complex vectors of length N --> 8N flops\n");

1595:   /* Get total number of stages --
1596:        Currently, a single processor can register more stages than another, but stages must all be registered in order.
1597:        We can removed this requirement if necessary by having a global stage numbering and indirection on the stage ID.
1598:        This seems best accomplished by assoicating a communicator with each stage.
1599:   */
1600:   PetscLogGetStageLog(&stageLog);
1601:   MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm);
1602:   PetscMalloc1(numStages, &localStageUsed);
1603:   PetscMalloc1(numStages, &stageUsed);
1604:   PetscMalloc1(numStages, &localStageVisible);
1605:   PetscMalloc1(numStages, &stageVisible);
1606:   if (numStages > 0) {
1607:     stageInfo = stageLog->stageInfo;
1608:     for (stage = 0; stage < numStages; stage++) {
1609:       if (stage < stageLog->numStages) {
1610:         localStageUsed[stage]    = stageInfo[stage].used;
1611:         localStageVisible[stage] = stageInfo[stage].perfInfo.visible;
1612:       } else {
1613:         localStageUsed[stage]    = PETSC_FALSE;
1614:         localStageVisible[stage] = PETSC_TRUE;
1615:       }
1616:     }
1617:     MPI_Allreduce(localStageUsed, stageUsed, numStages, MPIU_BOOL, MPI_LOR, comm);
1618:     MPI_Allreduce(localStageVisible, stageVisible, numStages, MPIU_BOOL, MPI_LAND, comm);
1619:     for (stage = 0; stage < numStages; stage++) {
1620:       if (stageUsed[stage]) {
1621:         PetscFPrintf(comm, fd, "\nSummary of Stages:   ----- Time ------  ----- Flop ------  --- Messages ---  -- Message Lengths --  -- Reductions --\n");
1622:         PetscFPrintf(comm, fd, "                        Avg     %%Total     Avg     %%Total    Count   %%Total     Avg         %%Total    Count   %%Total\n");
1623:         break;
1624:       }
1625:     }
1626:     for (stage = 0; stage < numStages; stage++) {
1627:       if (!stageUsed[stage]) continue;
1628:       /* CANNOT use MPI_Allreduce() since it might fail the line number check */
1629:       if (localStageUsed[stage]) {
1630:         MPI_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1631:         MPI_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1632:         MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1633:         MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1634:         MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1635:         name = stageInfo[stage].name;
1636:       } else {
1637:         MPI_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1638:         MPI_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1639:         MPI_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1640:         MPI_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1641:         MPI_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1642:         name = "";
1643:       }
1644:       mess *= 0.5;
1645:       messLen *= 0.5;
1646:       red /= size;
1647:       if (TotalTime != 0.0) fracTime = stageTime / TotalTime;
1648:       else fracTime = 0.0;
1649:       if (TotalFlops != 0.0) fracFlops = flops / TotalFlops;
1650:       else fracFlops = 0.0;
1651:       /* Talk to Barry if (stageTime     != 0.0) flops          = (size*flops)/stageTime; else flops          = 0.0; */
1652:       if (numMessages != 0.0) fracMessages = mess / numMessages;
1653:       else fracMessages = 0.0;
1654:       if (mess != 0.0) avgMessLen = messLen / mess;
1655:       else avgMessLen = 0.0;
1656:       if (messageLength != 0.0) fracLength = messLen / messageLength;
1657:       else fracLength = 0.0;
1658:       if (numReductions != 0.0) fracReductions = red / numReductions;
1659:       else fracReductions = 0.0;
1660:       PetscFPrintf(comm, fd, "%2d: %15s: %6.4e %5.1f%%  %6.4e %5.1f%%  %5.3e %5.1f%%  %5.3e      %5.1f%%  %5.3e %5.1f%%\n", stage, name, stageTime / size, 100.0 * fracTime, flops, 100.0 * fracFlops, mess, 100.0 * fracMessages, avgMessLen, 100.0 * fracLength, red, 100.0 * fracReductions);
1661:     }
1662:   }

1664:   PetscFPrintf(comm, fd, "\n------------------------------------------------------------------------------------------------------------------------\n");
1665:   PetscFPrintf(comm, fd, "See the 'Profiling' chapter of the users' manual for details on interpreting output.\n");
1666:   PetscFPrintf(comm, fd, "Phase summary info:\n");
1667:   PetscFPrintf(comm, fd, "   Count: number of times phase was executed\n");
1668:   PetscFPrintf(comm, fd, "   Time and Flop: Max - maximum over all processors\n");
1669:   PetscFPrintf(comm, fd, "                  Ratio - ratio of maximum to minimum over all processors\n");
1670:   PetscFPrintf(comm, fd, "   Mess: number of messages sent\n");
1671:   PetscFPrintf(comm, fd, "   AvgLen: average message length (bytes)\n");
1672:   PetscFPrintf(comm, fd, "   Reduct: number of global reductions\n");
1673:   PetscFPrintf(comm, fd, "   Global: entire computation\n");
1674:   PetscFPrintf(comm, fd, "   Stage: stages of a computation. Set stages with PetscLogStagePush() and PetscLogStagePop().\n");
1675:   PetscFPrintf(comm, fd, "      %%T - percent time in this phase         %%F - percent flop in this phase\n");
1676:   PetscFPrintf(comm, fd, "      %%M - percent messages in this phase     %%L - percent message lengths in this phase\n");
1677:   PetscFPrintf(comm, fd, "      %%R - percent reductions in this phase\n");
1678:   PetscFPrintf(comm, fd, "   Total Mflop/s: 10e-6 * (sum of flop over all processors)/(max time over all processors)\n");
1679:   if (PetscLogMemory) {
1680:     PetscFPrintf(comm, fd, "   Malloc Mbytes: Memory allocated and kept during event (sum over all calls to event). May be negative\n");
1681:     PetscFPrintf(comm, fd, "   EMalloc Mbytes: extra memory allocated during event and then freed (maximum over all calls to events). Never negative\n");
1682:     PetscFPrintf(comm, fd, "   MMalloc Mbytes: Increase in high water mark of allocated memory (sum over all calls to event). Never negative\n");
1683:     PetscFPrintf(comm, fd, "   RMI Mbytes: Increase in resident memory (sum over all calls to event)\n");
1684:   }
1685:   #if defined(PETSC_HAVE_DEVICE)
1686:   PetscFPrintf(comm, fd, "   GPU Mflop/s: 10e-6 * (sum of flop on GPU over all processors)/(max GPU time over all processors)\n");
1687:   PetscFPrintf(comm, fd, "   CpuToGpu Count: total number of CPU to GPU copies per processor\n");
1688:   PetscFPrintf(comm, fd, "   CpuToGpu Size (Mbytes): 10e-6 * (total size of CPU to GPU copies per processor)\n");
1689:   PetscFPrintf(comm, fd, "   GpuToCpu Count: total number of GPU to CPU copies per processor\n");
1690:   PetscFPrintf(comm, fd, "   GpuToCpu Size (Mbytes): 10e-6 * (total size of GPU to CPU copies per processor)\n");
1691:   PetscFPrintf(comm, fd, "   GPU %%F: percent flops on GPU in this event\n");
1692:   #endif
1693:   PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------\n");

1695:   PetscLogViewWarnDebugging(comm, fd);

1697:   /* Report events */
1698:   PetscFPrintf(comm, fd, "Event                Count      Time (sec)     Flop                              --- Global ---  --- Stage ----  Total");
1699:   if (PetscLogMemory) PetscFPrintf(comm, fd, "  Malloc EMalloc MMalloc RMI");
1700:   #if defined(PETSC_HAVE_DEVICE)
1701:   PetscFPrintf(comm, fd, "   GPU    - CpuToGpu -   - GpuToCpu - GPU");
1702:   #endif
1703:   PetscFPrintf(comm, fd, "\n");
1704:   PetscFPrintf(comm, fd, "                   Max Ratio  Max     Ratio   Max  Ratio  Mess   AvgLen  Reduct  %%T %%F %%M %%L %%R  %%T %%F %%M %%L %%R Mflop/s");
1705:   if (PetscLogMemory) PetscFPrintf(comm, fd, " Mbytes Mbytes Mbytes Mbytes");
1706:   #if defined(PETSC_HAVE_DEVICE)
1707:   PetscFPrintf(comm, fd, " Mflop/s Count   Size   Count   Size  %%F");
1708:   #endif
1709:   PetscFPrintf(comm, fd, "\n");
1710:   PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------");
1711:   if (PetscLogMemory) PetscFPrintf(comm, fd, "-----------------------------");
1712:   #if defined(PETSC_HAVE_DEVICE)
1713:   PetscFPrintf(comm, fd, "---------------------------------------");
1714:   #endif
1715:   PetscFPrintf(comm, fd, "\n");

1717:   #if defined(PETSC_HAVE_DEVICE)
1718:   /* this indirect way of accessing these values is needed when PETSc is build with multiple libraries since the symbols are not in libpetscsys */
1719:   PetscEventRegLogGetEvent(stageLog->eventLog, "TAOSolve", &TAO_Solve);
1720:   PetscEventRegLogGetEvent(stageLog->eventLog, "TSStep", &TS_Step);
1721:   PetscEventRegLogGetEvent(stageLog->eventLog, "SNESSolve", &SNES_Solve);
1722:   PetscEventRegLogGetEvent(stageLog->eventLog, "KSPSolve", &KSP_Solve);
1723:   #endif

1725:   /* Problem: The stage name will not show up unless the stage executed on proc 1 */
1726:   for (stage = 0; stage < numStages; stage++) {
1727:     if (!stageVisible[stage]) continue;
1728:     /* CANNOT use MPI_Allreduce() since it might fail the line number check */
1729:     if (localStageUsed[stage]) {
1730:       PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name);
1731:       MPI_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1732:       MPI_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1733:       MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1734:       MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1735:       MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1736:     } else {
1737:       PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage);
1738:       MPI_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1739:       MPI_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1740:       MPI_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1741:       MPI_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1742:       MPI_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1743:     }
1744:     mess *= 0.5;
1745:     messLen *= 0.5;
1746:     red /= size;

1748:     /* Get total number of events in this stage --
1749:        Currently, a single processor can register more events than another, but events must all be registered in order,
1750:        just like stages. We can removed this requirement if necessary by having a global event numbering and indirection
1751:        on the event ID. This seems best accomplished by associating a communicator with each stage.

1753:        Problem: If the event did not happen on proc 1, its name will not be available.
1754:        Problem: Event visibility is not implemented
1755:     */
1756:     if (localStageUsed[stage]) {
1757:       eventInfo      = stageLog->stageInfo[stage].eventLog->eventInfo;
1758:       localNumEvents = stageLog->stageInfo[stage].eventLog->numEvents;
1759:     } else localNumEvents = 0;
1760:     MPI_Allreduce(&localNumEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm);
1761:     for (event = 0; event < numEvents; event++) {
1762:       /* CANNOT use MPI_Allreduce() since it might fail the line number check */
1763:       if (localStageUsed[stage] && (event < stageLog->stageInfo[stage].eventLog->numEvents) && (eventInfo[event].depth == 0)) {
1764:         if ((eventInfo[event].count > 0) && (eventInfo[event].time > 0.0)) flopr = eventInfo[event].flops;
1765:         else flopr = 0.0;
1766:         MPI_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1767:         MPI_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1768:         MPI_Allreduce(&eventInfo[event].flops, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1769:         MPI_Allreduce(&eventInfo[event].time, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1770:         MPI_Allreduce(&eventInfo[event].time, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1771:         MPI_Allreduce(&eventInfo[event].time, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1772:         MPI_Allreduce(&eventInfo[event].numMessages, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1773:         MPI_Allreduce(&eventInfo[event].messageLength, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1774:         MPI_Allreduce(&eventInfo[event].numReductions, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1775:         MPI_Allreduce(&eventInfo[event].count, &minC, 1, MPI_INT, MPI_MIN, comm);
1776:         MPI_Allreduce(&eventInfo[event].count, &maxC, 1, MPI_INT, MPI_MAX, comm);
1777:         if (PetscLogMemory) {
1778:           MPI_Allreduce(&eventInfo[event].memIncrease, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1779:           MPI_Allreduce(&eventInfo[event].mallocSpace, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1780:           MPI_Allreduce(&eventInfo[event].mallocIncrease, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1781:           MPI_Allreduce(&eventInfo[event].mallocIncreaseEvent, &emalmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1782:         }
1783:   #if defined(PETSC_HAVE_DEVICE)
1784:         MPI_Allreduce(&eventInfo[event].CpuToGpuCount, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1785:         MPI_Allreduce(&eventInfo[event].GpuToCpuCount, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1786:         MPI_Allreduce(&eventInfo[event].CpuToGpuSize, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1787:         MPI_Allreduce(&eventInfo[event].GpuToCpuSize, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1788:         MPI_Allreduce(&eventInfo[event].GpuFlops, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1789:         MPI_Allreduce(&eventInfo[event].GpuTime, &gmaxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1790:   #endif
1791:         name = stageLog->eventLog->eventInfo[event].name;
1792:       } else {
1793:         flopr = 0.0;
1794:         MPI_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1795:         MPI_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1796:         MPI_Allreduce(&zero, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1797:         MPI_Allreduce(&zero, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1798:         MPI_Allreduce(&zero, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1799:         MPI_Allreduce(&zero, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1800:         MPI_Allreduce(&zero, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1801:         MPI_Allreduce(&zero, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1802:         MPI_Allreduce(&zero, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1803:         MPI_Allreduce(&ierr, &minC, 1, MPI_INT, MPI_MIN, comm);
1804:         MPI_Allreduce(&ierr, &maxC, 1, MPI_INT, MPI_MAX, comm);
1805:         if (PetscLogMemory) {
1806:           MPI_Allreduce(&zero, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1807:           MPI_Allreduce(&zero, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1808:           MPI_Allreduce(&zero, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1809:           MPI_Allreduce(&zero, &emalmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1810:         }
1811:   #if defined(PETSC_HAVE_DEVICE)
1812:         MPI_Allreduce(&zero, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1813:         MPI_Allreduce(&zero, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1814:         MPI_Allreduce(&zero, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1815:         MPI_Allreduce(&zero, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1816:         MPI_Allreduce(&zero, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1817:         MPI_Allreduce(&zero, &gmaxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1818:   #endif
1819:         name = "";
1820:       }
1821:       if (mint < 0.0) {
1822:         PetscFPrintf(comm, fd, "WARNING!!! Minimum time %g over all processors for %s is negative! This happens\n on some machines whose times cannot handle too rapid calls.!\n artificially changing minimum to zero.\n", mint, name);
1823:         mint = 0;
1824:       }
1826:   /* Put NaN into the time for all events that may not be time accurately since they may happen asynchronously on the GPU */
1827:   #if defined(PETSC_HAVE_DEVICE)
1828:       if (!PetscLogGpuTimeFlag && petsc_gflops > 0) {
1829:         memcpy(&gmaxt, &nas, sizeof(PetscLogDouble));
1830:         PetscEventRegLogGetEvent(stageLog->eventLog, name, &eventid);
1831:         if (eventid != SNES_Solve && eventid != KSP_Solve && eventid != TS_Step && eventid != TAO_Solve) {
1832:           memcpy(&mint, &nas, sizeof(PetscLogDouble));
1833:           memcpy(&maxt, &nas, sizeof(PetscLogDouble));
1834:         }
1835:       }
1836:   #endif
1837:       totm *= 0.5;
1838:       totml *= 0.5;
1839:       totr /= size;

1841:       if (maxC != 0) {
1842:         if (minC != 0) ratC = ((PetscLogDouble)maxC) / minC;
1843:         else ratC = 0.0;
1844:         if (mint != 0.0) ratt = maxt / mint;
1845:         else ratt = 0.0;
1846:         if (minf != 0.0) ratf = maxf / minf;
1847:         else ratf = 0.0;
1848:         if (TotalTime != 0.0) fracTime = tott / TotalTime;
1849:         else fracTime = 0.0;
1850:         if (TotalFlops != 0.0) fracFlops = totf / TotalFlops;
1851:         else fracFlops = 0.0;
1852:         if (stageTime != 0.0) fracStageTime = tott / stageTime;
1853:         else fracStageTime = 0.0;
1854:         if (flops != 0.0) fracStageFlops = totf / flops;
1855:         else fracStageFlops = 0.0;
1856:         if (numMessages != 0.0) fracMess = totm / numMessages;
1857:         else fracMess = 0.0;
1858:         if (messageLength != 0.0) fracMessLen = totml / messageLength;
1859:         else fracMessLen = 0.0;
1860:         if (numReductions != 0.0) fracRed = totr / numReductions;
1861:         else fracRed = 0.0;
1862:         if (mess != 0.0) fracStageMess = totm / mess;
1863:         else fracStageMess = 0.0;
1864:         if (messLen != 0.0) fracStageMessLen = totml / messLen;
1865:         else fracStageMessLen = 0.0;
1866:         if (red != 0.0) fracStageRed = totr / red;
1867:         else fracStageRed = 0.0;
1868:         if (totm != 0.0) totml /= totm;
1869:         else totml = 0.0;
1870:         if (maxt != 0.0) flopr = totf / maxt;
1871:         else flopr = 0.0;
1872:         if (fracStageTime > 1.0 || fracStageFlops > 1.0 || fracStageMess > 1.0 || fracStageMessLen > 1.0 || fracStageRed > 1.0)
1873:           PetscFPrintf(comm, fd, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f Multiple stages %5.0f", name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, PetscAbs(flopr) / 1.0e6);
1874:         else
1875:           PetscFPrintf(comm, fd, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f %3.0f %2.0f %2.0f %2.0f %2.0f %5.0f", name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, 100.0 * fracStageTime, 100.0 * fracStageFlops, 100.0 * fracStageMess, 100.0 * fracStageMessLen, 100.0 * fracStageRed, PetscAbs(flopr) / 1.0e6);
1876:         if (PetscLogMemory) PetscFPrintf(comm, fd, " %5.0f   %5.0f   %5.0f   %5.0f", mal / 1.0e6, emalmax / 1.0e6, malmax / 1.0e6, mem / 1.0e6);
1877:   #if defined(PETSC_HAVE_DEVICE)
1878:         if (totf != 0.0) fracgflops = gflops / totf;
1879:         else fracgflops = 0.0;
1880:         if (gmaxt != 0.0) gflopr = gflops / gmaxt;
1881:         else gflopr = 0.0;
1882:         PetscFPrintf(comm, fd, "   %5.0f   %4.0f %3.2e %4.0f %3.2e % 2.0f", PetscAbs(gflopr) / 1.0e6, cct / size, csz / (1.0e6 * size), gct / size, gsz / (1.0e6 * size), 100.0 * fracgflops);
1883:   #endif
1884:         PetscFPrintf(comm, fd, "\n");
1885:       }
1886:     }
1887:   }

1889:   /* Memory usage and object creation */
1890:   PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------");
1891:   if (PetscLogMemory) PetscFPrintf(comm, fd, "-----------------------------");
1892:   #if defined(PETSC_HAVE_DEVICE)
1893:   PetscFPrintf(comm, fd, "---------------------------------------");
1894:   #endif
1895:   PetscFPrintf(comm, fd, "\n");
1896:   PetscFPrintf(comm, fd, "\n");

1898:   /* Right now, only stages on the first processor are reported here, meaning only objects associated with
1899:      the global communicator, or MPI_COMM_SELF for proc 1. We really should report global stats and then
1900:      stats for stages local to processor sets.
1901:   */
1902:   /* We should figure out the longest object name here (now 20 characters) */
1903:   PetscFPrintf(comm, fd, "Object Type          Creations   Destructions. Reports information only for process 0.\n");
1904:   for (stage = 0; stage < numStages; stage++) {
1905:     if (localStageUsed[stage]) {
1906:       classInfo = stageLog->stageInfo[stage].classLog->classInfo;
1907:       PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name);
1908:       for (oclass = 0; oclass < stageLog->stageInfo[stage].classLog->numClasses; oclass++) {
1909:         if ((classInfo[oclass].creations > 0) || (classInfo[oclass].destructions > 0)) {
1910:           PetscFPrintf(comm, fd, "%20s %5d          %5d\n", stageLog->classLog->classInfo[oclass].name, classInfo[oclass].creations, classInfo[oclass].destructions);
1911:         }
1912:       }
1913:     } else {
1914:       if (!localStageVisible[stage]) continue;
1915:       PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage);
1916:     }
1917:   }

1919:   PetscFree(localStageUsed);
1920:   PetscFree(stageUsed);
1921:   PetscFree(localStageVisible);
1922:   PetscFree(stageVisible);

1924:   /* Information unrelated to this particular run */
1925:   PetscFPrintf(comm, fd, "========================================================================================================================\n");
1926:   PetscTime(&y);
1927:   PetscTime(&x);
1928:   PetscTime(&y);
1929:   PetscTime(&y);
1930:   PetscTime(&y);
1931:   PetscTime(&y);
1932:   PetscTime(&y);
1933:   PetscTime(&y);
1934:   PetscTime(&y);
1935:   PetscTime(&y);
1936:   PetscTime(&y);
1937:   PetscTime(&y);
1938:   PetscFPrintf(comm, fd, "Average time to get PetscTime(): %g\n", (y - x) / 10.0);
1939:   /* MPI information */
1940:   if (size > 1) {
1941:     MPI_Status  status;
1942:     PetscMPIInt tag;
1943:     MPI_Comm    newcomm;

1945:     MPI_Barrier(comm);
1946:     PetscTime(&x);
1947:     MPI_Barrier(comm);
1948:     MPI_Barrier(comm);
1949:     MPI_Barrier(comm);
1950:     MPI_Barrier(comm);
1951:     MPI_Barrier(comm);
1952:     PetscTime(&y);
1953:     PetscFPrintf(comm, fd, "Average time for MPI_Barrier(): %g\n", (y - x) / 5.0);
1954:     PetscCommDuplicate(comm, &newcomm, &tag);
1955:     MPI_Barrier(comm);
1956:     if (rank) {
1957:       MPI_Recv(NULL, 0, MPI_INT, rank - 1, tag, newcomm, &status);
1958:       MPI_Send(NULL, 0, MPI_INT, (rank + 1) % size, tag, newcomm);
1959:     } else {
1960:       PetscTime(&x);
1961:       MPI_Send(NULL, 0, MPI_INT, 1, tag, newcomm);
1962:       MPI_Recv(NULL, 0, MPI_INT, size - 1, tag, newcomm, &status);
1963:       PetscTime(&y);
1964:       PetscFPrintf(comm, fd, "Average time for zero size MPI_Send(): %g\n", (y - x) / size);
1965:     }
1966:     PetscCommDestroy(&newcomm);
1967:   }
1968:   PetscOptionsView(NULL, viewer);

1970:   /* Machine and compile information */
1971:   #if defined(PETSC_USE_FORTRAN_KERNELS)
1972:   PetscFPrintf(comm, fd, "Compiled with FORTRAN kernels\n");
1973:   #else
1974:   PetscFPrintf(comm, fd, "Compiled without FORTRAN kernels\n");
1975:   #endif
1976:   #if defined(PETSC_USE_64BIT_INDICES)
1977:   PetscFPrintf(comm, fd, "Compiled with 64 bit PetscInt\n");
1978:   #elif defined(PETSC_USE___FLOAT128)
1979:   PetscFPrintf(comm, fd, "Compiled with 32 bit PetscInt\n");
1980:   #endif
1981:   #if defined(PETSC_USE_REAL_SINGLE)
1982:   PetscFPrintf(comm, fd, "Compiled with single precision PetscScalar and PetscReal\n");
1983:   #elif defined(PETSC_USE___FLOAT128)
1984:   PetscFPrintf(comm, fd, "Compiled with 128 bit precision PetscScalar and PetscReal\n");
1985:   #endif
1986:   #if defined(PETSC_USE_REAL_MAT_SINGLE)
1987:   PetscFPrintf(comm, fd, "Compiled with single precision matrices\n");
1988:   #else
1989:   PetscFPrintf(comm, fd, "Compiled with full precision matrices (default)\n");
1990:   #endif
1991:   PetscFPrintf(comm, fd, "sizeof(short) %d sizeof(int) %d sizeof(long) %d sizeof(void*) %d sizeof(PetscScalar) %d sizeof(PetscInt) %d\n", (int)sizeof(short), (int)sizeof(int), (int)sizeof(long), (int)sizeof(void *), (int)sizeof(PetscScalar), (int)sizeof(PetscInt));

1993:   PetscFPrintf(comm, fd, "Configure options: %s", petscconfigureoptions);
1994:   PetscFPrintf(comm, fd, "%s", petscmachineinfo);
1995:   PetscFPrintf(comm, fd, "%s", petsccompilerinfo);
1996:   PetscFPrintf(comm, fd, "%s", petsccompilerflagsinfo);
1997:   PetscFPrintf(comm, fd, "%s", petsclinkerinfo);

1999:   /* Cleanup */
2000:   PetscFPrintf(comm, fd, "\n");
2001:   PetscLogViewWarnNoGpuAwareMpi(comm, fd);
2002:   PetscLogViewWarnDebugging(comm, fd);
2003:   PetscFPTrapPop();
2004:   return 0;
2005: }

2007: /*@C
2008:   PetscLogView - Prints a summary of the logging.

2010:   Collective over MPI_Comm

2012:   Input Parameter:
2013: .  viewer - an ASCII viewer

2015:   Options Database Keys:
2016: +  -log_view [:filename] - Prints summary of log information
2017: .  -log_view :filename.py:ascii_info_detail - Saves logging information from each process as a Python file
2018: .  -log_view :filename.xml:ascii_xml - Saves a summary of the logging information in a nested format (see below for how to view it)
2019: .  -log_view :filename.txt:ascii_flamegraph - Saves logging information in a format suitable for visualising as a Flame Graph (see below for how to view it)
2020: .  -log_view_memory - Also display memory usage in each event
2021: .  -log_view_gpu_time - Also display time in each event for GPU kernels (Note this may slow the computation)
2022: .  -log_all - Saves a file Log.rank for each MPI rank with details of each step of the computation
2023: -  -log_trace [filename] - Displays a trace of what each process is doing

2025:   Notes:
2026:   It is possible to control the logging programatically but we recommend using the options database approach whenever possible
2027:   By default the summary is printed to stdout.

2029:   Before calling this routine you must have called either PetscLogDefaultBegin() or PetscLogNestedBegin()

2031:   If PETSc is configured with --with-logging=0 then this functionality is not available

2033:   To view the nested XML format filename.xml first copy  ${PETSC_DIR}/share/petsc/xml/performance_xml2html.xsl to the current
2034:   directory then open filename.xml with your browser. Specific notes for certain browsers
2035: $    Firefox and Internet explorer - simply open the file
2036: $    Google Chrome - you must start up Chrome with the option --allow-file-access-from-files
2037: $    Safari - see https://ccm.net/faq/36342-safari-how-to-enable-local-file-access
2038:   or one can use the package http://xmlsoft.org/XSLT/xsltproc2.html to translate the xml file to html and then open it with
2039:   your browser.
2040:   Alternatively, use the script ${PETSC_DIR}/lib/petsc/bin/petsc-performance-view to automatically open a new browser
2041:   window and render the XML log file contents.

2043:   The nested XML format was kindly donated by Koos Huijssen and Christiaan M. Klaij  MARITIME  RESEARCH  INSTITUTE  NETHERLANDS

2045:   The Flame Graph output can be visualised using either the original Flame Graph script (https://github.com/brendangregg/FlameGraph)
2046:   or using speedscope (https://www.speedscope.app).
2047:   Old XML profiles may be converted into this format using the script ${PETSC_DIR}/lib/petsc/bin/xml2flamegraph.py.

2049:   Level: beginner

2051: .seealso: `PetscLogDefaultBegin()`, `PetscLogDump()`
2052: @*/
2053: PetscErrorCode PetscLogView(PetscViewer viewer)
2054: {
2055:   PetscBool         isascii;
2056:   PetscViewerFormat format;
2057:   int               stage, lastStage;
2058:   PetscStageLog     stageLog;

2061:   /* Pop off any stages the user forgot to remove */
2062:   lastStage = 0;
2063:   PetscLogGetStageLog(&stageLog);
2064:   PetscStageLogGetCurrent(stageLog, &stage);
2065:   while (stage >= 0) {
2066:     lastStage = stage;
2067:     PetscStageLogPop(stageLog);
2068:     PetscStageLogGetCurrent(stageLog, &stage);
2069:   }
2070:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii);
2072:   PetscViewerGetFormat(viewer, &format);
2073:   if (format == PETSC_VIEWER_DEFAULT || format == PETSC_VIEWER_ASCII_INFO) {
2074:     PetscLogView_Default(viewer);
2075:   } else if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
2076:     PetscLogView_Detailed(viewer);
2077:   } else if (format == PETSC_VIEWER_ASCII_CSV) {
2078:     PetscLogView_CSV(viewer);
2079:   } else if (format == PETSC_VIEWER_ASCII_XML) {
2080:     PetscLogView_Nested(viewer);
2081:   } else if (format == PETSC_VIEWER_ASCII_FLAMEGRAPH) {
2082:     PetscLogView_Flamegraph(viewer);
2083:   }
2084:   PetscStageLogPush(stageLog, lastStage);
2085:   return 0;
2086: }

2088: /*@C
2089:   PetscLogViewFromOptions - Processes command line options to determine if/how a `PetscLog` is to be viewed.

2091:   Collective on `PETSC_COMM_WORLD`

2093:   Level: developer

2095: .seealso: `PetscLogView()`
2096: @*/
2097: PetscErrorCode PetscLogViewFromOptions(void)
2098: {
2099:   PetscViewer       viewer;
2100:   PetscBool         flg;
2101:   PetscViewerFormat format;

2103:   PetscOptionsGetViewer(PETSC_COMM_WORLD, NULL, NULL, "-log_view", &viewer, &format, &flg);
2104:   if (flg) {
2105:     PetscViewerPushFormat(viewer, format);
2106:     PetscLogView(viewer);
2107:     PetscViewerPopFormat(viewer);
2108:     PetscViewerDestroy(&viewer);
2109:   }
2110:   return 0;
2111: }

2113: /*----------------------------------------------- Counter Functions -------------------------------------------------*/
2114: /*@C
2115:    PetscGetFlops - Returns the number of flops used on this processor
2116:    since the program began.

2118:    Not Collective

2120:    Output Parameter:
2121:    flops - number of floating point operations

2123:    Notes:
2124:    A global counter logs all PETSc flop counts.  The user can use
2125:    `PetscLogFlops()` to increment this counter to include flops for the
2126:    application code.

2128:    A separate counter `PetscLogGPUFlops()` logs the flops that occur on any GPU associated with this MPI rank

2130:    Level: intermediate

2132: .seealso: `PetscLogGPUFlops()`, `PetscTime()`, `PetscLogFlops()`
2133: @*/
2134: PetscErrorCode PetscGetFlops(PetscLogDouble *flops)
2135: {
2136:   *flops = petsc_TotalFlops;
2137:   return 0;
2138: }

2140: PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...)
2141: {
2142:   size_t  fullLength;
2143:   va_list Argp;

2145:   if (!petsc_logObjects) return 0;
2146:   va_start(Argp, format);
2147:   PetscVSNPrintf(petsc_objects[obj->id].info, 64, format, &fullLength, Argp);
2148:   va_end(Argp);
2149:   return 0;
2150: }

2152: /*MC
2153:    PetscLogFlops - Adds floating point operations to the global counter.

2155:    Synopsis:
2156: #include <petsclog.h>
2157:    PetscErrorCode PetscLogFlops(PetscLogDouble f)

2159:    Not Collective

2161:    Input Parameter:
2162: .  f - flop counter

2164:    Usage:
2165: .vb
2166:      PetscLogEvent USER_EVENT;
2167:      PetscLogEventRegister("User event",0,&USER_EVENT);
2168:      PetscLogEventBegin(USER_EVENT,0,0,0,0);
2169:         [code segment to monitor]
2170:         PetscLogFlops(user_flops)
2171:      PetscLogEventEnd(USER_EVENT,0,0,0,0);
2172: .ve

2174:    Note:
2175:    A global counter logs all PETSc flop counts.  The user can use
2176:    PetscLogFlops() to increment this counter to include flops for the
2177:    application code.

2179:    Level: intermediate

2181: .seealso: `PetscLogGPUFlops()`, `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscGetFlops()`
2182: M*/

2184: /*MC
2185:    PetscPreLoadBegin - Begin a segment of code that may be preloaded (run twice)
2186:     to get accurate timings

2188:    Synopsis:
2189: #include <petsclog.h>
2190:    void PetscPreLoadBegin(PetscBool  flag,char *name);

2192:    Not Collective

2194:    Input Parameters:
2195: +   flag - PETSC_TRUE to run twice, `PETSC_FALSE` to run once, may be overridden
2196:            with command line option -preload true or -preload false
2197: -   name - name of first stage (lines of code timed separately with -log_view) to
2198:            be preloaded

2200:    Usage:
2201: .vb
2202:      PetscPreLoadBegin(PETSC_TRUE,"first stage);
2203:        lines of code
2204:        PetscPreLoadStage("second stage");
2205:        lines of code
2206:      PetscPreLoadEnd();
2207: .ve

2209:    Note:
2210:     Only works in C/C++, not Fortran

2212:      Flags available within the macro.
2213: +    PetscPreLoadingUsed - true if we are or have done preloading
2214: .    PetscPreLoadingOn - true if it is CURRENTLY doing preload
2215: .    PetscPreLoadIt - 0 for the first computation (with preloading turned off it is only 0) 1 for the second
2216: -    PetscPreLoadMax - number of times it will do the computation, only one when preloading is turned on
2217:      The first two variables are available throughout the program, the second two only between the PetscPreLoadBegin()
2218:      and PetscPreLoadEnd()

2220:    Level: intermediate

2222: .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()`
2223: M*/

2225: /*MC
2226:    PetscPreLoadEnd - End a segment of code that may be preloaded (run twice)
2227:     to get accurate timings

2229:    Synopsis:
2230: #include <petsclog.h>
2231:    void PetscPreLoadEnd(void);

2233:    Not Collective

2235:    Usage:
2236: .vb
2237:      PetscPreLoadBegin(PETSC_TRUE,"first stage);
2238:        lines of code
2239:        PetscPreLoadStage("second stage");
2240:        lines of code
2241:      PetscPreLoadEnd();
2242: .ve

2244:    Note:
2245:     Only works in C/C++ not fortran

2247:    Level: intermediate

2249: .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadStage()`
2250: M*/

2252: /*MC
2253:    PetscPreLoadStage - Start a new segment of code to be timed separately.
2254:     to get accurate timings

2256:    Synopsis:
2257: #include <petsclog.h>
2258:    void PetscPreLoadStage(char *name);

2260:    Not Collective

2262:    Usage:
2263: .vb
2264:      PetscPreLoadBegin(PETSC_TRUE,"first stage);
2265:        lines of code
2266:        PetscPreLoadStage("second stage");
2267:        lines of code
2268:      PetscPreLoadEnd();
2269: .ve

2271:    Note:
2272:     Only works in C/C++ not fortran

2274:    Level: intermediate

2276: .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`
2277: M*/

2279:   #if PetscDefined(HAVE_DEVICE)
2280: #include <petsc/private/deviceimpl.h>

2282: PetscBool PetscLogGpuTimeFlag = PETSC_FALSE;

2284: /*
2285:    This cannot be called by users between PetscInitialize() and PetscFinalize() at any random location in the code
2286:    because it will result in timing results that cannot be interpreted.
2287: */
2288: static PetscErrorCode PetscLogGpuTime_Off(void)
2289: {
2290:   PetscLogGpuTimeFlag = PETSC_FALSE;
2291:   return 0;
2292: }

2294: /*@C
2295:      PetscLogGpuTime - turn on the logging of GPU time for GPU kernels

2297:   Options Database Key:
2298: .   -log_view_gpu_time - provide the GPU times in the -log_view output

2300:   Notes:
2301:     Turning on the timing of the
2302:     GPU kernels can slow down the entire computation and should only be used when studying the performance
2303:     of operations on GPU such as vector operations and matrix-vector operations.

2305:     This routine should only be called once near the beginning of the program. Once it is started it cannot be turned off.

2307:    Level: advanced

2309: .seealso: `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeEnd()`, `PetscLogGpuTimeBegin()`
2310: @*/
2311: PetscErrorCode PetscLogGpuTime(void)
2312: {
2313:   if (!PetscLogGpuTimeFlag) PetscRegisterFinalize(PetscLogGpuTime_Off);
2314:   PetscLogGpuTimeFlag = PETSC_TRUE;
2315:   return 0;
2316: }

2318: /*@C
2319:   PetscLogGpuTimeBegin - Start timer for device

2321:   Notes:
2322:     When CUDA or HIP is enabled, the timer is run on the GPU, it is a separate logging of time devoted to GPU computations (excluding kernel launch times).

2324:     When CUDA or HIP is not available, the timer is run on the CPU, it is a separate logging of time devoted to GPU computations (including kernel launch times).

2326:     There is no need to call WaitForCUDA() or WaitForHIP() between `PetscLogGpuTimeBegin()` and `PetscLogGpuTimeEnd()`

2328:     This timer should NOT include times for data transfers between the GPU and CPU, nor setup actions such as allocating space.

2330:     The regular logging captures the time for data transfers and any CPU activites during the event

2332:     It is used to compute the flop rate on the GPU as it is actively engaged in running a kernel.

2334:   Developer Notes:
2335:     The GPU event timer captures the execution time of all the kernels launched in the default stream by the CPU between `PetscLogGpuTimeBegin()` and `PetsLogGpuTimeEnd()`.

2337:     `PetscLogGpuTimeBegin()` and `PetsLogGpuTimeEnd()` insert the begin and end events into the default stream (stream 0). The device will record a time stamp for the
2338:     event when it reaches that event in the stream. The function xxxEventSynchronize() is called in `PetsLogGpuTimeEnd()` to block CPU execution,
2339:     but not continued GPU excution, until the timer event is recorded.

2341:   Level: intermediate

2343: .seealso: `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeEnd()`, `PetscLogGpuTime()`
2344: @*/
2345: PetscErrorCode PetscLogGpuTimeBegin(void)
2346: {
2347:   if (!PetscLogPLB || !PetscLogGpuTimeFlag) return 0;
2348:   if (PetscDefined(HAVE_CUDA) || PetscDefined(HAVE_HIP)) {
2349:     PetscDeviceContext dctx;

2351:     PetscDeviceContextGetCurrentContext(&dctx);
2352:     PetscDeviceContextBeginTimer_Internal(dctx);
2353:   } else {
2354:     PetscTimeSubtract(&petsc_gtime);
2355:   }
2356:   return 0;
2357: }

2359: /*@C
2360:   PetscLogGpuTimeEnd - Stop timer for device

2362:   Level: intermediate

2364: .seealso: `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeBegin()`
2365: @*/
2366: PetscErrorCode PetscLogGpuTimeEnd(void)
2367: {
2368:   if (!PetscLogPLE || !PetscLogGpuTimeFlag) return 0;
2369:   if (PetscDefined(HAVE_CUDA) || PetscDefined(HAVE_HIP)) {
2370:     PetscDeviceContext dctx;
2371:     PetscLogDouble     elapsed;

2373:     PetscDeviceContextGetCurrentContext(&dctx);
2374:     PetscDeviceContextEndTimer_Internal(dctx, &elapsed);
2375:     petsc_gtime += (elapsed / 1000.0);
2376:   } else {
2377:     PetscTimeAdd(&petsc_gtime);
2378:   }
2379:   return 0;
2380: }
2381:   #endif /* end of PETSC_HAVE_DEVICE */

2383: #else /* end of -DPETSC_USE_LOG section */

2385: PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...)
2386: {
2387:   return 0;
2388: }

2390: #endif /* PETSC_USE_LOG*/

2392: PetscClassId PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID;
2393: PetscClassId PETSC_OBJECT_CLASSID  = 0;

2395: /*@C
2396:   PetscClassIdRegister - Registers a new class name for objects and logging operations in an application code.

2398:   Not Collective

2400:   Input Parameter:
2401: . name   - The class name

2403:   Output Parameter:
2404: . oclass - The class id or classid

2406:   Level: developer

2408: .seealso: `PetscLogEventRegister()`
2409: @*/
2410: PetscErrorCode PetscClassIdRegister(const char name[], PetscClassId *oclass)
2411: {
2412: #if defined(PETSC_USE_LOG)
2413:   PetscStageLog stageLog;
2414:   PetscInt      stage;
2415: #endif

2417:   *oclass = ++PETSC_LARGEST_CLASSID;
2418: #if defined(PETSC_USE_LOG)
2419:   PetscLogGetStageLog(&stageLog);
2420:   PetscClassRegLogRegister(stageLog->classLog, name, *oclass);
2421:   for (stage = 0; stage < stageLog->numStages; stage++) PetscClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses);
2422: #endif
2423:   return 0;
2424: }

2426: #if defined(PETSC_USE_LOG) && defined(PETSC_HAVE_MPE)
2427:   #include <mpe.h>

2429: PetscBool PetscBeganMPE = PETSC_FALSE;

2431: PETSC_INTERN PetscErrorCode PetscLogEventBeginMPE(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject);
2432: PETSC_INTERN PetscErrorCode PetscLogEventEndMPE(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject);

2434: /*@C
2435:    PetscLogMPEBegin - Turns on MPE logging of events. This creates large log files
2436:    and slows the program down.

2438:    Collective over `PETSC_COMM_WORLD`

2440:    Options Database Key:
2441: . -log_mpe - Prints extensive log information

2443:    Note:
2444:    A related routine is `PetscLogDefaultBegin()` (with the options key -log_view), which is
2445:    intended for production runs since it logs only flop rates and object
2446:    creation (and should not significantly slow the programs).

2448:    Level: advanced

2450: .seealso: `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogEventActivate()`,
2451:           `PetscLogEventDeactivate()`
2452: @*/
2453: PetscErrorCode PetscLogMPEBegin(void)
2454: {
2455:   /* Do MPE initialization */
2456:   if (!MPE_Initialized_logging()) { /* This function exists in mpich 1.1.2 and higher */
2457:     PetscInfo(0, "Initializing MPE.\n");
2458:     MPE_Init_log();

2460:     PetscBeganMPE = PETSC_TRUE;
2461:   } else {
2462:     PetscInfo(0, "MPE already initialized. Not attempting to reinitialize.\n");
2463:   }
2464:   PetscLogSet(PetscLogEventBeginMPE, PetscLogEventEndMPE);
2465:   return 0;
2466: }

2468: /*@C
2469:    PetscLogMPEDump - Dumps the MPE logging info to file for later use with Jumpshot.

2471:    Collective over `PETSC_COMM_WORLD`

2473:    Level: advanced

2475: .seealso: `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogMPEBegin()`
2476: @*/
2477: PetscErrorCode PetscLogMPEDump(const char sname[])
2478: {
2479:   char name[PETSC_MAX_PATH_LEN];

2481:   if (PetscBeganMPE) {
2482:     PetscInfo(0, "Finalizing MPE.\n");
2483:     if (sname) {
2484:       PetscStrcpy(name, sname);
2485:     } else {
2486:       PetscGetProgramName(name, sizeof(name));
2487:     }
2488:     MPE_Finish_log(name);
2489:   } else {
2490:     PetscInfo(0, "Not finalizing MPE (not started by PETSc).\n");
2491:   }
2492:   return 0;
2493: }

2495:   #define PETSC_RGB_COLORS_MAX 39
2496: static const char *PetscLogMPERGBColors[PETSC_RGB_COLORS_MAX] = {"OliveDrab:      ", "BlueViolet:     ", "CadetBlue:      ", "CornflowerBlue: ", "DarkGoldenrod:  ", "DarkGreen:      ", "DarkKhaki:      ", "DarkOliveGreen: ",
2497:                                                                  "DarkOrange:     ", "DarkOrchid:     ", "DarkSeaGreen:   ", "DarkSlateGray:  ", "DarkTurquoise:  ", "DeepPink:       ", "DarkKhaki:      ", "DimGray:        ",
2498:                                                                  "DodgerBlue:     ", "GreenYellow:    ", "HotPink:        ", "IndianRed:      ", "LavenderBlush:  ", "LawnGreen:      ", "LemonChiffon:   ", "LightCoral:     ",
2499:                                                                  "LightCyan:      ", "LightPink:      ", "LightSalmon:    ", "LightSlateGray: ", "LightYellow:    ", "LimeGreen:      ", "MediumPurple:   ", "MediumSeaGreen: ",
2500:                                                                  "MediumSlateBlue:", "MidnightBlue:   ", "MintCream:      ", "MistyRose:      ", "NavajoWhite:    ", "NavyBlue:       ", "OliveDrab:      "};

2502: /*@C
2503:   PetscLogMPEGetRGBColor - This routine returns a rgb color useable with `PetscLogEventRegister()`

2505:   Not collective. Maybe it should be?

2507:   Output Parameter:
2508: . str - character string representing the color

2510:   Level: developer

2512: .seealso: `PetscLogEventRegister()`
2513: @*/
2514: PetscErrorCode PetscLogMPEGetRGBColor(const char *str[])
2515: {
2516:   static int idx = 0;

2518:   *str = PetscLogMPERGBColors[idx];
2519:   idx  = (idx + 1) % PETSC_RGB_COLORS_MAX;
2520:   return 0;
2521: }

2523: #endif /* PETSC_USE_LOG && PETSC_HAVE_MPE */