Actual source code: sseenabled.c

  1: #include <petscsys.h>

  3: #if defined(PETSC_HAVE_SSE)

  5:   #include PETSC_HAVE_SSE
  6:   #define SSE_FEATURE_FLAG 0x2000000 /* Mask for bit 25 (from bit 0) */

  8: PetscErrorCode PetscSSEHardwareTest(PetscBool *flag)
  9: {
 10:   char      vendor[13];
 11:   char      Intel[13] = "GenuineIntel";
 12:   char      AMD[13]   = "AuthenticAMD";
 13:   char      Hygon[13] = "HygonGenuine";
 14:   PetscBool flg;

 16:   PetscFunctionBegin;
 17:   PetscCall(PetscStrncpy(vendor, "************", sizeof(vendor)));
 18:   CPUID_GET_VENDOR(vendor);
 19:   PetscCall(PetscStrcmp(vendor, Intel, &flg));
 20:   if (!flg) PetscCall(PetscStrcmp(vendor, AMD, &flg));
 21:   if (!flg) {
 22:     PetscCall(PetscStrcmp(vendor, Hygon, &flg));
 23:     if (flg) {
 24:       /* Intel, AMD, and Hygon use bit 25 of CPUID_FEATURES */
 25:       /* to denote availability of SSE Support */
 26:       unsigned long myeax, myebx, myecx, myedx;
 27:       CPUID(CPUID_FEATURES, &myeax, &myebx, &myecx, &myedx);
 28:       if (myedx & SSE_FEATURE_FLAG) *flag = PETSC_TRUE;
 29:       else *flag = PETSC_FALSE;
 30:     }
 31:     PetscFunctionReturn(PETSC_SUCCESS);
 32:   }
 33: }

 35:   #if defined(PETSC_HAVE_FORK)
    #include <errno.h>
    #include <signal.h>
    #include <stdlib.h>
    #include <sys/wait.h>
    #include <unistd.h>
    /*
   Early versions of the Linux kernel disable SSE hardware because
   they do not know how to preserve the SSE state at a context switch.
   To detect this, try an SSE instruction in another process.
   If it works, great!  If not, an illegal instruction signal will be raised,
   so catch it and return an error code.
*/
 44:     #define PetscSSEOSEnabledTest(arg) PetscSSEOSEnabledTest_Linux(arg)

 46: static void PetscSSEDisabledHandler(int sig)
 47: {
 48:   signal(SIGILL, SIG_IGN);
 49:   exit(-1);
 50: }

 52: PetscErrorCode PetscSSEOSEnabledTest_Linux(PetscBool *flag)
 53: {
 54:   int status, pid = 0;

 56:   PetscFunctionBegin;
 57:   signal(SIGILL, PetscSSEDisabledHandler);
 58:   pid = fork();
 59:   if (pid == 0) {
 60:     SSE_SCOPE_BEGIN;
 61:     XOR_PS(XMM0, XMM0);
 62:     SSE_SCOPE_END;
 63:     exit(0);
 64:   } else wait(&status);
 65:   if (!status) *flag = PETSC_TRUE;
 66:   else *flag = PETSC_FALSE;
 67:   PetscFunctionReturn(PETSC_SUCCESS);
 68: }

 70:   #else
    /*
   Windows 95/98/NT4 should have a Windows Update/Service Patch which enables this hardware.
   Windows ME/2000 do not disable SSE hardware.
*/
 75:     #define PetscSSEOSEnabledTest(arg) PetscSSEOSEnabledTest_TRUE(arg)
 76:   #endif

 78: PetscErrorCode PetscSSEOSEnabledTest_TRUE(PetscBool *flag)
 79: {
 80:   PetscFunctionBegin;
 81:   if (flag) *flag = PETSC_TRUE;
 82:   PetscFunctionReturn(PETSC_SUCCESS);
 83: }

 85: #else /* Not defined PETSC_HAVE_SSE */

 87:   #define PetscSSEHardwareTest(arg)  PetscSSEEnabledTest_FALSE(arg)
 88:   #define PetscSSEOSEnabledTest(arg) PetscSSEEnabledTest_FALSE(arg)

 90: static PetscErrorCode PetscSSEEnabledTest_FALSE(PetscBool *flag)
 91: {
 92:   PetscFunctionBegin;
 93:   if (flag) *flag = PETSC_FALSE;
 94:   PetscFunctionReturn(PETSC_SUCCESS);
 95: }

 97: #endif /* defined PETSC_HAVE_SSE */

 99: static PetscBool petsc_sse_local_is_untested  = PETSC_TRUE;
100: static PetscBool petsc_sse_enabled_local      = PETSC_FALSE;
101: static PetscBool petsc_sse_global_is_untested = PETSC_TRUE;
102: static PetscBool petsc_sse_enabled_global     = PETSC_FALSE;

104: /*@
105:   PetscSSEIsEnabled - Determines if Intel Streaming SIMD Extensions (SSE) to the x86 instruction
106:   set can be used.  Some operating systems do not allow the use of these instructions despite
107:   hardware availability.

109:   Collective

111:   Input Parameter:
112: . comm - the MPI Communicator

114:   Output Parameters:
115: + lflag - Local Flag  `PETSC_TRUE` if enabled in this process
116: - gflag - Global Flag `PETSC_TRUE` if enabled for all processes in comm

118:   Options Database Key:
119: . -disable_sse - Disable use of hand tuned Intel SSE implementations

121:   Level: developer

123:   Note:
124:   `NULL` can be specified for `lflag` or `gflag` if either of these values are not desired.

126: .seealso: [](ch_profiling)
127: @*/
128: PetscErrorCode PetscSSEIsEnabled(MPI_Comm comm, PetscBool *lflag, PetscBool *gflag)
129: {
130:   PetscBool disabled_option;

132:   PetscFunctionBegin;
133:   if (petsc_sse_local_is_untested && petsc_sse_global_is_untested) {
134:     disabled_option = PETSC_FALSE;

136:     PetscCall(PetscOptionsGetBool(NULL, NULL, "-disable_sse", &disabled_option, NULL));
137:     if (disabled_option) {
138:       petsc_sse_local_is_untested  = PETSC_FALSE;
139:       petsc_sse_enabled_local      = PETSC_FALSE;
140:       petsc_sse_global_is_untested = PETSC_FALSE;
141:       petsc_sse_enabled_global     = PETSC_FALSE;
142:     }

144:     if (petsc_sse_local_is_untested) {
145:       PetscCall(PetscSSEHardwareTest(&petsc_sse_enabled_local));
146:       if (petsc_sse_enabled_local) { PetscCall(PetscSSEOSEnabledTest(&petsc_sse_enabled_local)); }
147:       petsc_sse_local_is_untested = PETSC_FALSE;
148:     }

150:     if (gflag && petsc_sse_global_is_untested) {
151:       PetscCallMPI(MPIU_Allreduce(&petsc_sse_enabled_local, &petsc_sse_enabled_global, 1, MPIU_BOOL, MPI_LAND, comm));

153:       petsc_sse_global_is_untested = PETSC_FALSE;
154:     }
155:   }

157:   if (lflag) *lflag = petsc_sse_enabled_local;
158:   if (gflag) *gflag = petsc_sse_enabled_global;
159:   PetscFunctionReturn(PETSC_SUCCESS);
160: }