Actual source code: ex61f.F90

!
!        Demonstrates having each OpenMP thread manage its own PETSc objects and solves
!           - each thread is ONLY allowed to access objects that IT created,
!             that is, threads cannot share objects
!
!        Run with "export OMP_NUM_THREADS=16; ./ex61f"
!           to use 16 independent threads
!
!        Requires PETSc configured with ./configure --with-threadsafety --with-openmp
!
#include <petsc/finclude/petsc.h>
module ex61fmodule
  implicit none
contains
  subroutine split_indices(total, num_pieces, ibeg, iend)
    implicit none

    integer :: total
    integer :: num_pieces
    integer :: ibeg(num_pieces), iend(num_pieces)
    integer :: itmp1, itmp2, ioffset, i
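
    ! split "total" items as evenly as possible into "num_pieces" ranges:
    ! the first mod(total,num_pieces) ranges get one extra item; a range
    ! may be empty (iend < ibeg) when num_pieces > total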

    itmp1 = total/num_pieces
    itmp2 = mod(total, num_pieces)
    ioffset = 0
    do i = 1, itmp2
      ibeg(i) = ioffset + 1
      iend(i) = ioffset + (itmp1 + 1)
      ioffset = iend(i)
    end do
    do i = itmp2 + 1, num_pieces
      ibeg(i) = ioffset + 1
      if (ibeg(i) > total) then
        iend(i) = ibeg(i) - 1
      else
        iend(i) = ioffset + itmp1
        ioffset = iend(i)
      end if
    end do

  end subroutine split_indices
end module ex61fmodule

program tpetsc

  use ex61fmodule
  use petsc
!$ use OMP_LIB
  implicit none
!     ----------------------------
!     test concurrent PETSc solver
!     ----------------------------

  integer, parameter :: NCASES = 100
  integer, parameter :: MAXTHREADS = 64
  real(8), parameter :: tol = 1.0d-6

  integer, dimension(MAXTHREADS) :: ibeg, iend

  ! arrays of PETSc objects, one entry per thread
  Mat, target :: Mcol_f_mat(MAXTHREADS)
  Vec, target :: Mcol_f_vecb(MAXTHREADS)
  Vec, target :: Mcol_f_vecx(MAXTHREADS)
  KSP, target :: Mcol_f_ksp(MAXTHREADS)
  PC, target :: MPC(MAXTHREADS)

  ! per-thread pointers; each thread associates these only with the array
  ! entries at its own index ith, so no object is ever shared
  Mat, pointer :: col_f_mat
  Vec, pointer :: col_f_vecb
  Vec, pointer :: col_f_vecx
  KSP, pointer :: col_f_ksp
  PC, pointer :: pc

  PetscInt :: ith, nthreads
  PetscErrorCode ierr

  integer, parameter :: nz_per_row = 9
  integer, parameter :: n = 100
  integer :: i, j, ij, ij2, ii, jj, nz, ip, dx, dy, icase
  integer, allocatable :: ilist(:), jlist(:)
  PetscScalar :: aij
  PetscScalar, allocatable :: alist(:)
  logical :: isvalid_ii, isvalid_jj, is_diag

  PetscInt nrow
  PetscInt ncol
  PetscScalar, allocatable :: x(:), b(:)
  real(8) :: err(NCASES)

  integer :: t1, t2, count_rate
  real :: ttime

  PetscCallA(PetscInitialize(ierr))

  allocate (ilist(n*n*nz_per_row), jlist(n*n*nz_per_row), alist(n*n*nz_per_row))
  allocate (x(0:(n*n - 1)), b(0:(n*n - 1)))
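  ! x and b are 0-based to match PETSc's C-style (0-based) row/column indices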
  nrow = n*n
  ncol = nrow
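
! determine the number of OpenMP threads from inside a parallel region;
! nthreads remains 1 when the code is built without OpenMP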

  nthreads = 1
!$omp parallel
!$omp master
!$ nthreads = omp_get_num_threads()
!$omp end master
!$omp end parallel
  print *, 'nthreads = ', nthreads, ' NCASES = ', NCASES

  call split_indices(NCASES, nthreads, ibeg, iend)
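
! thread ith solves cases ibeg(ith) through iend(ith); the cases are
! independent, so no coordination between threads is needed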

!$omp parallel do                                                        &
!$omp private(ith,ierr)                                                  &
!$omp private(col_f_mat,col_f_vecb,col_f_vecx,col_f_ksp)
  do ith = 1, nthreads
    col_f_mat => Mcol_f_mat(ith)
    col_f_vecb => Mcol_f_vecb(ith)
    col_f_vecx => Mcol_f_vecx(ith)
    col_f_ksp => Mcol_f_ksp(ith)
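    ! create the objects on PETSC_COMM_SELF: each thread owns a private,
    ! purely serial matrix, vectors, and linear solver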

    PetscCallA(MatCreateSeqAIJ(PETSC_COMM_SELF, nrow, ncol, nz_per_row, PETSC_NULL_INTEGER_ARRAY, col_f_mat, ierr))
    PetscCallA(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nrow, PETSC_NULL_SCALAR_ARRAY, col_f_vecb, ierr))
    PetscCallA(VecDuplicate(col_f_vecb, col_f_vecx, ierr))
    PetscCallA(KSPCreate(PETSC_COMM_SELF, col_f_ksp, ierr))
  end do

!      ------------------------
!      set up sparsity pattern
!      ------------------------
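!      each row couples grid point (i,j) of an n x n grid to its
!      neighbours (i+dx,j+dy), dx,dy = -1,0,1: a 9-point stencil with
!      4.0 on the diagonal and -1.0 off the diagonal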
  nz = 0
  do j = 1, n
  do i = 1, n
    ij = i + (j - 1)*n
    do dx = -1, 1
    do dy = -1, 1
      ii = i + dx
      jj = j + dy

      ij2 = ii + (jj - 1)*n
      isvalid_ii = (1 <= ii) .and. (ii <= n)
      isvalid_jj = (1 <= jj) .and. (jj <= n)
      if (isvalid_ii .and. isvalid_jj) then
        is_diag = (ij == ij2)
        nz = nz + 1
        ilist(nz) = ij
        jlist(nz) = ij2
        if (is_diag) then
          aij = 4.0
        else
          aij = -1.0
        end if
        alist(nz) = aij
      end if
    end do
    end do
  end do
  end do

  print *, 'nz = ', nz

!      ----------------------------------
!      convert from Fortran to C indexing
!      ----------------------------------
  ilist(1:nz) = ilist(1:nz) - 1
  jlist(1:nz) = jlist(1:nz) - 1

!      ---------------
!      set up matrices
!      ---------------
  call system_clock(t1, count_rate)

!$omp parallel do &
!$omp private(ith,icase,ip,i,j,ii,jj,aij,ierr,x,b)  &
!$omp private(col_f_mat,col_f_vecb,col_f_vecx,col_f_ksp,pc)
  do ith = 1, nthreads
    col_f_mat => Mcol_f_mat(ith)
    col_f_vecb => Mcol_f_vecb(ith)
    col_f_vecx => Mcol_f_vecx(ith)
    col_f_ksp => Mcol_f_ksp(ith)
    pc => MPC(ith)

    do icase = ibeg(ith), iend(ith)

      do ip = 1, nz
        ii = ilist(ip)
        jj = jlist(ip)
        aij = alist(ip)
        PetscCallA(MatSetValue(col_f_mat, ii, jj, aij, INSERT_VALUES, ierr))
      end do
      PetscCallA(MatAssemblyBegin(col_f_mat, MAT_FINAL_ASSEMBLY, ierr))
      PetscCallA(MatAssemblyEnd(col_f_mat, MAT_FINAL_ASSEMBLY, ierr))
      PetscCallA(KSPSetOperators(col_f_ksp, col_f_mat, col_f_mat, ierr))

      ! select the linear solver: an LU factorization, i.e. a direct solve
      PetscCallA(KSPGetPC(col_f_ksp, pc, ierr))
      PetscCallA(PCSetType(pc, PCLU, ierr))

      ! associate PETSc vector with allocated array
      x(:) = 1
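      ! even with --with-threadsafety only a subset of PETSc operations may
      ! be called concurrently, so the VecPlaceArray/VecResetArray calls
      ! are serialized with critical sections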
!$omp    critical
      PetscCallA(VecPlaceArray(col_f_vecx, x, ierr))
!$omp    end critical
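
      ! manufactured solution: with exact solution x = 1, form b = A*x;
      ! the solve below should then reproduce x = 1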

      b(:) = 0
      do ip = 1, nz
        i = ilist(ip)
        j = jlist(ip)
        aij = alist(ip)
        b(i) = b(i) + aij*x(j)
      end do
      x = 0
!$omp    critical
      PetscCallA(VecPlaceArray(col_f_vecb, b, ierr))
!$omp    end critical

!  ------------------------------------------------------------
!  key test: solve multiple independent linear systems in parallel
!  ------------------------------------------------------------
      PetscCallA(KSPSetFromOptions(col_f_ksp, ierr))

      PetscCallA(KSPSetUp(col_f_ksp, ierr))
      PetscCallA(KSPSolve(col_f_ksp, col_f_vecb, col_f_vecx, ierr))

!        ------------
!        check answer
!        ------------
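!        the solve wrote the solution into x through the placed array;
!        the exact solution is identically 1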
      err(icase) = maxval(abs(x(:) - 1))

!$omp    critical
      PetscCallA(VecResetArray(col_f_vecx, ierr))
!$omp    end critical

!$omp    critical
      PetscCallA(VecResetArray(col_f_vecb, ierr))
!$omp    end critical

    end do
  end do
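
! each thread destroys only the objects that it created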

!$omp parallel do &
!$omp private(ith,ierr) &
!$omp private(col_f_mat,col_f_vecb,col_f_vecx,col_f_ksp)
  do ith = 1, nthreads
    col_f_mat => Mcol_f_mat(ith)
    col_f_vecb => Mcol_f_vecb(ith)
    col_f_vecx => Mcol_f_vecx(ith)
    col_f_ksp => Mcol_f_ksp(ith)

    PetscCallA(MatDestroy(col_f_mat, ierr))
    PetscCallA(VecDestroy(col_f_vecb, ierr))
    PetscCallA(VecDestroy(col_f_vecx, ierr))

    PetscCallA(KSPDestroy(col_f_ksp, ierr))

  end do

  call system_clock(t2, count_rate)
  ttime = real(t2 - t1)/real(count_rate)
  write (*, *) 'total time is ', ttime

  write (*, *) 'maxval(err) ', maxval(err)
  do icase = 1, NCASES
    if (err(icase) > tol) then
      write (*, *) 'icase,err(icase) ', icase, err(icase)
    end if
  end do

  deallocate (ilist, jlist, alist)
  deallocate (x, b)
  PetscCallA(PetscFinalize(ierr))
end program tpetsc

!/*TEST
!
!   build:
!      requires: double !complex !defined(PETSC_USE_64BIT_INDICES)
!
!   test:
!      output_file: output/ex61f_1.out
!      TODO: Need to determine how to test OpenMP code
!
!TEST*/