Actual source code: ex61f.F90

!
!        Demonstrates having each OpenMP thread manage its own PETSc objects and solvers
!           - each thread is ONLY allowed to access objects that IT created,
!             that is, threads cannot share objects
!
!        Run with "export OMP_NUM_THREADS=16; ./ex61f"
!           to use 16 independent threads
!
!        Requires PETSc configured with: ./configure --with-threadsafety --with-log=0 --with-openmp
!
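!        (--with-log=0 disables PETSc logging, which is not thread-safe;
!         --with-threadsafety allows PETSc objects to be created, used and
!         destroyed concurrently by different threads.)
!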
         module ex61fmodule
         implicit none
         contains
         subroutine split_indices(total,num_pieces,ibeg,iend)
           implicit none

           integer :: total
           integer :: num_pieces
           integer :: ibeg(num_pieces), iend(num_pieces)
           integer :: itmp1, itmp2, ioffset, i
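!          Split 1..total into num_pieces nearly equal contiguous chunks:
!          the first mod(total,num_pieces) chunks receive one extra entry.
!          For example, total=100 and num_pieces=16 gives 4 chunks of 7 and
!          12 chunks of 6; an empty chunk is returned with iend(i) < ibeg(i).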

 22:            itmp1 = total/num_pieces
 23:            itmp2 = mod(total,num_pieces)
 24:            ioffset = 0
 25:            do i=1,itmp2
 26:               ibeg(i) = ioffset + 1
 27:               iend(i) = ioffset + (itmp1+1)
 28:               ioffset = iend(i)
 29:            enddo
 30:            do i=itmp2+1,num_pieces
 31:               ibeg(i) = ioffset + 1
 32:               if (ibeg(i) > total) then
 33:                  iend(i) = ibeg(i) - 1
 34:               else
 35:                  iend(i) = ioffset + itmp1
 36:                  ioffset = iend(i)
 37:               endif
 38:            enddo

 40:          end subroutine split_indices
 41:        end module ex61fmodule

      program tpetsc

#include <petsc/finclude/petsc.h>
      use ex61fmodule
      use petsc
      implicit none
!     -----------------------------
!     test concurrent PETSc solvers
!     -----------------------------
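!     NCASES independent linear systems (a 9-point stencil on an n x n grid)
!     are assembled and solved.  The cases are split across the OpenMP
!     threads; each thread works only with the Mat/Vec/KSP/PC objects it
!     created itself.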

      integer, parameter :: NCASES=100
      integer, parameter :: MAXTHREADS=64
      real(8), parameter :: tol = 1.0d-6

      integer, dimension(MAXTHREADS) :: ibeg,iend

!$   integer, external :: omp_get_num_threads

      Mat, target :: Mcol_f_mat(MAXTHREADS)
      Vec, target :: Mcol_f_vecb(MAXTHREADS)
      Vec, target :: Mcol_f_vecx(MAXTHREADS)
      KSP, target :: Mcol_f_ksp(MAXTHREADS)
      PC,  target :: MPC(MAXTHREADS)
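!     one PETSc object of each type per thread; thread ith uses only entry ith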

      Mat, pointer :: col_f_mat
      Vec, pointer :: col_f_vecb
      Vec, pointer :: col_f_vecx
      KSP, pointer :: col_f_ksp
      PC, pointer :: pc

      PetscInt :: ith, nthreads
      PetscErrorCode ierr

      integer, parameter :: nz_per_row = 9
      integer, parameter :: n = 100
      integer :: i,j,ij,ij2,ii,jj,nz,ip, dx,dy,icase
      integer, allocatable :: ilist(:),jlist(:)
      PetscScalar :: aij
      PetscScalar, allocatable :: alist(:)
      logical :: isvalid_ii, isvalid_jj, is_diag

      PetscInt nrow
      PetscInt ncol
      PetscScalar, allocatable :: x(:), b(:)
      real(8) :: err(NCASES)

      integer :: t1,t2,count_rate
      real :: ttime

      PetscCallA(PetscInitialize(ierr))

      allocate(ilist(n*n*nz_per_row),jlist(n*n*nz_per_row),alist(n*n*nz_per_row))
      allocate(x(0:(n*n-1)),b(0:(n*n-1)))
      nrow = n*n
      ncol = nrow

      nthreads = 1
!$omp parallel
!$omp master
!$      nthreads = omp_get_num_threads()
!$omp end master
!$omp end parallel
      print*,'nthreads = ', nthreads,' NCASES = ', NCASES

      call split_indices(NCASES,nthreads,ibeg,iend)
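!     the !$ conditional lines are compiled only when OpenMP is enabled, so
!     nthreads stays 1 in a serial build; thread ith handles cases
!     ibeg(ith)..iend(ith)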

!$omp parallel do                                                        &
!$omp private(ith,ierr)                                                  &
!$omp private(col_f_mat,col_f_vecb,col_f_vecx,col_f_ksp)
      do ith=1,nthreads
         col_f_mat => Mcol_f_mat(ith)
         col_f_vecb => Mcol_f_vecb(ith)
         col_f_vecx => Mcol_f_vecx(ith)
         col_f_ksp => Mcol_f_ksp(ith)

         PetscCallA(MatCreateSeqAIJ(PETSC_COMM_SELF, nrow,ncol, nz_per_row,PETSC_NULL_INTEGER, col_f_mat, ierr))
         PetscCallA(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nrow,PETSC_NULL_SCALAR, col_f_vecb, ierr))
         PetscCallA(VecDuplicate(col_f_vecb, col_f_vecx,ierr))
         PetscCallA(KSPCreate(PETSC_COMM_SELF, col_f_ksp,ierr))
       enddo
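!     every Mat, Vec and KSP above was created inside the parallel region by
!     the thread that will use it; concurrent creation/destruction of PETSc
!     objects is what requires the --with-threadsafety configure option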

!      -----------------------
!      setup sparsity pattern
!      -----------------------
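!      the matrix is a 9-point stencil on an n x n grid, stored as COO-style
!      triplets (ilist,jlist,alist): 4.0 on the diagonal and -1.0 on each
!      valid neighbour connection; the same pattern is reused for every case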
       nz = 0
       do j=1,n
       do i=1,n
        ij = i + (j-1)*n
        do dx=-1,1
        do dy=-1,1
          ii = i + dx
          jj = j + dy

          ij2 = ii + (jj-1)*n
          isvalid_ii = (1 <= ii).and.(ii <= n)
          isvalid_jj = (1 <= jj).and.(jj <= n)
          if (isvalid_ii.and.isvalid_jj) then
             is_diag = (ij .eq. ij2)
             nz = nz + 1
             ilist(nz) = ij
             jlist(nz) = ij2
             if (is_diag) then
               aij = 4.0
             else
               aij = -1.0
             endif
             alist(nz) = aij
          endif
          enddo
          enddo
         enddo
         enddo

       print*,'nz = ', nz

!      ----------------------------------
!      convert from Fortran to C indexing
!      ----------------------------------
       ilist(1:nz) = ilist(1:nz) - 1
       jlist(1:nz) = jlist(1:nz) - 1
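!      MatSetValue below expects 0-based (C-style) row and column indices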

!      --------------
!      setup matrices
!      --------------
       call system_clock(t1,count_rate)
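!      wall-clock timing of the assembly and solve phase across all threads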

!$omp  parallel do                                                      &
!$omp& private(ith,icase,ip,i,j,ii,jj,aij,ierr,x,b)                      &
!$omp& private(col_f_mat,col_f_vecb,col_f_vecx,col_f_ksp,pc)
       do ith=1,nthreads
         col_f_mat => Mcol_f_mat(ith)
         col_f_vecb => Mcol_f_vecb(ith)
         col_f_vecx => Mcol_f_vecx(ith)
         col_f_ksp => Mcol_f_ksp(ith)
         pc => MPC(ith)

        do icase=ibeg(ith),iend(ith)

         do ip=1,nz
           ii = ilist(ip)
           jj = jlist(ip)
           aij = alist(ip)
           PetscCallA(MatSetValue(col_f_mat,ii,jj,aij,INSERT_VALUES,ierr))
         enddo
         PetscCallA(MatAssemblyBegin(col_f_mat,MAT_FINAL_ASSEMBLY,ierr))
         PetscCallA(MatAssemblyEnd(col_f_mat,MAT_FINAL_ASSEMBLY,ierr))
         PetscCallA(KSPSetOperators(col_f_ksp,col_f_mat,col_f_mat,ierr))

         ! set linear solver
         PetscCallA(KSPGetPC(col_f_ksp,pc,ierr))
         PetscCallA(PCSetType(pc,PCLU,ierr))
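         ! LU is a direct factorization, so each solve should reproduce the
         ! exact solution up to roundoff; KSPSetFromOptions below still lets
         ! the command line override this choice (e.g. -pc_type ilu -ksp_type gmres)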

         ! associate the PETSc vectors with the thread-private arrays x and b
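         ! manufactured solution: x = 1 everywhere and b = A*x is formed by
         ! hand below, so the computed solution can be checked against 1;
         ! the VecPlaceArray/VecResetArray calls are wrapped in critical
         ! sections, presumably to serialize internal bookkeeping that may
         ! not be thread-safe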
         x(:) = 1
!$omp    critical
         PetscCallA(VecPlaceArray(col_f_vecx,x,ierr))
!$omp    end critical

         b(:) = 0
         do ip=1,nz
           i = ilist(ip)
           j = jlist(ip)
           aij = alist(ip)
           b(i) = b(i) + aij*x(j)
         enddo
         x = 0
!$omp    critical
         PetscCallA(VecPlaceArray(col_f_vecb,b,ierr))
!$omp    end critical

!  ---------------------------------------------------------------
!  key test: solve multiple independent linear systems in parallel
!  ---------------------------------------------------------------
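!  each thread calls KSPSolve concurrently, but only on the KSP, Mat and Vec
!  objects it owns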
         PetscCallA(KSPSetFromOptions(col_f_ksp,ierr))

         PetscCallA(KSPSetUp(col_f_ksp,ierr))
         PetscCallA(KSPSolve(col_f_ksp,col_f_vecb,col_f_vecx,ierr))

!        ------------
!        check answer
!        ------------
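!        the exact solution is identically 1, so err(icase) is the max-norm
!        error of this case (KSPSolve wrote the result into x through the
!        placed array)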
         err(icase) = maxval(abs(x(:)-1))

!$omp    critical
         PetscCallA(VecResetArray(col_f_vecx,ierr))
!$omp    end critical

!$omp    critical
         PetscCallA(VecResetArray(col_f_vecb,ierr))
!$omp    end critical

       enddo
       enddo
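
!      free the per-thread PETSc objects; each thread destroys only the objects it created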

!$omp parallel do                                                        &
!$omp private(ith,ierr)                                                  &
!$omp private(col_f_mat,col_f_vecb,col_f_vecx,col_f_ksp)
      do ith=1,nthreads
         col_f_mat => Mcol_f_mat(ith)
         col_f_vecb => Mcol_f_vecb(ith)
         col_f_vecx => Mcol_f_vecx(ith)
         col_f_ksp => Mcol_f_ksp(ith)

         PetscCallA(MatDestroy(col_f_mat, ierr))
         PetscCallA(VecDestroy(col_f_vecb, ierr))
         PetscCallA(VecDestroy(col_f_vecx,ierr))

         PetscCallA(KSPDestroy(col_f_ksp,ierr))

       enddo

       call system_clock(t2,count_rate)
       ttime = real(t2-t1)/real(count_rate)
       write(*,*) 'total time is ',ttime

       write(*,*) 'maxval(err) ', maxval(err)
       do icase=1,NCASES
        if (err(icase) > tol) then
         write(*,*) 'icase,err(icase) ',icase,err(icase)
        endif
       enddo

       deallocate(ilist,jlist,alist)
       deallocate(x,b)
       PetscCallA(PetscFinalize(ierr))
       end program tpetsc

!/*TEST
!
!   build:
!      requires: double !complex !defined(PETSC_USE_64BIT_INDICES)
!
!   test:
!      output_file: output/ex61f_1.out
!      TODO: Need to determine how to test OpenMP code
!
!TEST*/