Actual source code: ex61f.F90
!
!  Demonstrates having each OpenMP thread manage its own PETSc objects and linear solves
!  - each thread is ONLY allowed to access objects that IT created,
!    that is, threads cannot share objects
!
!  Run with "export OMP_NUM_THREADS=16; ./ex61f"
!  to use 16 independent threads
!
!  Requires PETSc configured with: ./configure --with-threadsafety --with-openmp
!
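!  Example session (the thread count of 4 is illustrative, assuming a PETSc
!  build with the configuration options above):
!     export OMP_NUM_THREADS=4
!     ./ex61f
!  Each thread then solves its share of the NCASES test systems independently.
!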
module ex61fmodule
  implicit none
contains

  subroutine split_indices(total,num_pieces,ibeg,iend)
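    ! Partition "total" work items into "num_pieces" contiguous ranges
    ! [ibeg(i),iend(i)]; the first mod(total,num_pieces) pieces receive one
    ! extra item, and any leftover pieces get an empty range (iend < ibeg).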
    implicit none

    integer :: total
    integer :: num_pieces
    integer :: ibeg(num_pieces), iend(num_pieces)
    integer :: itmp1, itmp2, ioffset, i

    itmp1 = total/num_pieces
    itmp2 = mod(total,num_pieces)
    ioffset = 0
    do i=1,itmp2
      ibeg(i) = ioffset + 1
      iend(i) = ioffset + (itmp1+1)
      ioffset = iend(i)
    enddo
    do i=itmp2+1,num_pieces
      ibeg(i) = ioffset + 1
      if (ibeg(i) > total) then
        iend(i) = ibeg(i) - 1
      else
        iend(i) = ioffset + itmp1
        ioffset = iend(i)
      endif
    enddo

  end subroutine split_indices

end module ex61fmodule
program tpetsc

#include <petsc/finclude/petsc.h>
  use ex61fmodule
  use petsc
  implicit none

  ! ----------------------------
  ! test concurrent petsc solver
  ! ----------------------------

  integer, parameter :: NCASES=100
  integer, parameter :: MAXTHREADS=64
  real(8), parameter :: tol = 1.0d-6

  integer, dimension(MAXTHREADS) :: ibeg,iend

!$ integer, external :: omp_get_num_threads

  Mat, target :: Mcol_f_mat(MAXTHREADS)
  Vec, target :: Mcol_f_vecb(MAXTHREADS)
  Vec, target :: Mcol_f_vecx(MAXTHREADS)
  KSP, target :: Mcol_f_ksp(MAXTHREADS)
  PC, target :: MPC(MAXTHREADS)

  Mat, pointer :: col_f_mat
  Vec, pointer :: col_f_vecb
  Vec, pointer :: col_f_vecx
  KSP, pointer :: col_f_ksp
  PC, pointer :: pc

  PetscInt :: ith, nthreads
  PetscErrorCode ierr

  integer, parameter :: nz_per_row = 9
  integer, parameter :: n = 100
  integer :: i,j,ij,ij2,ii,jj,nz,ip, dx,dy,icase
  integer, allocatable :: ilist(:),jlist(:)
  PetscScalar :: aij
  PetscScalar, allocatable :: alist(:)
  logical :: isvalid_ii, isvalid_jj, is_diag

  PetscInt nrow
  PetscInt ncol
  PetscScalar, allocatable :: x(:), b(:)
  real(8) :: err(NCASES)

  integer :: t1,t2,count_rate
  real :: ttime

  PetscCallA(PetscInitialize(ierr))

  allocate(ilist(n*n*nz_per_row),jlist(n*n*nz_per_row),alist(n*n*nz_per_row))
  allocate(x(0:(n*n-1)),b(0:(n*n-1)))
  nrow = n*n
  ncol = nrow
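
  ! Determine the number of OpenMP threads available; defaults to 1 when
  ! the code is built without OpenMP support.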
  nthreads = 1
!$omp parallel
!$omp master
!$ nthreads = omp_get_num_threads()
!$omp end master
!$omp end parallel
  print*,'nthreads = ', nthreads,' NCASES = ', NCASES

  call split_indices(NCASES,nthreads,ibeg,iend)
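
  ! Each thread creates its OWN sequential matrix, vectors, and KSP solver;
  ! no PETSc object is shared between threads.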
!$omp parallel do &
!$omp private(ith,ierr) &
!$omp private(col_f_mat,col_f_vecb,col_f_vecx,col_f_ksp)
  do ith=1,nthreads
    col_f_mat => Mcol_f_mat(ith)
    col_f_vecb => Mcol_f_vecb(ith)
    col_f_vecx => Mcol_f_vecx(ith)
    col_f_ksp => Mcol_f_ksp(ith)

    PetscCallA(MatCreateSeqAIJ(PETSC_COMM_SELF, nrow,ncol, nz_per_row,PETSC_NULL_INTEGER_ARRAY, col_f_mat, ierr))
    PetscCallA(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nrow,PETSC_NULL_SCALAR_ARRAY, col_f_vecb, ierr))
    PetscCallA(VecDuplicate(col_f_vecb, col_f_vecx,ierr))
    PetscCallA(KSPCreate(PETSC_COMM_SELF, col_f_ksp,ierr))
  enddo

  ! -----------------------
  ! setup sparsity pattern
  ! -----------------------
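  ! 9-point stencil on an n-by-n grid: entry 4 on the diagonal and -1 for
  ! each of the (up to 8) neighbouring grid points.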
  nz = 0
  do j=1,n
    do i=1,n
      ij = i + (j-1)*n
      do dx=-1,1
        do dy=-1,1
          ii = i + dx
          jj = j + dy

          ij2 = ii + (jj-1)*n
          isvalid_ii = (1 <= ii).and.(ii <= n)
          isvalid_jj = (1 <= jj).and.(jj <= n)
          if (isvalid_ii.and.isvalid_jj) then
            is_diag = (ij .eq. ij2)
            nz = nz + 1
            ilist(nz) = ij
            jlist(nz) = ij2
            if (is_diag) then
              aij = 4.0
            else
              aij = -1.0
            endif
            alist(nz) = aij
          endif
        enddo
      enddo
    enddo
  enddo

  print*,'nz = ', nz

  ! ----------------------------------
  ! convert from Fortran to C indexing
  ! ----------------------------------
  ilist(1:nz) = ilist(1:nz) - 1
  jlist(1:nz) = jlist(1:nz) - 1

  ! --------------
  ! setup matrices
  ! --------------
  call system_clock(t1,count_rate)

!$omp parallel do &
!$omp& private(ith,icase,ip,i,j,ii,jj,aij,ierr,x,b) &
!$omp& private(col_f_mat,col_f_vecb,col_f_vecx,col_f_ksp,pc)
  do ith=1,nthreads
    col_f_mat => Mcol_f_mat(ith)
    col_f_vecb => Mcol_f_vecb(ith)
    col_f_vecx => Mcol_f_vecx(ith)
    col_f_ksp => Mcol_f_ksp(ith)
    pc => MPC(ith)
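
    ! Each thread processes its own range of cases ibeg(ith):iend(ith),
    ! refilling its matrix and re-solving the linear system for every case.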
    do icase=ibeg(ith),iend(ith)

      do ip=1,nz
        ii = ilist(ip)
        jj = jlist(ip)
        aij = alist(ip)
        PetscCallA(MatSetValue(col_f_mat,ii,jj,aij,INSERT_VALUES,ierr))
      enddo
      PetscCallA(MatAssemblyBegin(col_f_mat,MAT_FINAL_ASSEMBLY,ierr))
      PetscCallA(MatAssemblyEnd(col_f_mat,MAT_FINAL_ASSEMBLY,ierr))
      PetscCallA(KSPSetOperators(col_f_ksp,col_f_mat,col_f_mat,ierr))

      ! set linear solver
      PetscCallA(KSPGetPC(col_f_ksp,pc,ierr))
      PetscCallA(PCSetType(pc,PCLU,ierr))

      ! associate petsc vector with allocated array
      x(:) = 1
!$omp critical
      PetscCallA(VecPlaceArray(col_f_vecx,x,ierr))
!$omp end critical
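
      ! Form the right-hand side b = A*x for the known solution x = 1,
      ! so the solve below should recover x = 1 to solver tolerance.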
      b(:) = 0
      do ip=1,nz
        i = ilist(ip)
        j = jlist(ip)
        aij = alist(ip)
        b(i) = b(i) + aij*x(j)
      enddo
      x = 0
!$omp critical
      PetscCallA(VecPlaceArray(col_f_vecb,b,ierr))
!$omp end critical

      ! -----------------------------------------------------------
      ! key test, need to solve multiple linear systems in parallel
      ! -----------------------------------------------------------
      PetscCallA(KSPSetFromOptions(col_f_ksp,ierr))

      PetscCallA(KSPSetUp(col_f_ksp,ierr))
      PetscCallA(KSPSolve(col_f_ksp,col_f_vecb,col_f_vecx,ierr))

      ! ------------
      ! check answer
      ! ------------
      err(icase) = maxval(abs(x(:)-1))

!$omp critical
      PetscCallA(VecResetArray(col_f_vecx,ierr))
!$omp end critical

!$omp critical
      PetscCallA(VecResetArray(col_f_vecb,ierr))
!$omp end critical

    enddo
  enddo
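
  ! Each thread destroys ONLY the objects it created above.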
!$omp parallel do &
!$omp private(ith,ierr) &
!$omp private(col_f_mat,col_f_vecb,col_f_vecx,col_f_ksp)
  do ith=1,nthreads
    col_f_mat => Mcol_f_mat(ith)
    col_f_vecb => Mcol_f_vecb(ith)
    col_f_vecx => Mcol_f_vecx(ith)
    col_f_ksp => Mcol_f_ksp(ith)

    PetscCallA(MatDestroy(col_f_mat, ierr))
    PetscCallA(VecDestroy(col_f_vecb, ierr))
    PetscCallA(VecDestroy(col_f_vecx,ierr))

    PetscCallA(KSPDestroy(col_f_ksp,ierr))

  enddo

  call system_clock(t2,count_rate)
  ttime = real(t2-t1)/real(count_rate)
  write(*,*) 'total time is ',ttime

  write(*,*) 'maxval(err) ', maxval(err)
  do icase=1,NCASES
    if (err(icase) > tol) then
      write(*,*) 'icase,err(icase) ',icase,err(icase)
    endif
  enddo

  deallocate(ilist,jlist,alist)
  deallocate(x,b)

  PetscCallA(PetscFinalize(ierr))
end program tpetsc

!/*TEST
!
!   build:
!     requires: double !complex !defined(PETSC_USE_64BIT_INDICES)
!
!   test:
!     output_file: output/ex61f_1.out
!     TODO: Need to determine how to test OpenMP code
!
!TEST*/