Actual source code: ex61f.F90
!
! Demonstrates having each OpenMP thread manage its own PETSc objects and solvers
! - each thread is ONLY allowed to access objects that IT created,
!   that is, threads cannot share objects
!
! Run with "export OMP_NUM_THREADS=16; ./ex61f"
! to use 16 independent threads
!
! Requires PETSc configured with: ./configure --with-threadsafety --with-openmp
!
#include <petsc/finclude/petsc.h>
module ex61fmodule
  implicit none
contains
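
  ! split_indices partitions 1..total into num_pieces contiguous, nearly
  ! equal chunks: the first mod(total,num_pieces) chunks get one extra
  ! element, and chunks past total come back empty (iend < ibeg).
  ! For example, total = 10 with num_pieces = 3 yields
  ! (ibeg,iend) = (1,4), (5,7), (8,10).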
  subroutine split_indices(total, num_pieces, ibeg, iend)
    implicit none

    integer, intent(in) :: total
    integer, intent(in) :: num_pieces
    integer, intent(out) :: ibeg(num_pieces), iend(num_pieces)
    integer :: itmp1, itmp2, ioffset, i

    itmp1 = total/num_pieces
    itmp2 = mod(total, num_pieces)
    ioffset = 0
    do i = 1, itmp2
      ibeg(i) = ioffset + 1
      iend(i) = ioffset + (itmp1 + 1)
      ioffset = iend(i)
    end do
    do i = itmp2 + 1, num_pieces
      ibeg(i) = ioffset + 1
      if (ibeg(i) > total) then
        iend(i) = ibeg(i) - 1
      else
        iend(i) = ioffset + itmp1
        ioffset = iend(i)
      end if
    end do

  end subroutine split_indices
end module ex61fmodule

program tpetsc

  use ex61fmodule
  use petsc
!$ use OMP_LIB
  implicit none

  ! ----------------------------
  ! test concurrent PETSc solver
  ! ----------------------------

  integer, parameter :: NCASES = 100
  integer, parameter :: MAXTHREADS = 64
  real(8), parameter :: tol = 1.0d-6

  integer, dimension(MAXTHREADS) :: ibeg, iend
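
  ! one set of PETSc objects per thread, held in target arrays indexed by
  ! thread number; each thread associates the pointers below with its own
  ! slot only, so no PETSc object is ever shared between threads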
  Mat, target :: Mcol_f_mat(MAXTHREADS)
  Vec, target :: Mcol_f_vecb(MAXTHREADS)
  Vec, target :: Mcol_f_vecx(MAXTHREADS)
  KSP, target :: Mcol_f_ksp(MAXTHREADS)
  PC, target :: MPC(MAXTHREADS)

  Mat, pointer :: col_f_mat
  Vec, pointer :: col_f_vecb
  Vec, pointer :: col_f_vecx
  KSP, pointer :: col_f_ksp
  PC, pointer :: pc

  PetscInt :: ith, nthreads
  PetscErrorCode :: ierr

  integer, parameter :: nz_per_row = 9
  integer, parameter :: n = 100
  integer :: i, j, ij, ij2, ii, jj, nz, ip, dx, dy, icase
  integer, allocatable :: ilist(:), jlist(:)
  PetscScalar :: aij
  PetscScalar, allocatable :: alist(:)
  logical :: isvalid_ii, isvalid_jj, is_diag

  PetscInt :: nrow
  PetscInt :: ncol
  PetscScalar, allocatable :: x(:), b(:)
  real(8) :: err(NCASES)

  integer :: t1, t2, count_rate
  real :: ttime

  PetscCallA(PetscInitialize(ierr))

  allocate (ilist(n*n*nz_per_row), jlist(n*n*nz_per_row), alist(n*n*nz_per_row))
  allocate (x(0:(n*n - 1)), b(0:(n*n - 1)))
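  ! x and b use 0-based bounds so they can be indexed directly with the
  ! 0-based (C-style) indices that PETSc expects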

  nrow = n*n
  ncol = nrow

  nthreads = 1
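  ! query the actual number of OpenMP threads; in a serial build the !$
  ! lines are not compiled and nthreads stays 1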
!$omp parallel
!$omp master
!$ nthreads = omp_get_num_threads()
!$omp end master
!$omp end parallel
  print *, 'nthreads = ', nthreads, ' NCASES = ', NCASES

  call split_indices(NCASES, nthreads, ibeg, iend)
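
  ! each thread creates its own matrix, vectors, and KSP solver on the
  ! serial communicator PETSC_COMM_SELF; per-thread object ownership is
  ! exactly what a --with-threadsafety build of PETSc permits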
!$omp parallel do &
!$omp private(ith,ierr) &
!$omp private(col_f_mat,col_f_vecb,col_f_vecx,col_f_ksp)
  do ith = 1, nthreads
    col_f_mat => Mcol_f_mat(ith)
    col_f_vecb => Mcol_f_vecb(ith)
    col_f_vecx => Mcol_f_vecx(ith)
    col_f_ksp => Mcol_f_ksp(ith)

    PetscCallA(MatCreateSeqAIJ(PETSC_COMM_SELF, nrow, ncol, nz_per_row, PETSC_NULL_INTEGER_ARRAY, col_f_mat, ierr))
    PetscCallA(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nrow, PETSC_NULL_SCALAR_ARRAY, col_f_vecb, ierr))
    PetscCallA(VecDuplicate(col_f_vecb, col_f_vecx, ierr))
    PetscCallA(KSPCreate(PETSC_COMM_SELF, col_f_ksp, ierr))
  end do

  ! -----------------------
  ! setup sparsity pattern
  ! -----------------------
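  ! 9-point stencil on an n x n grid: each node is coupled to itself and
  ! its up-to-8 neighbors (dx,dy in -1..1), with 4 on the diagonal and -1
  ! off the diagonal; boundary nodes simply get fewer entries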
  nz = 0
  do j = 1, n
    do i = 1, n
      ij = i + (j - 1)*n
      do dx = -1, 1
        do dy = -1, 1
          ii = i + dx
          jj = j + dy

          ij2 = ii + (jj - 1)*n
          isvalid_ii = (1 <= ii) .and. (ii <= n)
          isvalid_jj = (1 <= jj) .and. (jj <= n)
          if (isvalid_ii .and. isvalid_jj) then
            is_diag = (ij == ij2)
            nz = nz + 1
            ilist(nz) = ij
            jlist(nz) = ij2
            if (is_diag) then
              aij = 4.0
            else
              aij = -1.0
            end if
            alist(nz) = aij
          end if
        end do
      end do
    end do
  end do

  print *, 'nz = ', nz

  ! ----------------------------------
  ! convert from Fortran to C indexing
  ! ----------------------------------
  ilist(1:nz) = ilist(1:nz) - 1
  jlist(1:nz) = jlist(1:nz) - 1

  ! --------------
  ! setup matrices
  ! --------------
  call system_clock(t1, count_rate)
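
  ! each thread assembles and solves its share of the NCASES identical
  ! systems; x and b are private, so every thread has its own work arrays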
!$omp parallel do &
!$omp private(ith,icase,ip,i,j,ii,jj,aij,ierr,x,b) &
!$omp private(col_f_mat,col_f_vecb,col_f_vecx,col_f_ksp,pc)
  do ith = 1, nthreads
    col_f_mat => Mcol_f_mat(ith)
    col_f_vecb => Mcol_f_vecb(ith)
    col_f_vecx => Mcol_f_vecx(ith)
    col_f_ksp => Mcol_f_ksp(ith)
    pc => MPC(ith)

    do icase = ibeg(ith), iend(ith)
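
      ! every case re-inserts the same values and reassembles, so each
      ! iteration exercises a full insert/assemble/solve cycle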
      do ip = 1, nz
        ii = ilist(ip)
        jj = jlist(ip)
        aij = alist(ip)
        PetscCallA(MatSetValue(col_f_mat, ii, jj, aij, INSERT_VALUES, ierr))
      end do
      PetscCallA(MatAssemblyBegin(col_f_mat, MAT_FINAL_ASSEMBLY, ierr))
      PetscCallA(MatAssemblyEnd(col_f_mat, MAT_FINAL_ASSEMBLY, ierr))
      PetscCallA(KSPSetOperators(col_f_ksp, col_f_mat, col_f_mat, ierr))

      ! set the linear solver to a direct LU factorization
      PetscCallA(KSPGetPC(col_f_ksp, pc, ierr))
      PetscCallA(PCSetType(pc, PCLU, ierr))

      ! associate PETSc vector with allocated array
      x(:) = 1
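      ! VecPlaceArray/VecResetArray are serialized in critical sections,
      ! presumably because they update shared PETSc internals (e.g.
      ! logging and object state) that are not guaranteed thread-safe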
!$omp critical
      PetscCallA(VecPlaceArray(col_f_vecx, x, ierr))
!$omp end critical
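
      ! manufacture the right-hand side b = A*x for the known solution
      ! x = 1, then zero x so the solve has to recover it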
      b(:) = 0
      do ip = 1, nz
        i = ilist(ip)
        j = jlist(ip)
        aij = alist(ip)
        b(i) = b(i) + aij*x(j)
      end do
      x = 0
!$omp critical
      PetscCallA(VecPlaceArray(col_f_vecb, b, ierr))
!$omp end critical

      ! -----------------------------------------------------------
      ! key test, need to solve multiple linear systems in parallel
      ! -----------------------------------------------------------
      PetscCallA(KSPSetFromOptions(col_f_ksp, ierr))
      PetscCallA(KSPSetUp(col_f_ksp, ierr))
      PetscCallA(KSPSolve(col_f_ksp, col_f_vecb, col_f_vecx, ierr))

      ! ------------
      ! check answer
      ! ------------
      err(icase) = maxval(abs(x(:) - 1))

!$omp critical
      PetscCallA(VecResetArray(col_f_vecx, ierr))
!$omp end critical

!$omp critical
      PetscCallA(VecResetArray(col_f_vecb, ierr))
!$omp end critical

    end do
  end do
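
  ! each thread destroys only the objects it created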
!$omp parallel do &
!$omp private(ith,ierr) &
!$omp private(col_f_mat,col_f_vecb,col_f_vecx,col_f_ksp)
  do ith = 1, nthreads
    col_f_mat => Mcol_f_mat(ith)
    col_f_vecb => Mcol_f_vecb(ith)
    col_f_vecx => Mcol_f_vecx(ith)
    col_f_ksp => Mcol_f_ksp(ith)

    PetscCallA(MatDestroy(col_f_mat, ierr))
    PetscCallA(VecDestroy(col_f_vecb, ierr))
    PetscCallA(VecDestroy(col_f_vecx, ierr))
    PetscCallA(KSPDestroy(col_f_ksp, ierr))
  end do

  call system_clock(t2, count_rate)
  ttime = real(t2 - t1)/real(count_rate)
  write (*, *) 'total time is ', ttime

  write (*, *) 'maxval(err) ', maxval(err)
  do icase = 1, NCASES
    if (err(icase) > tol) then
      write (*, *) 'icase,err(icase) ', icase, err(icase)
    end if
  end do

  deallocate (ilist, jlist, alist)
  deallocate (x, b)

  PetscCallA(PetscFinalize(ierr))
end program tpetsc

!/*TEST
!
!   build:
!     requires: double !complex !defined(PETSC_USE_64BIT_INDICES)
!
!   test:
!     output_file: output/ex61f_1.out
!     TODO: Need to determine how to test OpenMP code
!
!TEST*/