Actual source code: ex18.c

  1: static char help[] = "Demonstrates the use of the COO interface to PETSc matrices for finite element computations\n\n";

  3: /*
  4:      The COO interface for PETSc matrices provides a convenient way to provide finite element stiffness matrices to PETSc matrix that should work
  5:    well on both CPUs and GPUs. It is an alternative to using MatSetValues()

  7:      This example is intended for people who are NOT using DMPLEX or libCEED or any other higher-level infrastructure for finite elements;
  8:    it is only to demonstrate the concepts in a simple way for those people who are interested and for those people who are using PETSc for
  9:    linear algebra solvers but are managing their own finite element process.

 11:      Please do NOT use this example as a starting point to writing your own finite element code from scratch!

 13:      Each element in this example has three vertices; hence the usage below needs to be adjusted for elements of a different number of vertices.
 14: */

 16: #include <petscmat.h>
 17: #include "ex18.h"

 19: static PetscErrorCode CreateFEStruct(FEStruct *fe)
 20: {
 21:   PetscFunctionBeginUser;
 22:   fe->Nv = 5;
 23:   fe->Ne = 3;
 24:   PetscCall(PetscMalloc1(3 * fe->Ne, &fe->vertices));
 25:   /* the three vertices associated with each element in order of element */
 26:   fe->vertices[0 + 0] = 0;
 27:   fe->vertices[0 + 1] = 1;
 28:   fe->vertices[0 + 2] = 2;
 29:   fe->vertices[3 + 0] = 2;
 30:   fe->vertices[3 + 1] = 1;
 31:   fe->vertices[3 + 2] = 3;
 32:   fe->vertices[6 + 0] = 2;
 33:   fe->vertices[6 + 1] = 4;
 34:   fe->vertices[6 + 2] = 3;
 35:   fe->n               = 5;
 36:   PetscFunctionReturn(PETSC_SUCCESS);
 37: }

 39: static PetscErrorCode DestroyFEStruct(FEStruct *fe)
 40: {
 41:   PetscFunctionBeginUser;
 42:   PetscCall(PetscFree(fe->vertices));
 43:   PetscCall(PetscFree(fe->coo));
 44:   PetscFunctionReturn(PETSC_SUCCESS);
 45: }

 47: static PetscErrorCode CreateMatrix(FEStruct *fe, Mat *A)
 48: {
 49:   PetscInt *oor, *ooc, cnt = 0;

 51:   PetscFunctionBeginUser;
 52:   PetscCall(MatCreate(PETSC_COMM_WORLD, A));
 53:   PetscCall(MatSetSizes(*A, fe->n, fe->n, PETSC_DECIDE, PETSC_DECIDE));
 54:   PetscCall(MatSetFromOptions(*A));

 56:   /* determine for each entry in each element stiffness matrix the global row and column */
 57:   /* since the element is triangular with piecewise linear basis functions there are three degrees of freedom per element, one for each vertex */
 58:   PetscCall(PetscMalloc2(3 * 3 * fe->Ne, &oor, 3 * 3 * fe->Ne, &ooc));
 59:   for (PetscInt e = 0; e < fe->Ne; e++) {
 60:     for (PetscInt vi = 0; vi < 3; vi++) {
 61:       for (PetscInt vj = 0; vj < 3; vj++) {
 62:         oor[cnt]   = fe->vertices[3 * e + vi];
 63:         ooc[cnt++] = fe->vertices[3 * e + vj];
 64:       }
 65:     }
 66:   }
 67:   PetscCall(MatSetPreallocationCOO(*A, 3 * 3 * fe->Ne, oor, ooc));
 68:   PetscCall(PetscFree2(oor, ooc));

 70:   /* determine the offset into the COO value array the offset of each element stiffness; there are 9 = 3*3 entries for each element stiffness */
 71:   /* for lists of elements with different numbers of degrees of freedom associated with each element the offsets will not be uniform */
 72:   PetscCall(PetscMalloc1(fe->Ne, &fe->coo));
 73:   fe->coo[0] = 0;
 74:   for (PetscInt e = 1; e < fe->Ne; e++) fe->coo[e] = fe->coo[e - 1] + 3 * 3;
 75:   PetscFunctionReturn(PETSC_SUCCESS);
 76: }

 78: static PetscErrorCode FillMatrixCPU(FEStruct *fe, Mat A)
 79: {
 80:   PetscScalar s[9];

 82:   PetscFunctionBeginUser;
 83:   /* simulation of traditional PETSc CPU based finite assembly process */
 84:   for (PetscInt e = 0; e < fe->Ne; e++) {
 85:     for (PetscInt vi = 0; vi < 3; vi++) {
 86:       for (PetscInt vj = 0; vj < 3; vj++) s[3 * vi + vj] = vi + 2 * vj;
 87:     }
 88:     PetscCall(MatSetValues(A, 3, fe->vertices + 3 * e, 3, fe->vertices + 3 * e, s, ADD_VALUES));
 89:   }
 90:   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
 91:   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
 92:   PetscFunctionReturn(PETSC_SUCCESS);
 93: }

 95: /*
 96:    Shows an example of tracking element offsets explicitly, which allows for
 97:    mixed-topology meshes and combining both volume and surface parts into the weak form.
 98: */
 99: static PetscErrorCode FillMatrixCPUCOO(FEStruct *fe, Mat A)
100: {
101:   PetscScalar *v, *s;

103:   PetscFunctionBeginUser;
104:   /* simulation of CPU based finite assembly process with COO */
105:   PetscCall(PetscMalloc1(3 * 3 * fe->Ne, &v));
106:   for (PetscInt e = 0; e < fe->Ne; e++) {
107:     s = v + fe->coo[e]; /* point to location in COO of current element stiffness */
108:     for (PetscInt vi = 0; vi < 3; vi++) {
109:       for (PetscInt vj = 0; vj < 3; vj++) s[3 * vi + vj] = vi + 2 * vj;
110:     }
111:   }
112:   PetscCall(MatSetValuesCOO(A, v, ADD_VALUES));
113:   PetscCall(PetscFree(v));
114:   PetscFunctionReturn(PETSC_SUCCESS);
115: }

117: /*
118:   Uses a multi-dimensional indexing technique that works for homogeneous meshes
119:   such as single-topology with volume integral only.
120: */
121: static PetscErrorCode FillMatrixCPUCOO3d(FEStruct *fe, Mat A)
122: {
123:   PetscScalar(*s)[3][3];

125:   PetscFunctionBeginUser;
126:   /* simulation of CPU based finite assembly process with COO */
127:   PetscCall(PetscMalloc1(fe->Ne, &s));
128:   for (PetscInt e = 0; e < fe->Ne; e++) {
129:     for (PetscInt vi = 0; vi < 3; vi++) {
130:       for (PetscInt vj = 0; vj < 3; vj++) s[e][vi][vj] = vi + 2 * vj;
131:     }
132:   }
133:   PetscCall(MatSetValuesCOO(A, (PetscScalar *)s, INSERT_VALUES));
134:   PetscCall(PetscFree(s));
135:   PetscFunctionReturn(PETSC_SUCCESS);
136: }

138: int main(int argc, char **args)
139: {
140:   Mat         A, B;
141:   FEStruct    fe;
142:   PetscMPIInt size;
143:   PetscBool   is_kokkos, is_cuda;

145:   PetscFunctionBeginUser;
146:   PetscCall(PetscInitialize(&argc, &args, NULL, help));
147:   PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
148:   PetscCheck(size <= 1, PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "Demonstration is only for sequential runs");

150:   PetscCall(CreateFEStruct(&fe));

152:   PetscCall(CreateMatrix(&fe, &B));
153:   PetscCall(MatDuplicate(B, MAT_DO_NOT_COPY_VALUES, &A));
154:   PetscCall(MatDestroy(&B));

156:   PetscCall(FillMatrixCPU(&fe, A));
157:   PetscCall(MatView(A, PETSC_VIEWER_STDOUT_WORLD));

159:   PetscCall(MatZeroEntries(A));
160:   PetscCall(FillMatrixCPUCOO(&fe, A));
161:   PetscCall(MatView(A, PETSC_VIEWER_STDOUT_WORLD));

163:   PetscCall(MatZeroEntries(A));
164:   PetscCall(FillMatrixCPUCOO3d(&fe, A));
165:   PetscCall(MatView(A, PETSC_VIEWER_STDOUT_WORLD));

167:   PetscCall(MatZeroEntries(A));
168:   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJKOKKOS, &is_kokkos));
169:   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJCUSPARSE, &is_cuda));
170: #if defined(PETSC_HAVE_KOKKOS)
171:   if (is_kokkos) PetscCall(FillMatrixKokkosCOO(&fe, A));
172: #endif
173: #if defined(PETSC_HAVE_CUDA)
174:   if (is_cuda) PetscCall(FillMatrixCUDACOO(&fe, A));
175: #endif
176:   PetscCall(MatView(A, PETSC_VIEWER_STDOUT_WORLD));

178:   PetscCall(MatDestroy(&A));
179:   PetscCall(DestroyFEStruct(&fe));
180:   PetscCall(PetscFinalize());
181:   return 0;
182: }

184: /*TEST
185:   build:
186:     requires: cuda kokkos_kernels
187:     depends: ex18cu.cu ex18kok.kokkos.cxx

189:   testset:
190:     filter: grep -v "type"
191:     output_file: output/ex18_1.out

193:     test:
194:       suffix: kok
195:       requires: kokkos_kernels
196:       args: -mat_type aijkokkos

198:     test:
199:       suffix: cuda
200:       requires: cuda
201:       args: -mat_type aijcusparse

203:     test:
204:       suffix: hip
205:       requires: hip
206:       args: -mat_type aijhipsparse

208: TEST*/