Actual source code: ex18.c

  1: static char help[] = "Demonstrates the use of the COO interface to PETSc matrices for finite element computations\n\n";

  3: /*
  4:      The COO interface for PETSc matrices provides a convenient way to pass finite element stiffness matrices to a PETSc matrix, and it should work
  5:    well on both CPUs and GPUs. It is an alternative to using MatSetValues().

  7:      This example is intended for people who are NOT using DMPLEX or libCEED or any other higher-level infrastructure for finite elements;
  8:    it is only to demonstrate the concepts in a simple way for those people who are interested and for those people who are using PETSc for
  9:    linear algebra solvers but are managing their own finite element process.

 11:      Please do NOT use this example as a starting point to writing your own finite element code from scratch!

 13:      Each element in this example has three vertices; hence the usage below needs to be adjusted for elements with a different number of vertices.
 14: */

 16: #include <petscmat.h>
 17: #include "ex18.h"

 19: static PetscErrorCode CreateFEStruct(FEStruct *fe)
 20: {
 22:   fe->Nv = 5;
 23:   fe->Ne = 3;
 24:   PetscMalloc1(3 * fe->Ne, &fe->vertices);
 25:   /* the three vertices associated with each element in order of element */
 26:   fe->vertices[0 + 0] = 0;
 27:   fe->vertices[0 + 1] = 1;
 28:   fe->vertices[0 + 2] = 2;
 29:   fe->vertices[3 + 0] = 2;
 30:   fe->vertices[3 + 1] = 1;
 31:   fe->vertices[3 + 2] = 3;
 32:   fe->vertices[6 + 0] = 2;
 33:   fe->vertices[6 + 1] = 4;
 34:   fe->vertices[6 + 2] = 3;
 35:   fe->n               = 5;
 36:   return 0;
 37: }

 39: static PetscErrorCode DestroyFEStruct(FEStruct *fe)
 40: {
 42:   PetscFree(fe->vertices);
 43:   PetscFree(fe->coo);
 44:   return 0;
 45: }

 47: static PetscErrorCode CreateMatrix(FEStruct *fe, Mat *A)
 48: {
 49:   PetscInt *oor, *ooc, cnt = 0;

 52:   MatCreate(PETSC_COMM_WORLD, A);
 53:   MatSetSizes(*A, fe->n, fe->n, PETSC_DECIDE, PETSC_DECIDE);
 54:   MatSetFromOptions(*A);

 56:   /* determine for each entry in each element stiffness matrix the global row and colum */
 57:   /* since the element is triangular with piecewise linear basis functions there are three degrees of freedom per element, one for each vertex */
 58:   PetscMalloc2(3 * 3 * fe->Ne, &oor, 3 * 3 * fe->Ne, &ooc);
 59:   for (PetscInt e = 0; e < fe->Ne; e++) {
 60:     for (PetscInt vi = 0; vi < 3; vi++) {
 61:       for (PetscInt vj = 0; vj < 3; vj++) {
 62:         oor[cnt]   = fe->vertices[3 * e + vi];
 63:         ooc[cnt++] = fe->vertices[3 * e + vj];
 64:       }
 65:     }
 66:   }
 67:   MatSetPreallocationCOO(*A, 3 * 3 * fe->Ne, oor, ooc);
 68:   PetscFree2(oor, ooc);

 70:   /* determine the offset into the COO value array the offset of each element stiffness; there are 9 = 3*3 entries for each element stiffness */
 71:   /* for lists of elements with different numbers of degrees of freedom assocated with each element the offsets will not be uniform */
 72:   PetscMalloc1(fe->Ne, &fe->coo);
 73:   fe->coo[0] = 0;
 74:   for (PetscInt e = 1; e < fe->Ne; e++) fe->coo[e] = fe->coo[e - 1] + 3 * 3;
 75:   return 0;
 76: }

 78: static PetscErrorCode FillMatrixCPU(FEStruct *fe, Mat A)
 79: {
 80:   PetscScalar s[9];

 83:   /* simulation of traditional PETSc CPU based finite assembly process */
 84:   for (PetscInt e = 0; e < fe->Ne; e++) {
 85:     for (PetscInt vi = 0; vi < 3; vi++) {
 86:       for (PetscInt vj = 0; vj < 3; vj++) s[3 * vi + vj] = vi + 2 * vj;
 87:     }
 88:     MatSetValues(A, 3, fe->vertices + 3 * e, 3, fe->vertices + 3 * e, s, ADD_VALUES);
 89:   }
 90:   MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
 91:   MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
 92:   return 0;
 93: }

 95: /*
 96:    Shows an example of tracking element offsets explicitly, which allows for
 97:    mixed-topology meshes and combining both volume and surface parts into the weak form.
 98: */
 99: static PetscErrorCode FillMatrixCPUCOO(FEStruct *fe, Mat A)
100: {
101:   PetscScalar *v, *s;

104:   /* simulation of CPU based finite assembly process with COO */
105:   PetscMalloc1(3 * 3 * fe->Ne, &v);
106:   for (PetscInt e = 0; e < fe->Ne; e++) {
107:     s = v + fe->coo[e]; /* point to location in COO of current element stiffness */
108:     for (PetscInt vi = 0; vi < 3; vi++) {
109:       for (PetscInt vj = 0; vj < 3; vj++) s[3 * vi + vj] = vi + 2 * vj;
110:     }
111:   }
112:   MatSetValuesCOO(A, v, ADD_VALUES);
113:   PetscFree(v);
114:   return 0;
115: }

117: /*
118:   Uses a multi-dimensional indexing technique that works for homogeneous meshes
119:   such as single-topology with volume integral only.
120: */
121: static PetscErrorCode FillMatrixCPUCOO3d(FEStruct *fe, Mat A)
122: {
123:   PetscScalar(*s)[3][3];

126:   /* simulation of CPU based finite assembly process with COO */
127:   PetscMalloc1(fe->Ne, &s);
128:   for (PetscInt e = 0; e < fe->Ne; e++) {
129:     for (PetscInt vi = 0; vi < 3; vi++) {
130:       for (PetscInt vj = 0; vj < 3; vj++) s[e][vi][vj] = vi + 2 * vj;
131:     }
132:   }
133:   MatSetValuesCOO(A, (PetscScalar *)s, INSERT_VALUES);
134:   PetscFree(s);
135:   return 0;
136: }

138: int main(int argc, char **args)
139: {
140:   Mat         A;
141:   FEStruct    fe;
142:   PetscMPIInt size;
143:   PetscBool   is_kokkos, is_cuda;

146:   PetscInitialize(&argc, &args, (char *)0, help);
147:   MPI_Comm_size(PETSC_COMM_WORLD, &size);

150:   CreateFEStruct(&fe);
151:   CreateMatrix(&fe, &A);

153:   FillMatrixCPU(&fe, A);
154:   MatView(A, PETSC_VIEWER_STDOUT_WORLD);

156:   MatZeroEntries(A);
157:   FillMatrixCPUCOO(&fe, A);
158:   MatView(A, PETSC_VIEWER_STDOUT_WORLD);

160:   MatZeroEntries(A);
161:   FillMatrixCPUCOO3d(&fe, A);
162:   MatView(A, PETSC_VIEWER_STDOUT_WORLD);

164:   MatZeroEntries(A);
165:   PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJKOKKOS, &is_kokkos);
166:   PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJCUSPARSE, &is_cuda);
167: #if defined(PETSC_HAVE_KOKKOS)
168:   if (is_kokkos) FillMatrixKokkosCOO(&fe, A);
169: #endif
170: #if defined(PETSC_HAVE_CUDA)
171:   if (is_cuda) FillMatrixCUDACOO(&fe, A);
172: #endif
173:   MatView(A, PETSC_VIEWER_STDOUT_WORLD);

175:   MatDestroy(&A);
176:   DestroyFEStruct(&fe);
177:   PetscFinalize();
178:   return 0;
179: }

181: /*TEST
182:   build:
183:     requires: cuda kokkos_kernels
184:     depends: ex18cu.cu ex18kok.kokkos.cxx

186:   testset:
187:     filter: grep -v "type"
188:     output_file: output/ex18_1.out

190:     test:
191:       suffix: kok
192:       requires: kokkos_kernels
193:       args: -mat_type aijkokkos

195:     test:
196:       suffix: cuda
197:       requires: cuda
198:       args: -mat_type aijcusparse

200: TEST*/