Actual source code: ex1.c

  1: static char help[] = "Tests LU, Cholesky, and QR factorization and MatMatSolve() for a sequential dense matrix. \n\
  2:                       For MATSEQDENSE matrix, the factorization is just a thin wrapper to LAPACK.       \n\
  3:                       For MATSEQDENSECUDA, it uses cusolverDn routines \n\n";

  5: #include <petscmat.h>

  7: static PetscErrorCode createMatsAndVecs(PetscInt m, PetscInt n, PetscInt nrhs, PetscBool full, Mat *_mat, Mat *_RHS, Mat *_SOLU, Vec *_x, Vec *_y, Vec *_b)
  8: {
  9:   PetscRandom rand;
 10:   Mat         mat, RHS, SOLU;
 11:   PetscInt    rstart, rend;
 12:   PetscInt    cstart, cend;
 13:   PetscScalar value = 1.0;
 14:   Vec         x, y, b;

 16:   PetscFunctionBegin;
 17:   /* create multiple vectors RHS and SOLU */
 18:   PetscCall(MatCreate(PETSC_COMM_WORLD, &RHS));
 19:   PetscCall(MatSetSizes(RHS, PETSC_DECIDE, PETSC_DECIDE, m, nrhs));
 20:   PetscCall(MatSetType(RHS, MATDENSE));
 21:   PetscCall(MatSetOptionsPrefix(RHS, "rhs_"));
 22:   PetscCall(MatSetFromOptions(RHS));
 23:   PetscCall(MatSeqDenseSetPreallocation(RHS, NULL));

 25:   PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rand));
 26:   PetscCall(PetscRandomSetFromOptions(rand));
 27:   PetscCall(MatSetRandom(RHS, rand));

 29:   if (m == n) PetscCall(MatDuplicate(RHS, MAT_DO_NOT_COPY_VALUES, &SOLU));
 30:   else {
 31:     PetscCall(MatCreate(PETSC_COMM_WORLD, &SOLU));
 32:     PetscCall(MatSetSizes(SOLU, PETSC_DECIDE, PETSC_DECIDE, n, nrhs));
 33:     PetscCall(MatSetType(SOLU, MATDENSE));
 34:     PetscCall(MatSeqDenseSetPreallocation(SOLU, NULL));
 35:   }
 36:   PetscCall(MatSetRandom(SOLU, rand));

 38:   /* create matrix */
 39:   PetscCall(MatCreate(PETSC_COMM_WORLD, &mat));
 40:   PetscCall(MatSetSizes(mat, PETSC_DECIDE, PETSC_DECIDE, m, n));
 41:   PetscCall(MatSetType(mat, MATDENSE));
 42:   PetscCall(MatSetFromOptions(mat));
 43:   PetscCall(MatSetUp(mat));
 44:   PetscCall(MatGetOwnershipRange(mat, &rstart, &rend));
 45:   PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));
 46:   if (!full) {
 47:     for (PetscInt i = rstart; i < rend; i++) {
 48:       if (m == n) {
 49:         value = (PetscReal)i + 1;
 50:         PetscCall(MatSetValues(mat, 1, &i, 1, &i, &value, INSERT_VALUES));
 51:       } else {
 52:         for (PetscInt j = cstart; j < cend; j++) {
 53:           value = ((PetscScalar)i + 1.) / (PetscSqr(i - j) + 1.);
 54:           PetscCall(MatSetValues(mat, 1, &i, 1, &j, &value, INSERT_VALUES));
 55:         }
 56:       }
 57:     }
 58:     PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
 59:     PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
 60:   } else {
 61:     PetscCall(MatSetRandom(mat, rand));
 62:     if (m == n) {
 63:       Mat T;

 65:       PetscCall(MatMatTransposeMult(mat, mat, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &T));
 66:       PetscCall(MatDestroy(&mat));
 67:       mat = T;
 68:     }
 69:   }

 71:   /* create single vectors */
 72:   PetscCall(MatCreateVecs(mat, &x, &b));
 73:   PetscCall(VecDuplicate(x, &y));
 74:   PetscCall(VecSet(x, value));
 75:   PetscCall(PetscRandomDestroy(&rand));
 76:   *_mat  = mat;
 77:   *_RHS  = RHS;
 78:   *_SOLU = SOLU;
 79:   *_x    = x;
 80:   *_y    = y;
 81:   *_b    = b;
 82:   PetscFunctionReturn(PETSC_SUCCESS);
 83: }

 85: int main(int argc, char **argv)
 86: {
 87:   Mat         mat, F, RHS, SOLU;
 88:   MatInfo     info;
 89:   PetscInt    m = 15, n = 10, i, j, nrhs = 2;
 90:   Vec         x, y, b, ytmp;
 91:   IS          perm;
 92:   PetscReal   norm, tol = PETSC_SMALL;
 93:   PetscMPIInt size;
 94:   char        solver[64];
 95:   PetscBool   inplace, full = PETSC_FALSE, ldl = PETSC_TRUE, qr = PETSC_TRUE;

 97:   PetscFunctionBeginUser;
 98:   PetscCall(PetscInitialize(&argc, &argv, NULL, help));
 99:   PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
100:   PetscCheck(size == 1, PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "This is a uniprocessor example only!");
101:   PetscCall(PetscStrncpy(solver, MATSOLVERPETSC, sizeof(solver)));
102:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL));
103:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL));
104:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-nrhs", &nrhs, NULL));
105:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-ldl", &ldl, NULL));
106:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-qr", &qr, NULL));
107:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-full", &full, NULL));
108:   PetscCall(PetscOptionsGetString(NULL, NULL, "-solver_type", solver, sizeof(solver), NULL));

110:   PetscCall(createMatsAndVecs(n, n, nrhs, full, &mat, &RHS, &SOLU, &x, &y, &b));
111:   PetscCall(VecDuplicate(y, &ytmp));

113:   /* Only SeqDense* support in-place factorizations and NULL permutations */
114:   PetscCall(PetscObjectBaseTypeCompare((PetscObject)mat, MATSEQDENSE, &inplace));
115:   PetscCall(MatGetLocalSize(mat, &i, NULL));
116:   PetscCall(MatGetOwnershipRange(mat, &j, NULL));
117:   PetscCall(ISCreateStride(PETSC_COMM_WORLD, i, j, 1, &perm));

119:   PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
120:   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "matrix nonzeros = %" PetscInt_FMT ", allocated nonzeros = %" PetscInt_FMT "\n", (PetscInt)info.nz_used, (PetscInt)info.nz_allocated));
121:   PetscCall(MatMult(mat, x, b));

123:   /* Cholesky factorization - perm and factinfo are ignored by LAPACK */
124:   /* in-place Cholesky */
125:   if (inplace) {
126:     Mat RHS2;

128:     if (!ldl) PetscCall(MatSetOption(mat, MAT_SPD, PETSC_TRUE));
129:     PetscCall(MatDuplicate(mat, MAT_COPY_VALUES, &F));
130:     PetscCall(MatCholeskyFactor(F, perm, 0));
131:     PetscCall(MatSolve(F, b, y));
132:     PetscCall(VecAXPY(y, -1.0, x));
133:     PetscCall(VecNorm(y, NORM_2, &norm));
134:     if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Warning: Norm of error for in-place Cholesky %g\n", (double)norm));

136:     PetscCall(MatMatSolve(F, RHS, SOLU));
137:     PetscCall(MatMatMult(mat, SOLU, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &RHS2));
138:     PetscCall(MatAXPY(RHS, -1.0, RHS2, SAME_NONZERO_PATTERN));
139:     PetscCall(MatNorm(RHS, NORM_FROBENIUS, &norm));
140:     if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Error: Norm of residual for in-place Cholesky (MatMatSolve) %g\n", (double)norm));
141:     PetscCall(MatDestroy(&F));
142:     PetscCall(MatDestroy(&RHS2));
143:   }

145:   /* out-of-place Cholesky */
146:   if (!ldl) PetscCall(MatSetOption(mat, MAT_SPD, PETSC_TRUE));
147:   PetscCall(MatGetFactor(mat, solver, MAT_FACTOR_CHOLESKY, &F));
148:   PetscCall(MatCholeskyFactorSymbolic(F, mat, perm, 0));
149:   PetscCall(MatCholeskyFactorNumeric(F, mat, 0));
150:   PetscCall(MatSolve(F, b, y));
151:   PetscCall(VecAXPY(y, -1.0, x));
152:   PetscCall(VecNorm(y, NORM_2, &norm));
153:   if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Warning: Norm of error for out-of-place Cholesky %g\n", (double)norm));
154:   PetscCall(MatDestroy(&F));

156:   /* LU factorization - perms and factinfo are ignored by LAPACK */
157:   i = n - 1;
158:   PetscCall(MatZeroRows(mat, 1, &i, -1.0, NULL, NULL));
159:   PetscCall(MatMult(mat, x, b));

161:   /* in-place LU */
162:   if (inplace) {
163:     Mat RHS2;

165:     PetscCall(MatDuplicate(mat, MAT_COPY_VALUES, &F));
166:     PetscCall(MatLUFactor(F, perm, perm, 0));
167:     PetscCall(MatSolve(F, b, y));
168:     PetscCall(VecAXPY(y, -1.0, x));
169:     PetscCall(VecNorm(y, NORM_2, &norm));
170:     if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Warning: Norm of error for in-place LU %g\n", (double)norm));
171:     PetscCall(MatMatSolve(F, RHS, SOLU));
172:     PetscCall(MatMatMult(mat, SOLU, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &RHS2));
173:     PetscCall(MatAXPY(RHS, -1.0, RHS2, SAME_NONZERO_PATTERN));
174:     PetscCall(MatNorm(RHS, NORM_FROBENIUS, &norm));
175:     if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Error: Norm of residual for in-place LU (MatMatSolve) %g\n", (double)norm));
176:     PetscCall(MatDestroy(&F));
177:     PetscCall(MatDestroy(&RHS2));
178:   }

180:   /* out-of-place LU */
181:   PetscCall(MatGetFactor(mat, solver, MAT_FACTOR_LU, &F));
182:   PetscCall(MatLUFactorSymbolic(F, mat, perm, perm, 0));
183:   PetscCall(MatLUFactorNumeric(F, mat, 0));
184:   PetscCall(MatSolve(F, b, y));
185:   PetscCall(VecAXPY(y, -1.0, x));
186:   PetscCall(VecNorm(y, NORM_2, &norm));
187:   if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Warning: Norm of error for out-of-place LU %g\n", (double)norm));

189:   /* free space */
190:   PetscCall(ISDestroy(&perm));
191:   PetscCall(MatDestroy(&F));
192:   PetscCall(MatDestroy(&mat));
193:   PetscCall(MatDestroy(&RHS));
194:   PetscCall(MatDestroy(&SOLU));
195:   PetscCall(VecDestroy(&x));
196:   PetscCall(VecDestroy(&b));
197:   PetscCall(VecDestroy(&y));
198:   PetscCall(VecDestroy(&ytmp));

200:   if (qr) {
201:     /* setup rectangular */
202:     PetscCall(createMatsAndVecs(m, n, nrhs, full, &mat, &RHS, &SOLU, &x, &y, &b));
203:     PetscCall(VecDuplicate(y, &ytmp));

205:     /* QR factorization - perms and factinfo are ignored by LAPACK */
206:     PetscCall(MatMult(mat, x, b));

208:     /* in-place QR */
209:     if (inplace) {
210:       Mat SOLU2;

212:       PetscCall(MatDuplicate(mat, MAT_COPY_VALUES, &F));
213:       PetscCall(MatQRFactor(F, NULL, 0));
214:       PetscCall(MatSolve(F, b, y));
215:       PetscCall(VecAXPY(y, -1.0, x));
216:       PetscCall(VecNorm(y, NORM_2, &norm));
217:       if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Warning: Norm of error for in-place QR %g\n", (double)norm));
218:       PetscCall(MatMatMult(mat, SOLU, MAT_REUSE_MATRIX, PETSC_DETERMINE, &RHS));
219:       PetscCall(MatDuplicate(SOLU, MAT_DO_NOT_COPY_VALUES, &SOLU2));
220:       PetscCall(MatMatSolve(F, RHS, SOLU2));
221:       PetscCall(MatAXPY(SOLU2, -1.0, SOLU, SAME_NONZERO_PATTERN));
222:       PetscCall(MatNorm(SOLU2, NORM_FROBENIUS, &norm));
223:       if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Error: Norm of error for in-place QR (MatMatSolve) %g\n", (double)norm));
224:       PetscCall(MatDestroy(&F));
225:       PetscCall(MatDestroy(&SOLU2));
226:     }

228:     /* out-of-place QR */
229:     PetscCall(MatGetFactor(mat, solver, MAT_FACTOR_QR, &F));
230:     PetscCall(MatQRFactorSymbolic(F, mat, NULL, NULL));
231:     PetscCall(MatQRFactorNumeric(F, mat, NULL));
232:     PetscCall(MatSolve(F, b, y));
233:     PetscCall(VecAXPY(y, -1.0, x));
234:     PetscCall(VecNorm(y, NORM_2, &norm));
235:     if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Warning: Norm of error for out-of-place QR %g\n", (double)norm));

237:     if (m == n) {
238:       /* out-of-place MatSolveTranspose */
239:       PetscCall(MatMultTranspose(mat, x, b));
240:       PetscCall(MatSolveTranspose(F, b, y));
241:       PetscCall(VecAXPY(y, -1.0, x));
242:       PetscCall(VecNorm(y, NORM_2, &norm));
243:       if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Warning: Norm of error for out-of-place QR %g\n", (double)norm));
244:     }

246:     /* free space */
247:     PetscCall(MatDestroy(&F));
248:     PetscCall(MatDestroy(&mat));
249:     PetscCall(MatDestroy(&RHS));
250:     PetscCall(MatDestroy(&SOLU));
251:     PetscCall(VecDestroy(&x));
252:     PetscCall(VecDestroy(&b));
253:     PetscCall(VecDestroy(&y));
254:     PetscCall(VecDestroy(&ytmp));
255:   }
256:   PetscCall(PetscFinalize());
257:   return 0;
258: }

260: /*TEST

262:    test:

264:    test:
265:      requires: cuda
266:      suffix: seqdensecuda
267:      args: -mat_type seqdensecuda -rhs_mat_type seqdensecuda -ldl 0 -solver_type {{petsc cuda}}
268:      output_file: output/ex1_1.out

270:    test:
271:      requires: cuda
272:      suffix: seqdensecuda_2
273:      args: -ldl 0 -solver_type cuda
274:      output_file: output/ex1_1.out

276:    test:
277:      requires: cuda
278:      suffix: seqdensecuda_seqaijcusparse
279:      args: -mat_type seqaijcusparse -rhs_mat_type seqdensecuda -qr 0
280:      output_file: output/ex1_2.out

282:    test:
283:      requires: cuda viennacl
284:      suffix: seqdensecuda_seqaijviennacl
285:      args: -mat_type seqaijviennacl -rhs_mat_type seqdensecuda -qr 0
286:      output_file: output/ex1_2.out

288:    test:
289:      suffix: 4
290:      args: -m 10 -n 10
291:      output_file: output/ex1_1.out

293: TEST*/