Actual source code: ex69.c
1: static char help[] = "Tests MatCreateDenseCUDA(), MatDenseCUDAPlaceArray(), MatDenseCUDAReplaceArray(), and MatDenseCUDAResetArray()\n\
2: or MatCreateDenseHIP(), MatDenseHIPPlaceArray(), MatDenseHIPReplaceArray(), and MatDenseHIPResetArray(),\n\
3: as well as MatDiagonalScale() on device.\n";
5: #include <petscmat.h>
6: #include <petscpkg_version.h>
8: #if !defined(PETSC_PKG_CUDA_VERSION_GE)
9: #define PETSC_PKG_CUDA_VERSION_GE(MAJ, MIN, PAT) 0
10: #endif
11: #define PetscConcat3(a, b, c) PetscConcat(PetscConcat(a, b), c)
12: #if PetscDefined(HAVE_CUDA)
13: #define DEVICE_TAG CUDA
14: #elif PetscDefined(HAVE_HIP)
15: #define DEVICE_TAG HIP
16: #else
17: #define DEVICE_TAG
18: #endif
19: #define VecDeviceGetArray PetscConcat3(Vec, DEVICE_TAG, GetArray)
20: #define VecDeviceRestoreArray PetscConcat3(Vec, DEVICE_TAG, RestoreArray)
21: #define MatCreateDenseDevice PetscConcat(MatCreateDense, DEVICE_TAG)
22: #define MatDenseDeviceReplaceArray PetscConcat3(MatDense, DEVICE_TAG, ReplaceArray)
23: #define MatDenseDevicePlaceArray PetscConcat3(MatDense, DEVICE_TAG, PlaceArray)
24: #define MatDenseDeviceResetArray PetscConcat3(MatDense, DEVICE_TAG, ResetArray)
26: static PetscErrorCode MatMult_S(Mat S, Vec x, Vec y)
27: {
28: Mat A;
30: PetscFunctionBeginUser;
31: PetscCall(MatShellGetContext(S, &A));
32: PetscCall(MatMult(A, x, y));
33: PetscFunctionReturn(PETSC_SUCCESS);
34: }
36: static PetscBool test_cusparse_transgen = PETSC_FALSE;
38: static PetscErrorCode MatMultTranspose_S(Mat S, Vec x, Vec y)
39: {
40: Mat A;
42: PetscFunctionBeginUser;
43: PetscCall(MatShellGetContext(S, &A));
44: PetscCall(MatMultTranspose(A, x, y));
46: /* alternate transgen true and false to test code logic */
47: PetscCall(MatSetOption(A, MAT_FORM_EXPLICIT_TRANSPOSE, test_cusparse_transgen));
48: test_cusparse_transgen = (PetscBool)!test_cusparse_transgen;
49: PetscFunctionReturn(PETSC_SUCCESS);
50: }
52: int main(int argc, char **argv)
53: {
54: Mat A, B, C, S;
55: Vec t, v, ll, rr;
56: PetscScalar *vv, *aa;
57: // We met a mysterious cudaErrorMisalignedAddress error on some systems with cuda-12.0,1 but not
58: // with prior cuda-11.2,3,7,8 versions. Making nloc an even number somehow 'fixes' the problem.
59: // See more at https://gitlab.com/petsc/petsc/-/merge_requests/6225
60: #if PETSC_PKG_CUDA_VERSION_GE(12, 0, 0)
61: PetscInt n = 32;
62: #else
63: PetscInt n = 30;
64: #endif
65: PetscInt k = 6, l = 0, i, Istart, Iend, nloc, bs, test = 1;
66: PetscBool flg, reset, use_shell = PETSC_FALSE;
67: VecType vtype;
69: PetscFunctionBeginUser;
70: PetscCall(PetscInitialize(&argc, &argv, NULL, help));
71: PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL));
72: PetscCall(PetscOptionsGetInt(NULL, NULL, "-k", &k, NULL));
73: PetscCall(PetscOptionsGetInt(NULL, NULL, "-l", &l, NULL));
74: PetscCall(PetscOptionsGetInt(NULL, NULL, "-test", &test, NULL));
75: PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_shell", &use_shell, NULL));
76: PetscCheck(k >= 0, PETSC_COMM_WORLD, PETSC_ERR_USER, "k %" PetscInt_FMT " must be positive", k);
77: PetscCheck(l >= 0, PETSC_COMM_WORLD, PETSC_ERR_USER, "l %" PetscInt_FMT " must be positive", l);
78: PetscCheck(l <= k, PETSC_COMM_WORLD, PETSC_ERR_USER, "l %" PetscInt_FMT " must be smaller or equal than k %" PetscInt_FMT, l, k);
80: /* sparse matrix */
81: PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
82: PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, n, n));
83: PetscCall(MatSetType(A, PetscDefined(HAVE_CUDA) ? MATAIJCUSPARSE : MATAIJHIPSPARSE));
84: PetscCall(MatSetOptionsPrefix(A, "A_"));
85: PetscCall(MatSetFromOptions(A));
86: PetscCall(MatSetUp(A));
88: /* test special case for MATSEQAIJCUSPARSE and MATSEQAIJHIPSPARSE to generate explicit transpose (not default) */
89: PetscCall(MatSetOption(A, MAT_FORM_EXPLICIT_TRANSPOSE, PETSC_TRUE));
91: PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
92: for (i = Istart; i < Iend; i++) {
93: if (i > 0) PetscCall(MatSetValue(A, i, i - 1, -1.0, INSERT_VALUES));
94: if (i < n - 1) PetscCall(MatSetValue(A, i, i + 1, -1.0, INSERT_VALUES));
95: PetscCall(MatSetValue(A, i, i, 2.0, INSERT_VALUES));
96: }
97: PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
98: PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
100: /* template vector */
101: PetscCall(MatCreateVecs(A, NULL, &t));
102: PetscCall(VecGetType(t, &vtype));
104: /* long vector, contains the stacked columns of an nxk dense matrix */
105: PetscCall(VecGetLocalSize(t, &nloc));
106: PetscCall(VecGetBlockSize(t, &bs));
107: PetscCall(VecCreate(PetscObjectComm((PetscObject)t), &v));
108: PetscCall(VecSetType(v, vtype));
109: PetscCall(VecSetSizes(v, k * nloc, k * n));
110: PetscCall(VecSetBlockSize(v, bs));
111: PetscCall(VecSetRandom(v, NULL));
113: /* dense matrix that contains the columns of v */
114: PetscCall(VecDeviceGetArray(v, &vv));
116: /* test few cases for MATDENSECUDA and MATDENSEHIP handling pointers */
117: switch (test) {
118: case 1:
119: PetscCall(MatCreateDenseDevice(PetscObjectComm((PetscObject)v), nloc, PETSC_DECIDE, n, k - l, vv, &B)); /* pass a pointer to avoid allocation of storage */
120: PetscCall(MatDenseDeviceReplaceArray(B, NULL)); /* replace with a null pointer, the value after BVRestoreMat */
121: PetscCall(MatDenseDevicePlaceArray(B, vv + l * nloc)); /* set the actual pointer */
122: reset = PETSC_TRUE;
123: break;
124: case 2:
125: PetscCall(MatCreateDenseDevice(PetscObjectComm((PetscObject)v), nloc, PETSC_DECIDE, n, k - l, NULL, &B));
126: PetscCall(MatDenseDevicePlaceArray(B, vv + l * nloc)); /* set the actual pointer */
127: reset = PETSC_TRUE;
128: break;
129: default:
130: PetscCall(MatCreateDenseDevice(PetscObjectComm((PetscObject)v), nloc, PETSC_DECIDE, n, k - l, vv + l * nloc, &B));
131: reset = PETSC_FALSE;
132: break;
133: }
135: /* test MatMatMult() */
136: if (use_shell) {
137: /* we could have called the general converter below, but we explicitly set the operations
138: ourselves to test MatProductSymbolic_X_Dense, MatProductNumeric_X_Dense code */
139: /* PetscCall(MatConvert(A, MATSHELL, MAT_INITIAL_MATRIX, &S)); */
140: PetscCall(MatCreateShell(PetscObjectComm((PetscObject)v), nloc, nloc, n, n, A, &S));
141: PetscCall(MatShellSetOperation(S, MATOP_MULT, (PetscErrorCodeFn *)MatMult_S));
142: PetscCall(MatShellSetOperation(S, MATOP_MULT_TRANSPOSE, (PetscErrorCodeFn *)MatMultTranspose_S));
143: PetscCall(MatShellSetVecType(S, vtype));
144: } else {
145: PetscCall(PetscObjectReference((PetscObject)A));
146: S = A;
147: }
149: PetscCall(MatCreateDenseDevice(PetscObjectComm((PetscObject)v), nloc, PETSC_DECIDE, n, k - l, NULL, &C));
151: /* test MatMatMult() */
152: PetscCall(MatProductCreateWithMat(S, B, NULL, C));
153: PetscCall(MatProductSetType(C, MATPRODUCT_AB));
154: PetscCall(MatProductSetFromOptions(C));
155: PetscCall(MatProductSymbolic(C));
156: PetscCall(MatProductNumeric(C));
157: PetscCall(MatMatMultEqual(S, B, C, 10, &flg));
158: if (!flg) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Error MatMatMult()\n"));
160: /* test MatTransposeMatMult() */
161: PetscCall(MatProductCreateWithMat(S, B, NULL, C));
162: PetscCall(MatProductSetType(C, MATPRODUCT_AtB));
163: PetscCall(MatProductSetFromOptions(C));
164: PetscCall(MatProductSymbolic(C));
165: PetscCall(MatProductNumeric(C));
166: PetscCall(MatTransposeMatMultEqual(S, B, C, 10, &flg));
167: if (!flg) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Error MatTransposeMatMult()\n"));
169: PetscCall(MatDestroy(&C));
170: PetscCall(MatDestroy(&S));
172: /* test MatDiagonalScale() */
173: PetscCall(MatDuplicate(B, MAT_COPY_VALUES, &C));
174: PetscCall(MatBindToCPU(C, PETSC_TRUE));
175: PetscCall(MatCreateVecs(B, &rr, &ll));
176: PetscCall(VecSetRandom(ll, NULL));
177: PetscCall(VecSetRandom(rr, NULL));
178: PetscCall(MatDiagonalScale(C, ll, rr));
179: PetscCall(MatDiagonalScale(B, ll, rr));
180: PetscCall(MatMultEqual(B, C, 10, &flg));
181: if (!flg) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Error MatDiagonalScale()\n"));
183: PetscCall(VecDestroy(&ll));
184: PetscCall(VecDestroy(&rr));
185: PetscCall(MatDestroy(&C));
187: /* finished using B */
188: PetscCall(MatDenseGetArrayAndMemType(B, &aa, NULL));
189: PetscCheck(vv == aa - l * nloc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Wrong array");
190: PetscCall(MatDenseRestoreArrayAndMemType(B, &aa));
191: if (reset) PetscCall(MatDenseDeviceResetArray(B));
192: PetscCall(VecDeviceRestoreArray(v, &vv));
194: if (test == 1) {
195: PetscCall(MatDenseGetArrayAndMemType(B, &aa, NULL));
196: PetscCheck(!aa, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Expected a null pointer");
197: PetscCall(MatDenseRestoreArrayAndMemType(B, &aa));
198: }
200: /* free work space */
201: PetscCall(MatDestroy(&B));
202: PetscCall(MatDestroy(&A));
203: PetscCall(VecDestroy(&t));
204: PetscCall(VecDestroy(&v));
205: PetscCall(PetscFinalize());
206: return 0;
207: }
209: /*TEST
211: build:
212: requires: defined(PETSC_DEVICELANGUAGE_CXX)
214: testset:
215: nsize: {{1 2}}
216: args: -test {{0 1 2}} -k 6 -l {{0 5}} -use_shell {{0 1}}
217: output_file: output/empty.out
218: test:
219: requires: cuda
220: suffix: 1_cuda
221: args: -A_mat_type {{aij aijcusparse}}
222: test:
223: requires: hip
224: suffix: 1_hip
225: args: -A_mat_type aijhipsparse
227: TEST*/