// mpiaijhipsparse.hip.cpp
1: /* Portions of this code are under:
2: Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
3: */
4: #include <../src/mat/impls/aij/mpi/mpiaij.h>
5: #include <../src/mat/impls/aij/seq/seqhipsparse/hipsparsematimpl.h>
6: #include <../src/mat/impls/aij/mpi/mpihipsparse/mpihipsparsematimpl.h>
7: #include <thrust/advance.h>
8: #include <thrust/partition.h>
9: #include <thrust/sort.h>
10: #include <thrust/unique.h>
11: #include <petscsf.h>
// Thrust functor: copies element 0 of a tuple into element 1, i.e. dst = src
// when applied over a zipped (src, dst) iterator pair with thrust::for_each.
// NOTE(review): not referenced in this chunk of the file — presumably used by
// code outside this view; confirm before removing.
struct VecHIPEquals {
  template <typename Tuple>
  __host__ __device__ void operator()(Tuple t)
  {
    thrust::get<1>(t) = thrust::get<0>(t);
  }
};
// Container destroy callback for the device-side COO assembly structure created in
// MatSetPreallocationCOO_MPIAIJHIPSPARSE(): releases the SF reference and every
// hipMalloc'ed array, then the struct itself.
// NOTE(review): the name says CUSPARSE in a HIPSPARSE file — likely a leftover from
// porting the CUDA version; harmless (static linkage) but worth renaming together
// with its single caller.
static PetscErrorCode MatCOOStructDestroy_MPIAIJCUSPARSE(void *data)
{
  MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data;

  PetscFunctionBegin;
  // The SF was reference-counted when the device struct was created, so this only drops one reference
  PetscCall(PetscSFDestroy(&coo->sf));
  PetscCallHIP(hipFree(coo->Ajmap1));
  PetscCallHIP(hipFree(coo->Aperm1));
  PetscCallHIP(hipFree(coo->Bjmap1));
  PetscCallHIP(hipFree(coo->Bperm1));
  PetscCallHIP(hipFree(coo->Aimap2));
  PetscCallHIP(hipFree(coo->Ajmap2));
  PetscCallHIP(hipFree(coo->Aperm2));
  PetscCallHIP(hipFree(coo->Bimap2));
  PetscCallHIP(hipFree(coo->Bjmap2));
  PetscCallHIP(hipFree(coo->Bperm2));
  PetscCallHIP(hipFree(coo->Cperm1));
  PetscCallHIP(hipFree(coo->sendbuf));
  PetscCallHIP(hipFree(coo->recvbuf));
  PetscCall(PetscFree(coo));
  PetscFunctionReturn(PETSC_SUCCESS);
}
44: static PetscErrorCode MatSetPreallocationCOO_MPIAIJHIPSPARSE(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
45: {
46: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data;
47: PetscBool dev_ij = PETSC_FALSE;
48: PetscMemType mtype = PETSC_MEMTYPE_HOST;
49: PetscInt *i, *j;
50: PetscContainer container_h, container_d;
51: MatCOOStruct_MPIAIJ *coo_h, *coo_d;
53: PetscFunctionBegin;
54: PetscCall(PetscFree(mpiaij->garray));
55: PetscCall(VecDestroy(&mpiaij->lvec));
56: #if defined(PETSC_USE_CTABLE)
57: PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
58: #else
59: PetscCall(PetscFree(mpiaij->colmap));
60: #endif
61: PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
62: mat->assembled = PETSC_FALSE;
63: mat->was_assembled = PETSC_FALSE;
64: PetscCall(PetscGetMemType(coo_i, &mtype));
65: if (PetscMemTypeDevice(mtype)) {
66: dev_ij = PETSC_TRUE;
67: PetscCall(PetscMalloc2(coo_n, &i, coo_n, &j));
68: PetscCallHIP(hipMemcpy(i, coo_i, coo_n * sizeof(PetscInt), hipMemcpyDeviceToHost));
69: PetscCallHIP(hipMemcpy(j, coo_j, coo_n * sizeof(PetscInt), hipMemcpyDeviceToHost));
70: } else {
71: i = coo_i;
72: j = coo_j;
73: }
75: PetscCall(MatSetPreallocationCOO_MPIAIJ(mat, coo_n, coo_i, coo_j));
76: if (dev_ij) PetscCall(PetscFree2(i, j));
77: mat->offloadmask = PETSC_OFFLOAD_CPU;
78: // Create the GPU memory
79: PetscCall(MatSeqAIJHIPSPARSECopyToGPU(mpiaij->A));
80: PetscCall(MatSeqAIJHIPSPARSECopyToGPU(mpiaij->B));
82: // Copy the COO struct to device
83: PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container_h));
84: PetscCall(PetscContainerGetPointer(container_h, (void **)&coo_h));
85: PetscCall(PetscMalloc1(1, &coo_d));
86: *coo_d = *coo_h; // do a shallow copy and then amend fields in coo_d
88: PetscCall(PetscObjectReference((PetscObject)coo_d->sf)); // Since we destroy the sf in both coo_h and coo_d
89: PetscCallHIP(hipMalloc((void **)&coo_d->Ajmap1, (coo_h->Annz + 1) * sizeof(PetscCount)));
90: PetscCallHIP(hipMalloc((void **)&coo_d->Aperm1, coo_h->Atot1 * sizeof(PetscCount)));
91: PetscCallHIP(hipMalloc((void **)&coo_d->Bjmap1, (coo_h->Bnnz + 1) * sizeof(PetscCount)));
92: PetscCallHIP(hipMalloc((void **)&coo_d->Bperm1, coo_h->Btot1 * sizeof(PetscCount)));
93: PetscCallHIP(hipMalloc((void **)&coo_d->Aimap2, coo_h->Annz2 * sizeof(PetscCount)));
94: PetscCallHIP(hipMalloc((void **)&coo_d->Ajmap2, (coo_h->Annz2 + 1) * sizeof(PetscCount)));
95: PetscCallHIP(hipMalloc((void **)&coo_d->Aperm2, coo_h->Atot2 * sizeof(PetscCount)));
96: PetscCallHIP(hipMalloc((void **)&coo_d->Bimap2, coo_h->Bnnz2 * sizeof(PetscCount)));
97: PetscCallHIP(hipMalloc((void **)&coo_d->Bjmap2, (coo_h->Bnnz2 + 1) * sizeof(PetscCount)));
98: PetscCallHIP(hipMalloc((void **)&coo_d->Bperm2, coo_h->Btot2 * sizeof(PetscCount)));
99: PetscCallHIP(hipMalloc((void **)&coo_d->Cperm1, coo_h->sendlen * sizeof(PetscCount)));
100: PetscCallHIP(hipMalloc((void **)&coo_d->sendbuf, coo_h->sendlen * sizeof(PetscScalar)));
101: PetscCallHIP(hipMalloc((void **)&coo_d->recvbuf, coo_h->recvlen * sizeof(PetscScalar)));
103: PetscCallHIP(hipMemcpy(coo_d->Ajmap1, coo_h->Ajmap1, (coo_h->Annz + 1) * sizeof(PetscCount), hipMemcpyHostToDevice));
104: PetscCallHIP(hipMemcpy(coo_d->Aperm1, coo_h->Aperm1, coo_h->Atot1 * sizeof(PetscCount), hipMemcpyHostToDevice));
105: PetscCallHIP(hipMemcpy(coo_d->Bjmap1, coo_h->Bjmap1, (coo_h->Bnnz + 1) * sizeof(PetscCount), hipMemcpyHostToDevice));
106: PetscCallHIP(hipMemcpy(coo_d->Bperm1, coo_h->Bperm1, coo_h->Btot1 * sizeof(PetscCount), hipMemcpyHostToDevice));
107: PetscCallHIP(hipMemcpy(coo_d->Aimap2, coo_h->Aimap2, coo_h->Annz2 * sizeof(PetscCount), hipMemcpyHostToDevice));
108: PetscCallHIP(hipMemcpy(coo_d->Ajmap2, coo_h->Ajmap2, (coo_h->Annz2 + 1) * sizeof(PetscCount), hipMemcpyHostToDevice));
109: PetscCallHIP(hipMemcpy(coo_d->Aperm2, coo_h->Aperm2, coo_h->Atot2 * sizeof(PetscCount), hipMemcpyHostToDevice));
110: PetscCallHIP(hipMemcpy(coo_d->Bimap2, coo_h->Bimap2, coo_h->Bnnz2 * sizeof(PetscCount), hipMemcpyHostToDevice));
111: PetscCallHIP(hipMemcpy(coo_d->Bjmap2, coo_h->Bjmap2, (coo_h->Bnnz2 + 1) * sizeof(PetscCount), hipMemcpyHostToDevice));
112: PetscCallHIP(hipMemcpy(coo_d->Bperm2, coo_h->Bperm2, coo_h->Btot2 * sizeof(PetscCount), hipMemcpyHostToDevice));
113: PetscCallHIP(hipMemcpy(coo_d->Cperm1, coo_h->Cperm1, coo_h->sendlen * sizeof(PetscCount), hipMemcpyHostToDevice));
115: // Put the COO struct in a container and then attach that to the matrix
116: PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container_d));
117: PetscCall(PetscContainerSetPointer(container_d, coo_d));
118: PetscCall(PetscContainerSetUserDestroy(container_d, MatCOOStructDestroy_MPIAIJCUSPARSE));
119: PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Device", (PetscObject)container_d));
120: PetscCall(PetscContainerDestroy(&container_d));
121: PetscFunctionReturn(PETSC_SUCCESS);
122: }
124: __global__ static void MatPackCOOValues(const PetscScalar kv[], PetscCount nnz, const PetscCount perm[], PetscScalar buf[])
125: {
126: PetscCount i = blockIdx.x * blockDim.x + threadIdx.x;
127: const PetscCount grid_size = gridDim.x * blockDim.x;
128: for (; i < nnz; i += grid_size) buf[i] = kv[perm[i]];
129: }
131: __global__ static void MatAddLocalCOOValues(const PetscScalar kv[], InsertMode imode, PetscCount Annz, const PetscCount Ajmap1[], const PetscCount Aperm1[], PetscScalar Aa[], PetscCount Bnnz, const PetscCount Bjmap1[], const PetscCount Bperm1[], PetscScalar Ba[])
132: {
133: PetscCount i = blockIdx.x * blockDim.x + threadIdx.x;
134: const PetscCount grid_size = gridDim.x * blockDim.x;
135: for (; i < Annz + Bnnz; i += grid_size) {
136: PetscScalar sum = 0.0;
137: if (i < Annz) {
138: for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += kv[Aperm1[k]];
139: Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
140: } else {
141: i -= Annz;
142: for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += kv[Bperm1[k]];
143: Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
144: }
145: }
146: }
148: __global__ static void MatAddRemoteCOOValues(const PetscScalar kv[], PetscCount Annz2, const PetscCount Aimap2[], const PetscCount Ajmap2[], const PetscCount Aperm2[], PetscScalar Aa[], PetscCount Bnnz2, const PetscCount Bimap2[], const PetscCount Bjmap2[], const PetscCount Bperm2[], PetscScalar Ba[])
149: {
150: PetscCount i = blockIdx.x * blockDim.x + threadIdx.x;
151: const PetscCount grid_size = gridDim.x * blockDim.x;
152: for (; i < Annz2 + Bnnz2; i += grid_size) {
153: if (i < Annz2) {
154: for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += kv[Aperm2[k]];
155: } else {
156: i -= Annz2;
157: for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += kv[Bperm2[k]];
158: }
159: }
160: }
// Set/add matrix values from a COO value array v[] (host or device memory), using the
// device-side COO structure built by MatSetPreallocationCOO_MPIAIJHIPSPARSE. Local
// entries are folded into the diagonal (A) and off-diagonal (B) blocks while remote
// entries are sent to their owning ranks via the PetscSF; the SF communication is
// overlapped with the local kernel. Statement order here is load-bearing: pack ->
// ReduceBegin -> local kernel -> ReduceEnd -> remote kernel.
static PetscErrorCode MatSetValuesCOO_MPIAIJHIPSPARSE(Mat mat, const PetscScalar v[], InsertMode imode)
{
  Mat_MPIAIJ          *mpiaij = static_cast<Mat_MPIAIJ *>(mat->data);
  Mat                  A = mpiaij->A, B = mpiaij->B;
  PetscScalar         *Aa, *Ba;
  const PetscScalar   *v1 = v;
  PetscMemType         memtype;
  PetscContainer       container;
  MatCOOStruct_MPIAIJ *coo;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Device", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix");
  PetscCall(PetscContainerGetPointer(container, (void **)&coo));

  // Local aliases for the (device-resident) COO struct fields, purely for readability
  const auto &Annz   = coo->Annz;
  const auto &Annz2  = coo->Annz2;
  const auto &Bnnz   = coo->Bnnz;
  const auto &Bnnz2  = coo->Bnnz2;
  const auto &vsend  = coo->sendbuf;
  const auto &v2     = coo->recvbuf;
  const auto &Ajmap1 = coo->Ajmap1;
  const auto &Ajmap2 = coo->Ajmap2;
  const auto &Aimap2 = coo->Aimap2;
  const auto &Bjmap1 = coo->Bjmap1;
  const auto &Bjmap2 = coo->Bjmap2;
  const auto &Bimap2 = coo->Bimap2;
  const auto &Aperm1 = coo->Aperm1;
  const auto &Aperm2 = coo->Aperm2;
  const auto &Bperm1 = coo->Bperm1;
  const auto &Bperm2 = coo->Bperm2;
  const auto &Cperm1 = coo->Cperm1;

  PetscCall(PetscGetMemType(v, &memtype));
  if (PetscMemTypeHost(memtype)) { /* If user gave v[] in host, we need to copy it to device */
    PetscCallHIP(hipMalloc((void **)&v1, coo->n * sizeof(PetscScalar)));
    PetscCallHIP(hipMemcpy((void *)v1, v, coo->n * sizeof(PetscScalar), hipMemcpyHostToDevice));
  }

  if (imode == INSERT_VALUES) {
    PetscCall(MatSeqAIJHIPSPARSEGetArrayWrite(A, &Aa)); /* write matrix values */
    PetscCall(MatSeqAIJHIPSPARSEGetArrayWrite(B, &Ba));
  } else {
    PetscCall(MatSeqAIJHIPSPARSEGetArray(A, &Aa)); /* read & write matrix values */
    PetscCall(MatSeqAIJHIPSPARSEGetArray(B, &Ba));
  }

  PetscCall(PetscLogGpuTimeBegin());
  /* Pack entries to be sent to remote */
  if (coo->sendlen) {
    hipLaunchKernelGGL(HIP_KERNEL_NAME(MatPackCOOValues), dim3((coo->sendlen + 255) / 256), dim3(256), 0, PetscDefaultHipStream, v1, coo->sendlen, Cperm1, vsend);
    PetscCallHIP(hipPeekAtLastError());
  }

  /* Send remote entries to their owner and overlap the communication with local computation */
  PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HIP, vsend, PETSC_MEMTYPE_HIP, v2, MPI_REPLACE));
  /* Add local entries to A and B */
  if (Annz + Bnnz > 0) {
    hipLaunchKernelGGL(HIP_KERNEL_NAME(MatAddLocalCOOValues), dim3((Annz + Bnnz + 255) / 256), dim3(256), 0, PetscDefaultHipStream, v1, imode, Annz, Ajmap1, Aperm1, Aa, Bnnz, Bjmap1, Bperm1, Ba);
    PetscCallHIP(hipPeekAtLastError());
  }
  PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, vsend, v2, MPI_REPLACE));

  /* Add received remote entries to A and B */
  if (Annz2 + Bnnz2 > 0) {
    hipLaunchKernelGGL(HIP_KERNEL_NAME(MatAddRemoteCOOValues), dim3((Annz2 + Bnnz2 + 255) / 256), dim3(256), 0, PetscDefaultHipStream, v2, Annz2, Aimap2, Ajmap2, Aperm2, Aa, Bnnz2, Bimap2, Bjmap2, Bperm2, Ba);
    PetscCallHIP(hipPeekAtLastError());
  }
  PetscCall(PetscLogGpuTimeEnd());

  if (imode == INSERT_VALUES) {
    PetscCall(MatSeqAIJHIPSPARSERestoreArrayWrite(A, &Aa));
    PetscCall(MatSeqAIJHIPSPARSERestoreArrayWrite(B, &Ba));
  } else {
    PetscCall(MatSeqAIJHIPSPARSERestoreArray(A, &Aa));
    PetscCall(MatSeqAIJHIPSPARSERestoreArray(B, &Ba));
  }
  // Free the temporary device copy only if we made one (host input case)
  if (PetscMemTypeHost(memtype)) PetscCallHIP(hipFree((void *)v1));
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(PETSC_SUCCESS);
}
244: static PetscErrorCode MatMPIAIJGetLocalMatMerge_MPIAIJHIPSPARSE(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
245: {
246: Mat Ad, Ao;
247: const PetscInt *cmap;
249: PetscFunctionBegin;
250: PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
251: PetscCall(MatSeqAIJHIPSPARSEMergeMats(Ad, Ao, scall, A_loc));
252: if (glob) {
253: PetscInt cst, i, dn, on, *gidx;
255: PetscCall(MatGetLocalSize(Ad, NULL, &dn));
256: PetscCall(MatGetLocalSize(Ao, NULL, &on));
257: PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
258: PetscCall(PetscMalloc1(dn + on, &gidx));
259: for (i = 0; i < dn; i++) gidx[i] = cst + i;
260: for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
261: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
262: }
263: PetscFunctionReturn(PETSC_SUCCESS);
264: }
// Preallocate the MPIAIJHIPSPARSE matrix B: (re)create its sequential diagonal (b->A)
// and off-diagonal (b->B) blocks as SEQAIJHIPSPARSE matrices with the requested
// per-row nonzero counts, and propagate the configured hipSPARSE storage formats.
// The destroy/recreate order below is deliberate — do not reorder.
static PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJHIPSPARSE(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  Mat_MPIAIJ          *b               = (Mat_MPIAIJ *)B->data;
  Mat_MPIAIJHIPSPARSE *hipsparseStruct = (Mat_MPIAIJHIPSPARSE *)b->spptr;
  PetscInt             i;

  PetscFunctionBegin;
  // If the matrix was in hash-based (unassembled) insertion mode, restore the saved ops table
  if (B->hash_active) {
    B->ops[0]      = b->cops;
    B->hash_active = PETSC_FALSE;
  }
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  // Validate user-provided nnz arrays in debug builds only
  if (PetscDefined(USE_DEBUG) && d_nnz) {
    for (i = 0; i < B->rmap->n; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]);
  }
  if (PetscDefined(USE_DEBUG) && o_nnz) {
    for (i = 0; i < B->rmap->n; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]);
  }
  // Reset the column map / ghost data; preallocation invalidates them
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));
  /* Because the B will have been resized we simply destroy it and create a new one each time */
  PetscCall(MatDestroy(&b->B));
  if (!b->A) {
    PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
    PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
  }
  if (!b->B) {
    PetscMPIInt size;
    PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
    PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
    // Off-diagonal block is empty (zero columns) in the uniprocess case
    PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  }
  PetscCall(MatSetType(b->A, MATSEQAIJHIPSPARSE));
  PetscCall(MatSetType(b->B, MATSEQAIJHIPSPARSE));
  PetscCall(MatBindToCPU(b->A, B->boundtocpu));
  PetscCall(MatBindToCPU(b->B, B->boundtocpu));
  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  PetscCall(MatHIPSPARSESetFormat(b->A, MAT_HIPSPARSE_MULT, hipsparseStruct->diagGPUMatFormat));
  PetscCall(MatHIPSPARSESetFormat(b->B, MAT_HIPSPARSE_MULT, hipsparseStruct->offdiagGPUMatFormat));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
319: static PetscErrorCode MatMult_MPIAIJHIPSPARSE(Mat A, Vec xx, Vec yy)
320: {
321: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
323: PetscFunctionBegin;
324: PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
325: PetscCall((*a->A->ops->mult)(a->A, xx, yy));
326: PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
327: PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
328: PetscFunctionReturn(PETSC_SUCCESS);
329: }
331: static PetscErrorCode MatZeroEntries_MPIAIJHIPSPARSE(Mat A)
332: {
333: Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
335: PetscFunctionBegin;
336: PetscCall(MatZeroEntries(l->A));
337: PetscCall(MatZeroEntries(l->B));
338: PetscFunctionReturn(PETSC_SUCCESS);
339: }
341: static PetscErrorCode MatMultAdd_MPIAIJHIPSPARSE(Mat A, Vec xx, Vec yy, Vec zz)
342: {
343: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
345: PetscFunctionBegin;
346: PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
347: PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
348: PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
349: PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
350: PetscFunctionReturn(PETSC_SUCCESS);
351: }
353: static PetscErrorCode MatMultTranspose_MPIAIJHIPSPARSE(Mat A, Vec xx, Vec yy)
354: {
355: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
357: PetscFunctionBegin;
358: PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
359: PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
360: PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
361: PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
362: PetscFunctionReturn(PETSC_SUCCESS);
363: }
365: static PetscErrorCode MatHIPSPARSESetFormat_MPIAIJHIPSPARSE(Mat A, MatHIPSPARSEFormatOperation op, MatHIPSPARSEStorageFormat format)
366: {
367: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
368: Mat_MPIAIJHIPSPARSE *hipsparseStruct = (Mat_MPIAIJHIPSPARSE *)a->spptr;
370: PetscFunctionBegin;
371: switch (op) {
372: case MAT_HIPSPARSE_MULT_DIAG:
373: hipsparseStruct->diagGPUMatFormat = format;
374: break;
375: case MAT_HIPSPARSE_MULT_OFFDIAG:
376: hipsparseStruct->offdiagGPUMatFormat = format;
377: break;
378: case MAT_HIPSPARSE_ALL:
379: hipsparseStruct->diagGPUMatFormat = format;
380: hipsparseStruct->offdiagGPUMatFormat = format;
381: break;
382: default:
383: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unsupported operation %d for MatHIPSPARSEFormatOperation. Only MAT_HIPSPARSE_MULT_DIAG, MAT_HIPSPARSE_MULT_DIAG, and MAT_HIPSPARSE_MULT_ALL are currently supported.", op);
384: }
385: PetscFunctionReturn(PETSC_SUCCESS);
386: }
// Process the -mat_hipsparse_* options database keys controlling the storage format
// of the diagonal and off-diagonal blocks. Only applies to unfactored matrices.
// Note: -mat_hipsparse_storage_format is processed last, so it overrides the two
// block-specific options when both are given.
static PetscErrorCode MatSetFromOptions_MPIAIJHIPSPARSE(Mat A, PetscOptionItems *PetscOptionsObject)
{
  MatHIPSPARSEStorageFormat format;
  PetscBool                 flg;
  Mat_MPIAIJ               *a               = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJHIPSPARSE      *hipsparseStruct = (Mat_MPIAIJHIPSPARSE *)a->spptr;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJHIPSPARSE options");
  if (A->factortype == MAT_FACTOR_NONE) {
    PetscCall(PetscOptionsEnum("-mat_hipsparse_mult_diag_storage_format", "sets storage format of the diagonal blocks of (mpi)aijhipsparse gpu matrices for SpMV", "MatHIPSPARSESetFormat", MatHIPSPARSEStorageFormats, (PetscEnum)hipsparseStruct->diagGPUMatFormat, (PetscEnum *)&format, &flg));
    if (flg) PetscCall(MatHIPSPARSESetFormat(A, MAT_HIPSPARSE_MULT_DIAG, format));
    PetscCall(PetscOptionsEnum("-mat_hipsparse_mult_offdiag_storage_format", "sets storage format of the off-diagonal blocks (mpi)aijhipsparse gpu matrices for SpMV", "MatHIPSPARSESetFormat", MatHIPSPARSEStorageFormats, (PetscEnum)hipsparseStruct->offdiagGPUMatFormat, (PetscEnum *)&format, &flg));
    if (flg) PetscCall(MatHIPSPARSESetFormat(A, MAT_HIPSPARSE_MULT_OFFDIAG, format));
    PetscCall(PetscOptionsEnum("-mat_hipsparse_storage_format", "sets storage format of the diagonal and off-diagonal blocks (mpi)aijhipsparse gpu matrices for SpMV", "MatHIPSPARSESetFormat", MatHIPSPARSEStorageFormats, (PetscEnum)hipsparseStruct->diagGPUMatFormat, (PetscEnum *)&format, &flg));
    if (flg) PetscCall(MatHIPSPARSESetFormat(A, MAT_HIPSPARSE_ALL, format));
  }
  PetscOptionsHeadEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}
409: static PetscErrorCode MatAssemblyEnd_MPIAIJHIPSPARSE(Mat A, MatAssemblyType mode)
410: {
411: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
413: PetscFunctionBegin;
414: PetscCall(MatAssemblyEnd_MPIAIJ(A, mode));
415: if (mpiaij->lvec) PetscCall(VecSetType(mpiaij->lvec, VECSEQHIP));
416: PetscFunctionReturn(PETSC_SUCCESS);
417: }
// Destroy an MPIAIJHIPSPARSE matrix: free the HIP-specific spptr, unregister the
// type-specific composed methods, then delegate to the generic MPIAIJ destructor
// (which frees A->data and the rest). Order matters: spptr must go before
// MatDestroy_MPIAIJ frees the Mat_MPIAIJ that points to it.
static PetscErrorCode MatDestroy_MPIAIJHIPSPARSE(Mat A)
{
  Mat_MPIAIJ          *aij             = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJHIPSPARSE *hipsparseStruct = (Mat_MPIAIJHIPSPARSE *)aij->spptr;

  PetscFunctionBegin;
  PetscCheck(hipsparseStruct, PETSC_COMM_SELF, PETSC_ERR_COR, "Missing spptr");
  PetscCallCXX(delete hipsparseStruct);
  // Remove the composed functions installed by MatConvert_MPIAIJ_MPIAIJHIPSPARSE
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSetPreallocationCOO_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSetValuesCOO_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHIPSPARSESetFormat_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpiaijhipsparse_hypre_C", NULL));
  PetscCall(MatDestroy_MPIAIJ(A));
  PetscFunctionReturn(PETSC_SUCCESS);
}
// Convert an MPIAIJ matrix to MPIAIJHIPSPARSE: retype the sub-objects (blocks,
// ghost vector) to their HIP equivalents, install the HIP-specific ops table and
// composed methods, and allocate the type-specific spptr. Supports all three
// MatReuse modes; for MAT_INPLACE_MATRIX, B itself is transformed (*newmat == B).
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat B, MatType mtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIAIJ *a;
  Mat         A;

  PetscFunctionBegin;
  PetscCall(PetscDeviceInitialize(PETSC_DEVICE_HIP));
  if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(B, MAT_COPY_VALUES, newmat));
  else if (reuse == MAT_REUSE_MATRIX) PetscCall(MatCopy(B, *newmat, SAME_NONZERO_PATTERN));
  A             = *newmat;
  A->boundtocpu = PETSC_FALSE;
  // New vectors created for this matrix should default to HIP vectors
  PetscCall(PetscFree(A->defaultvectype));
  PetscCall(PetscStrallocpy(VECHIP, &A->defaultvectype));

  a = (Mat_MPIAIJ *)A->data;
  if (a->A) PetscCall(MatSetType(a->A, MATSEQAIJHIPSPARSE));
  if (a->B) PetscCall(MatSetType(a->B, MATSEQAIJHIPSPARSE));
  if (a->lvec) PetscCall(VecSetType(a->lvec, VECSEQHIP));

  // For MAT_REUSE_MATRIX the existing spptr is kept; otherwise allocate one if absent
  if (reuse != MAT_REUSE_MATRIX && !a->spptr) PetscCallCXX(a->spptr = new Mat_MPIAIJHIPSPARSE);

  A->ops->assemblyend           = MatAssemblyEnd_MPIAIJHIPSPARSE;
  A->ops->mult                  = MatMult_MPIAIJHIPSPARSE;
  A->ops->multadd               = MatMultAdd_MPIAIJHIPSPARSE;
  A->ops->multtranspose         = MatMultTranspose_MPIAIJHIPSPARSE;
  A->ops->setfromoptions        = MatSetFromOptions_MPIAIJHIPSPARSE;
  A->ops->destroy               = MatDestroy_MPIAIJHIPSPARSE;
  A->ops->zeroentries           = MatZeroEntries_MPIAIJHIPSPARSE;
  A->ops->productsetfromoptions = MatProductSetFromOptions_MPIAIJBACKEND;

  PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIAIJHIPSPARSE));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", MatMPIAIJGetLocalMatMerge_MPIAIJHIPSPARSE));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJHIPSPARSE));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHIPSPARSESetFormat_C", MatHIPSPARSESetFormat_MPIAIJHIPSPARSE));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJHIPSPARSE));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJHIPSPARSE));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpiaijhipsparse_hypre_C", MatConvert_AIJ_HYPRE));
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
// Type constructor registered for MATMPIAIJHIPSPARSE: build a plain MPIAIJ matrix,
// then convert it in place to the HIPSPARSE type.
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJHIPSPARSE(Mat A)
{
  PetscFunctionBegin;
  PetscCall(PetscDeviceInitialize(PETSC_DEVICE_HIP));
  PetscCall(MatCreate_MPIAIJ(A));
  PetscCall(MatConvert_MPIAIJ_MPIAIJHIPSPARSE(A, MATMPIAIJHIPSPARSE, MAT_INPLACE_MATRIX, &A));
  PetscFunctionReturn(PETSC_SUCCESS);
}
488: /*@
489: MatCreateAIJHIPSPARSE - Creates a sparse matrix in AIJ (compressed row) format
490: (the default parallel PETSc format).
492: Collective
494: Input Parameters:
495: + comm - MPI communicator, set to `PETSC_COMM_SELF`
496: . m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
497: This value should be the same as the local size used in creating the
498: y vector for the matrix-vector product y = Ax.
499: . n - This value should be the same as the local size used in creating the
500: x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
501: calculated if `N` is given) For square matrices `n` is almost always `m`.
502: . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
503: . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
504: . d_nz - number of nonzeros per row (same for all rows), for the "diagonal" portion of the matrix
505: . d_nnz - array containing the number of nonzeros in the various rows (possibly different for each row) or `NULL`, for the "diagonal" portion of the matrix
506: . o_nz - number of nonzeros per row (same for all rows), for the "off-diagonal" portion of the matrix
507: - o_nnz - array containing the number of nonzeros in the various rows (possibly different for each row) or `NULL`, for the "off-diagonal" portion of the matrix
509: Output Parameter:
510: . A - the matrix
512: Level: intermediate
514: Notes:
   This matrix will ultimately be pushed down to AMD GPUs and use the HIPSPARSE library for
516: calculations. For good matrix assembly performance the user should preallocate the matrix
517: storage by setting the parameter `nz` (or the array `nnz`).
519: It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
520: MatXXXXSetPreallocation() paradigm instead of this routine directly.
521: [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
523: If `d_nnz` (`o_nnz`) is given then `d_nz` (`o_nz`) is ignored
525: The `MATAIJ` format (compressed row storage), is fully compatible with standard Fortran
526: storage. That is, the stored row and column indices can begin at
527: either one (as in Fortran) or zero.
529: Specify the preallocated storage with either `d_nz` (`o_nz`) or `d_nnz` (`o_nnz`) (not both).
530: Set `d_nz` (`o_nz`) = `PETSC_DEFAULT` and `d_nnz` (`o_nnz`) = `NULL` for PETSc to control dynamic memory
531: allocation.
533: .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateAIJ()`, `MatSetValues()`, `MatSeqAIJSetColumnIndices()`, `MatCreateSeqAIJWithArrays()`, `MATMPIAIJHIPSPARSE`, `MATAIJHIPSPARSE`
534: @*/
535: PetscErrorCode MatCreateAIJHIPSPARSE(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
536: {
537: PetscMPIInt size;
539: PetscFunctionBegin;
540: PetscCall(MatCreate(comm, A));
541: PetscCall(MatSetSizes(*A, m, n, M, N));
542: PetscCallMPI(MPI_Comm_size(comm, &size));
543: if (size > 1) {
544: PetscCall(MatSetType(*A, MATMPIAIJHIPSPARSE));
545: PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
546: } else {
547: PetscCall(MatSetType(*A, MATSEQAIJHIPSPARSE));
548: PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
549: }
550: PetscFunctionReturn(PETSC_SUCCESS);
551: }
553: /*MC
   MATAIJHIPSPARSE - A matrix type to be used for sparse matrices; it is the same as `MATMPIAIJHIPSPARSE`.
556: A matrix type whose data resides on GPUs. These matrices can be in either
557: CSR, ELL, or Hybrid format. All matrix calculations are performed on AMD GPUs using the HIPSPARSE library.
559: This matrix type is identical to `MATSEQAIJHIPSPARSE` when constructed with a single process communicator,
560: and `MATMPIAIJHIPSPARSE` otherwise. As a result, for single process communicators,
561: `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
562: for communicators controlling multiple processes. It is recommended that you call both of
563: the above preallocation routines for simplicity.
565: Options Database Keys:
566: + -mat_type mpiaijhipsparse - sets the matrix type to `MATMPIAIJHIPSPARSE`
567: . -mat_hipsparse_storage_format csr - sets the storage format of diagonal and off-diagonal matrices. Other options include ell (ellpack) or hyb (hybrid).
568: . -mat_hipsparse_mult_diag_storage_format csr - sets the storage format of diagonal matrix. Other options include ell (ellpack) or hyb (hybrid).
569: - -mat_hipsparse_mult_offdiag_storage_format csr - sets the storage format of off-diagonal matrix. Other options include ell (ellpack) or hyb (hybrid).
571: Level: beginner
573: .seealso: [](ch_matrices), `Mat`, `MatCreateAIJHIPSPARSE()`, `MATSEQAIJHIPSPARSE`, `MATMPIAIJHIPSPARSE`, `MatCreateSeqAIJHIPSPARSE()`, `MatHIPSPARSESetFormat()`, `MatHIPSPARSEStorageFormat`, `MatHIPSPARSEFormatOperation`
574: M*/
576: /*MC
   MATMPIAIJHIPSPARSE - A matrix type to be used for sparse matrices; it is the same as `MATAIJHIPSPARSE`.
579: Level: beginner
581: .seealso: [](ch_matrices), `Mat`, `MATAIJHIPSPARSE`, `MATSEQAIJHIPSPARSE`
582: M*/