Actual source code: mpisellcuda.cu
  1: #include <petscconf.h>
  2: #include <petscdevice.h>
  3: #include <../src/mat/impls/sell/mpi/mpisell.h>

  5: static PetscErrorCode MatMPISELLSetPreallocation_MPISELLCUDA(Mat B, PetscInt d_rlenmax, const PetscInt d_rlen[], PetscInt o_rlenmax, const PetscInt o_rlen[])
  6: {
  7:   Mat_MPISELL *b = (Mat_MPISELL *)B->data;

  9:   PetscFunctionBegin;
 10:   PetscCall(PetscLayoutSetUp(B->rmap));
 11:   PetscCall(PetscLayoutSetUp(B->cmap));

 13:   if (!B->preallocated) {
 14:     /* Explicitly create 2 MATSEQSELLCUDA matrices. */
 15:     PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
 16:     PetscCall(MatBindToCPU(b->A, B->boundtocpu));
 17:     PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
 18:     PetscCall(MatSetType(b->A, MATSEQSELLCUDA));
 19:     PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
 20:     PetscCall(MatBindToCPU(b->B, B->boundtocpu));
 21:     PetscCall(MatSetSizes(b->B, B->rmap->n, B->cmap->N, B->rmap->n, B->cmap->N));
 22:     PetscCall(MatSetType(b->B, MATSEQSELLCUDA));
 23:   }
 24:   PetscCall(MatSeqSELLSetPreallocation(b->A, d_rlenmax, d_rlen));
 25:   PetscCall(MatSeqSELLSetPreallocation(b->B, o_rlenmax, o_rlen));
 26:   B->preallocated  = PETSC_TRUE;
 27:   B->was_assembled = PETSC_FALSE;
 28:   B->assembled     = PETSC_FALSE;
 29:   PetscFunctionReturn(PETSC_SUCCESS);
 30: }

 32: static PetscErrorCode MatSetFromOptions_MPISELLCUDA(Mat, PetscOptionItems)
 33: {
 34:   return PETSC_SUCCESS;
 35: }

 37: static PetscErrorCode MatAssemblyEnd_MPISELLCUDA(Mat A, MatAssemblyType mode)
 38: {
 39:   PetscFunctionBegin;
 40:   PetscCall(MatAssemblyEnd_MPISELL(A, mode));
 41:   if (!A->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(VecSetType(((Mat_MPISELL *)A->data)->lvec, VECSEQCUDA));
 42:   PetscFunctionReturn(PETSC_SUCCESS);
 43: }

 45: static PetscErrorCode MatDestroy_MPISELLCUDA(Mat A)
 46: {
 47:   PetscFunctionBegin;
 48:   PetscCall(MatDestroy_MPISELL(A));
 49:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", NULL));
 50:   PetscFunctionReturn(PETSC_SUCCESS);
 51: }

 53: PETSC_INTERN PetscErrorCode MatConvert_MPISELL_MPISELLCUDA(Mat B, MatType, MatReuse reuse, Mat *newmat)
 54: {
 55:   Mat_MPISELL *a;
 56:   Mat          A;

 58:   PetscFunctionBegin;
 59:   PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA));
 60:   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(B, MAT_COPY_VALUES, newmat));
 61:   else if (reuse == MAT_REUSE_MATRIX) PetscCall(MatCopy(B, *newmat, SAME_NONZERO_PATTERN));
 62:   A             = *newmat;
 63:   A->boundtocpu = PETSC_FALSE;
 64:   PetscCall(PetscFree(A->defaultvectype));
 65:   PetscCall(PetscStrallocpy(VECCUDA, &A->defaultvectype));

 67:   a = (Mat_MPISELL *)A->data;
 68:   if (a->A) PetscCall(MatSetType(a->A, MATSEQSELLCUDA));
 69:   if (a->B) PetscCall(MatSetType(a->B, MATSEQSELLCUDA));
 70:   if (a->lvec) PetscCall(VecSetType(a->lvec, VECSEQCUDA));

 72:   A->ops->assemblyend    = MatAssemblyEnd_MPISELLCUDA;
 73:   A->ops->setfromoptions = MatSetFromOptions_MPISELLCUDA;
 74:   A->ops->destroy        = MatDestroy_MPISELLCUDA;

 76:   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPISELLCUDA));
 77:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", MatMPISELLSetPreallocation_MPISELLCUDA));
 78:   PetscFunctionReturn(PETSC_SUCCESS);
 79: }

 81: PETSC_EXTERN PetscErrorCode MatCreate_MPISELLCUDA(Mat A)
 82: {
 83:   PetscFunctionBegin;
 84:   PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA));
 85:   PetscCall(MatCreate_MPISELL(A));
 86:   PetscCall(MatConvert_MPISELL_MPISELLCUDA(A, MATMPISELLCUDA, MAT_INPLACE_MATRIX, &A));
 87:   PetscFunctionReturn(PETSC_SUCCESS);
 88: }

 90: /*@
 91:   MatCreateSELLCUDA - Creates a sparse matrix in SELL format.
  This matrix will ultimately be pushed down to NVIDIA GPUs.

 94:   Collective

 96:   Input Parameters:
+ comm  - MPI communicator
 98: . m     - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
 99:            This value should be the same as the local size used in creating the
100:            y vector for the matrix-vector product y = Ax.
. n     - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax. For square matrices `n` is almost always `m`.
104: . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
105: . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
106: . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
107:            (same value is used for all local rows)
108: . d_nnz - array containing the number of nonzeros in the various rows of the
109:            DIAGONAL portion of the local submatrix (possibly different for each row)
110:            or `NULL`, if `d_nz` is used to specify the nonzero structure.
111:            The size of this array is equal to the number of local rows, i.e `m`.
112:            For matrices you plan to factor you must leave room for the diagonal entry and
113:            put in the entry even if it is zero.
114: . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
115:            submatrix (same value is used for all local rows).
116: - o_nnz - array containing the number of nonzeros in the various rows of the
117:            OFF-DIAGONAL portion of the local submatrix (possibly different for
118:            each row) or `NULL`, if `o_nz` is used to specify the nonzero
119:            structure. The size of this array is equal to the number
120:            of local rows, i.e `m`.

122:   Output Parameter:
123: . A - the matrix

125:   Level: intermediate

127:   Notes:
  If `d_nnz` (`o_nnz`) is given then `d_nz` (`o_nz`) is ignored.

  Specify the preallocated storage with either `d_nz` (`o_nz`) or `d_nnz` (`o_nnz`), not both.
  Set `d_nz` (`o_nz`) = `PETSC_DEFAULT` and `d_nnz` (`o_nnz`) = `NULL` for PETSc to control dynamic memory
  allocation.

134: .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSELL()`, `MatSetValues()`, `MATMPISELLCUDA`, `MATSELLCUDA`
135: @*/
136: PetscErrorCode MatCreateSELLCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
137: {
138:   PetscMPIInt size;

140:   PetscFunctionBegin;
141:   PetscCall(MatCreate(comm, A));
142:   PetscCall(MatSetSizes(*A, m, n, M, N));
143:   PetscCallMPI(MPI_Comm_size(comm, &size));
144:   if (size > 1) {
145:     PetscCall(MatSetType(*A, MATMPISELLCUDA));
146:     PetscCall(MatMPISELLSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
147:   } else {
148:     PetscCall(MatSetType(*A, MATSEQSELLCUDA));
149:     PetscCall(MatSeqSELLSetPreallocation(*A, d_nz, d_nnz));
150:   }
151:   PetscFunctionReturn(PETSC_SUCCESS);
152: }

154: /*MC
155:    MATSELLCUDA - "sellcuda" = "mpisellcuda" - A matrix type to be used for sparse matrices.

157:    Sliced ELLPACK matrix type whose data resides on NVIDIA GPUs.

159:    This matrix type is identical to `MATSEQSELLCUDA` when constructed with a single process communicator,
160:    and `MATMPISELLCUDA` otherwise.  As a result, for single process communicators,
161:    `MatSeqSELLSetPreallocation()` is supported, and similarly `MatMPISELLSetPreallocation()` is supported
162:    for communicators controlling multiple processes.  It is recommended that you call both of
163:    the above preallocation routines for simplicity.

165:    Options Database Key:
.  -mat_type mpisellcuda - sets the matrix type to `MATMPISELLCUDA` during a call to `MatSetFromOptions()`

168:   Level: beginner

170: .seealso: `MatCreateSELLCUDA()`, `MATSEQSELLCUDA`, `MatCreateSeqSELLCUDA()`, `MatCUDAFormatOperation()`
171: M*/