Actual source code: crl.c
2: /*
3: Defines a matrix-vector product for the MATSEQAIJCRL matrix class.
4: This class is derived from the MATSEQAIJ class and retains the
5: compressed row storage (aka Yale sparse matrix format) but augments
6: it with a column oriented storage that is more efficient for
7: matrix vector products on Vector machines.
9: CRL stands for constant row length (that is the same number of columns
10: is kept (padded with zeros) for each row of the sparse matrix.
11: */
12: #include <../src/mat/impls/aij/seq/crl/crl.h>
14: PetscErrorCode MatDestroy_SeqAIJCRL(Mat A)
15: {
16: Mat_AIJCRL *aijcrl = (Mat_AIJCRL *)A->spptr;
18: /* Free everything in the Mat_AIJCRL data structure. */
19: if (aijcrl) PetscFree2(aijcrl->acols, aijcrl->icols);
20: PetscFree(A->spptr);
21: PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);
22: MatDestroy_SeqAIJ(A);
23: return 0;
24: }
26: PetscErrorCode MatDuplicate_AIJCRL(Mat A, MatDuplicateOption op, Mat *M)
27: {
28: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot duplicate AIJCRL matrices yet");
29: }
31: PetscErrorCode MatSeqAIJCRL_create_aijcrl(Mat A)
32: {
33: Mat_SeqAIJ *a = (Mat_SeqAIJ *)(A)->data;
34: Mat_AIJCRL *aijcrl = (Mat_AIJCRL *)A->spptr;
35: PetscInt m = A->rmap->n; /* Number of rows in the matrix. */
36: PetscInt *aj = a->j; /* From the CSR representation; points to the beginning of each row. */
37: PetscInt i, j, rmax = a->rmax, *icols, *ilen = a->ilen;
38: MatScalar *aa = a->a;
39: PetscScalar *acols;
41: aijcrl->nz = a->nz;
42: aijcrl->m = A->rmap->n;
43: aijcrl->rmax = rmax;
45: PetscFree2(aijcrl->acols, aijcrl->icols);
46: PetscMalloc2(rmax * m, &aijcrl->acols, rmax * m, &aijcrl->icols);
47: acols = aijcrl->acols;
48: icols = aijcrl->icols;
49: for (i = 0; i < m; i++) {
50: for (j = 0; j < ilen[i]; j++) {
51: acols[j * m + i] = *aa++;
52: icols[j * m + i] = *aj++;
53: }
54: for (; j < rmax; j++) { /* empty column entries */
55: acols[j * m + i] = 0.0;
56: icols[j * m + i] = (j) ? icols[(j - 1) * m + i] : 0; /* handle case where row is EMPTY */
57: }
58: }
59: PetscInfo(A, "Percentage of 0's introduced for vectorized multiply %g. Rmax= %" PetscInt_FMT "\n", 1.0 - ((double)a->nz) / ((double)(rmax * m)), rmax);
60: return 0;
61: }
63: PetscErrorCode MatAssemblyEnd_SeqAIJCRL(Mat A, MatAssemblyType mode)
64: {
65: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
67: a->inode.use = PETSC_FALSE;
69: MatAssemblyEnd_SeqAIJ(A, mode);
70: if (mode == MAT_FLUSH_ASSEMBLY) return 0;
72: /* Now calculate the permutation and grouping information. */
73: MatSeqAIJCRL_create_aijcrl(A);
74: return 0;
75: }
77: #include <../src/mat/impls/aij/seq/crl/ftn-kernels/fmultcrl.h>
79: /*
80: Shared by both sequential and parallel versions of CRL matrix: MATMPIAIJCRL and MATSEQAIJCRL
81: - the scatter is used only in the parallel version
83: */
84: PetscErrorCode MatMult_AIJCRL(Mat A, Vec xx, Vec yy)
85: {
86: Mat_AIJCRL *aijcrl = (Mat_AIJCRL *)A->spptr;
87: PetscInt m = aijcrl->m; /* Number of rows in the matrix. */
88: PetscInt rmax = aijcrl->rmax, *icols = aijcrl->icols;
89: PetscScalar *acols = aijcrl->acols;
90: PetscScalar *y;
91: const PetscScalar *x;
92: #if !defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL)
93: PetscInt i, j, ii;
94: #endif
96: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
97: #pragma disjoint(*x, *y, *aa)
98: #endif
100: if (aijcrl->xscat) {
101: VecCopy(xx, aijcrl->xwork);
102: /* get remote values needed for local part of multiply */
103: VecScatterBegin(aijcrl->xscat, xx, aijcrl->fwork, INSERT_VALUES, SCATTER_FORWARD);
104: VecScatterEnd(aijcrl->xscat, xx, aijcrl->fwork, INSERT_VALUES, SCATTER_FORWARD);
105: xx = aijcrl->xwork;
106: }
108: VecGetArrayRead(xx, &x);
109: VecGetArray(yy, &y);
111: #if defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL)
112: fortranmultcrl_(&m, &rmax, x, y, icols, acols);
113: #else
115: /* first column */
116: for (j = 0; j < m; j++) y[j] = acols[j] * x[icols[j]];
118: /* other columns */
119: #if defined(PETSC_HAVE_CRAY_VECTOR)
120: #pragma _CRI preferstream
121: #endif
122: for (i = 1; i < rmax; i++) {
123: ii = i * m;
124: #if defined(PETSC_HAVE_CRAY_VECTOR)
125: #pragma _CRI prefervector
126: #endif
127: for (j = 0; j < m; j++) y[j] = y[j] + acols[ii + j] * x[icols[ii + j]];
128: }
129: #if defined(PETSC_HAVE_CRAY_VECTOR)
130: #pragma _CRI ivdep
131: #endif
133: #endif
134: PetscLogFlops(2.0 * aijcrl->nz - m);
135: VecRestoreArrayRead(xx, &x);
136: VecRestoreArray(yy, &y);
137: return 0;
138: }
140: /* MatConvert_SeqAIJ_SeqAIJCRL converts a SeqAIJ matrix into a
141: * SeqAIJCRL matrix. This routine is called by the MatCreate_SeqAIJCRL()
142: * routine, but can also be used to convert an assembled SeqAIJ matrix
143: * into a SeqAIJCRL one. */
144: PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJCRL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
145: {
146: Mat B = *newmat;
147: Mat_AIJCRL *aijcrl;
148: PetscBool sametype;
150: if (reuse == MAT_INITIAL_MATRIX) MatDuplicate(A, MAT_COPY_VALUES, &B);
151: PetscObjectTypeCompare((PetscObject)A, type, &sametype);
152: if (sametype) return 0;
154: PetscNew(&aijcrl);
155: B->spptr = (void *)aijcrl;
157: /* Set function pointers for methods that we inherit from AIJ but override. */
158: B->ops->duplicate = MatDuplicate_AIJCRL;
159: B->ops->assemblyend = MatAssemblyEnd_SeqAIJCRL;
160: B->ops->destroy = MatDestroy_SeqAIJCRL;
161: B->ops->mult = MatMult_AIJCRL;
163: /* If A has already been assembled, compute the permutation. */
164: if (A->assembled) MatSeqAIJCRL_create_aijcrl(B);
165: PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJCRL);
166: *newmat = B;
167: return 0;
168: }
170: /*@C
171: MatCreateSeqAIJCRL - Creates a sparse matrix of type `MATSEQAIJCRL`.
172: This type inherits from `MATSEQAIJ`, but stores some additional
173: information that is used to allow better vectorization of
174: the matrix-vector product. At the cost of increased storage, the `MATSEQAIJ` formatted
175: matrix can be copied to a format in which pieces of the matrix are
176: stored in ELLPACK format, allowing the vectorized matrix multiply
177: routine to use stride-1 memory accesses. As with the `MATSEQAIJ` type, it is
178: important to preallocate matrix storage in order to get good assembly
179: performance.
181: Collective
183: Input Parameters:
184: + comm - MPI communicator, set to `PETSC_COMM_SELF`
185: . m - number of rows
186: . n - number of columns
187: . nz - number of nonzeros per row (same for all rows)
188: - nnz - array containing the number of nonzeros in the various rows
189: (possibly different for each row) or NULL
191: Output Parameter:
192: . A - the matrix
194: Note:
195: If nnz is given then nz is ignored
197: Level: intermediate
199: .seealso: `MatCreate()`, `MatCreateMPIAIJPERM()`, `MatSetValues()`
200: @*/
201: PetscErrorCode MatCreateSeqAIJCRL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
202: {
203: MatCreate(comm, A);
204: MatSetSizes(*A, m, n, m, n);
205: MatSetType(*A, MATSEQAIJCRL);
206: MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, nnz);
207: return 0;
208: }
210: PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJCRL(Mat A)
211: {
212: MatSetType(A, MATSEQAIJ);
213: MatConvert_SeqAIJ_SeqAIJCRL(A, MATSEQAIJCRL, MAT_INPLACE_MATRIX, &A);
214: return 0;
215: }