Actual source code: baij.c
1: #define PETSCMAT_DLL
3: /*
4: Defines the basic matrix operations for the BAIJ (compressed row)
5: matrix storage format.
6: */
7: #include src/mat/impls/baij/seq/baij.h
8: #include src/inline/spops.h
9: #include petscsys.h
11: #include src/inline/ilu.h
15: /*@C
16: MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
18: Collective on Mat
20: Input Parameters:
21: . mat - the matrix
23: Level: advanced
24: @*/
25: PetscErrorCode MatSeqBAIJInvertBlockDiagonal(Mat mat)
26: {
27: PetscErrorCode ierr,(*f)(Mat);
31: if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32: if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
34: PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);
35: if (f) {
36: (*f)(mat);
37: } else {
38: SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39: }
40: return(0);
41: }
46: PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47: {
48: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data;
50: PetscInt *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs;
51: PetscScalar *v = a->a,*odiag,*diag,*mdiag;
54: if (a->idiagvalid) return(0);
55: MatMarkDiagonal_SeqBAIJ(A);
56: diag_offset = a->diag;
57: if (!a->idiag) {
58: PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);
59: }
60: diag = a->idiag;
61: mdiag = a->idiag+bs*bs*mbs;
62: /* factor and invert each block */
63: switch (bs){
64: case 2:
65: for (i=0; i<mbs; i++) {
66: odiag = v + 4*diag_offset[i];
67: diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68: mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
69: Kernel_A_gets_inverse_A_2(diag);
70: diag += 4;
71: mdiag += 4;
72: }
73: break;
74: case 3:
75: for (i=0; i<mbs; i++) {
76: odiag = v + 9*diag_offset[i];
77: diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78: diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79: diag[8] = odiag[8];
80: mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
81: mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
82: mdiag[8] = odiag[8];
83: Kernel_A_gets_inverse_A_3(diag);
84: diag += 9;
85: mdiag += 9;
86: }
87: break;
88: case 4:
89: for (i=0; i<mbs; i++) {
90: odiag = v + 16*diag_offset[i];
91: PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));
92: PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));
93: Kernel_A_gets_inverse_A_4(diag);
94: diag += 16;
95: mdiag += 16;
96: }
97: break;
98: case 5:
99: for (i=0; i<mbs; i++) {
100: odiag = v + 25*diag_offset[i];
101: PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));
102: PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));
103: Kernel_A_gets_inverse_A_5(diag);
104: diag += 25;
105: mdiag += 25;
106: }
107: break;
108: default:
109: SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
110: }
111: a->idiagvalid = PETSC_TRUE;
112: return(0);
113: }
118: PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
119: {
120: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
121: PetscScalar *x,x1,x2,s1,s2;
122: const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag;
123: PetscErrorCode ierr;
124: PetscInt m = a->mbs,i,i2,nz,idx;
125: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
128: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
129: its = its*lits;
130: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
131: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
132: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
133: if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
134: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
136: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
138: diag = a->diag;
139: idiag = a->idiag;
140: VecGetArray(xx,&x);
141: VecGetArray(bb,(PetscScalar**)&b);
143: if (flag & SOR_ZERO_INITIAL_GUESS) {
144: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
145: x[0] = b[0]*idiag[0] + b[1]*idiag[2];
146: x[1] = b[0]*idiag[1] + b[1]*idiag[3];
147: i2 = 2;
148: idiag += 4;
149: for (i=1; i<m; i++) {
150: v = aa + 4*ai[i];
151: vi = aj + ai[i];
152: nz = diag[i] - ai[i];
153: s1 = b[i2]; s2 = b[i2+1];
154: while (nz--) {
155: idx = 2*(*vi++);
156: x1 = x[idx]; x2 = x[1+idx];
157: s1 -= v[0]*x1 + v[2]*x2;
158: s2 -= v[1]*x1 + v[3]*x2;
159: v += 4;
160: }
161: x[i2] = idiag[0]*s1 + idiag[2]*s2;
162: x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
163: idiag += 4;
164: i2 += 2;
165: }
166: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
167: PetscLogFlops(4*(a->nz));
168: }
169: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
170: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
171: i2 = 0;
172: mdiag = a->idiag+4*a->mbs;
173: for (i=0; i<m; i++) {
174: x1 = x[i2]; x2 = x[i2+1];
175: x[i2] = mdiag[0]*x1 + mdiag[2]*x2;
176: x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
177: mdiag += 4;
178: i2 += 2;
179: }
180: PetscLogFlops(6*m);
181: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
182: PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));
183: }
184: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
185: idiag = a->idiag+4*a->mbs - 4;
186: i2 = 2*m - 2;
187: x1 = x[i2]; x2 = x[i2+1];
188: x[i2] = idiag[0]*x1 + idiag[2]*x2;
189: x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
190: idiag -= 4;
191: i2 -= 2;
192: for (i=m-2; i>=0; i--) {
193: v = aa + 4*(diag[i]+1);
194: vi = aj + diag[i] + 1;
195: nz = ai[i+1] - diag[i] - 1;
196: s1 = x[i2]; s2 = x[i2+1];
197: while (nz--) {
198: idx = 2*(*vi++);
199: x1 = x[idx]; x2 = x[1+idx];
200: s1 -= v[0]*x1 + v[2]*x2;
201: s2 -= v[1]*x1 + v[3]*x2;
202: v += 4;
203: }
204: x[i2] = idiag[0]*s1 + idiag[2]*s2;
205: x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
206: idiag -= 4;
207: i2 -= 2;
208: }
209: PetscLogFlops(4*(a->nz));
210: }
211: } else {
212: SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
213: }
214: VecRestoreArray(xx,&x);
215: VecRestoreArray(bb,(PetscScalar**)&b);
216: return(0);
217: }
221: PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
222: {
223: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
224: PetscScalar *x,x1,x2,x3,s1,s2,s3;
225: const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag;
226: PetscErrorCode ierr;
227: PetscInt m = a->mbs,i,i2,nz,idx;
228: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
231: its = its*lits;
232: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
233: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
234: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
235: if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
236: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
238: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
240: diag = a->diag;
241: idiag = a->idiag;
242: VecGetArray(xx,&x);
243: VecGetArray(bb,(PetscScalar**)&b);
245: if (flag & SOR_ZERO_INITIAL_GUESS) {
246: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
247: x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
248: x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
249: x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
250: i2 = 3;
251: idiag += 9;
252: for (i=1; i<m; i++) {
253: v = aa + 9*ai[i];
254: vi = aj + ai[i];
255: nz = diag[i] - ai[i];
256: s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
257: while (nz--) {
258: idx = 3*(*vi++);
259: x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
260: s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
261: s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
262: s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
263: v += 9;
264: }
265: x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
266: x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
267: x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
268: idiag += 9;
269: i2 += 3;
270: }
271: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
272: PetscLogFlops(9*(a->nz));
273: }
274: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
275: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
276: i2 = 0;
277: mdiag = a->idiag+9*a->mbs;
278: for (i=0; i<m; i++) {
279: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
280: x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
281: x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
282: x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
283: mdiag += 9;
284: i2 += 3;
285: }
286: PetscLogFlops(15*m);
287: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
288: PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));
289: }
290: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
291: idiag = a->idiag+9*a->mbs - 9;
292: i2 = 3*m - 3;
293: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
294: x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
295: x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
296: x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
297: idiag -= 9;
298: i2 -= 3;
299: for (i=m-2; i>=0; i--) {
300: v = aa + 9*(diag[i]+1);
301: vi = aj + diag[i] + 1;
302: nz = ai[i+1] - diag[i] - 1;
303: s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
304: while (nz--) {
305: idx = 3*(*vi++);
306: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
307: s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
308: s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
309: s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
310: v += 9;
311: }
312: x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
313: x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
314: x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
315: idiag -= 9;
316: i2 -= 3;
317: }
318: PetscLogFlops(9*(a->nz));
319: }
320: } else {
321: SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
322: }
323: VecRestoreArray(xx,&x);
324: VecRestoreArray(bb,(PetscScalar**)&b);
325: return(0);
326: }
330: PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
331: {
332: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
333: PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4;
334: const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag;
335: PetscErrorCode ierr;
336: PetscInt m = a->mbs,i,i2,nz,idx;
337: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
340: its = its*lits;
341: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
342: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
343: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
344: if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
345: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
347: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
349: diag = a->diag;
350: idiag = a->idiag;
351: VecGetArray(xx,&x);
352: VecGetArray(bb,(PetscScalar**)&b);
354: if (flag & SOR_ZERO_INITIAL_GUESS) {
355: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
356: x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12];
357: x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13];
358: x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
359: x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
360: i2 = 4;
361: idiag += 16;
362: for (i=1; i<m; i++) {
363: v = aa + 16*ai[i];
364: vi = aj + ai[i];
365: nz = diag[i] - ai[i];
366: s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
367: while (nz--) {
368: idx = 4*(*vi++);
369: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
370: s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4;
371: s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4;
372: s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
373: s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
374: v += 16;
375: }
376: x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4;
377: x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4;
378: x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
379: x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
380: idiag += 16;
381: i2 += 4;
382: }
383: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
384: PetscLogFlops(16*(a->nz));
385: }
386: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
387: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
388: i2 = 0;
389: mdiag = a->idiag+16*a->mbs;
390: for (i=0; i<m; i++) {
391: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
392: x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4;
393: x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4;
394: x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
395: x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
396: mdiag += 16;
397: i2 += 4;
398: }
399: PetscLogFlops(28*m);
400: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
401: PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));
402: }
403: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
404: idiag = a->idiag+16*a->mbs - 16;
405: i2 = 4*m - 4;
406: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
407: x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4;
408: x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4;
409: x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
410: x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
411: idiag -= 16;
412: i2 -= 4;
413: for (i=m-2; i>=0; i--) {
414: v = aa + 16*(diag[i]+1);
415: vi = aj + diag[i] + 1;
416: nz = ai[i+1] - diag[i] - 1;
417: s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
418: while (nz--) {
419: idx = 4*(*vi++);
420: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
421: s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4;
422: s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4;
423: s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
424: s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
425: v += 16;
426: }
427: x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4;
428: x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4;
429: x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
430: x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
431: idiag -= 16;
432: i2 -= 4;
433: }
434: PetscLogFlops(16*(a->nz));
435: }
436: } else {
437: SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
438: }
439: VecRestoreArray(xx,&x);
440: VecRestoreArray(bb,(PetscScalar**)&b);
441: return(0);
442: }
446: PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
447: {
448: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
449: PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
450: const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag;
451: PetscErrorCode ierr;
452: PetscInt m = a->mbs,i,i2,nz,idx;
453: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
456: its = its*lits;
457: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
458: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
459: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
460: if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
461: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
463: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
465: diag = a->diag;
466: idiag = a->idiag;
467: VecGetArray(xx,&x);
468: VecGetArray(bb,(PetscScalar**)&b);
470: if (flag & SOR_ZERO_INITIAL_GUESS) {
471: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
472: x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
473: x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
474: x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
475: x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
476: x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
477: i2 = 5;
478: idiag += 25;
479: for (i=1; i<m; i++) {
480: v = aa + 25*ai[i];
481: vi = aj + ai[i];
482: nz = diag[i] - ai[i];
483: s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
484: while (nz--) {
485: idx = 5*(*vi++);
486: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
487: s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
488: s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
489: s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
490: s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
491: s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
492: v += 25;
493: }
494: x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
495: x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
496: x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
497: x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
498: x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
499: idiag += 25;
500: i2 += 5;
501: }
502: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
503: PetscLogFlops(25*(a->nz));
504: }
505: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
506: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
507: i2 = 0;
508: mdiag = a->idiag+25*a->mbs;
509: for (i=0; i<m; i++) {
510: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
511: x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
512: x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
513: x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
514: x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
515: x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
516: mdiag += 25;
517: i2 += 5;
518: }
519: PetscLogFlops(45*m);
520: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
521: PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));
522: }
523: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
524: idiag = a->idiag+25*a->mbs - 25;
525: i2 = 5*m - 5;
526: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
527: x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
528: x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
529: x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
530: x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
531: x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
532: idiag -= 25;
533: i2 -= 5;
534: for (i=m-2; i>=0; i--) {
535: v = aa + 25*(diag[i]+1);
536: vi = aj + diag[i] + 1;
537: nz = ai[i+1] - diag[i] - 1;
538: s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
539: while (nz--) {
540: idx = 5*(*vi++);
541: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
542: s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
543: s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
544: s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
545: s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
546: s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
547: v += 25;
548: }
549: x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
550: x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
551: x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
552: x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
553: x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
554: idiag -= 25;
555: i2 -= 5;
556: }
557: PetscLogFlops(25*(a->nz));
558: }
559: } else {
560: SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
561: }
562: VecRestoreArray(xx,&x);
563: VecRestoreArray(bb,(PetscScalar**)&b);
564: return(0);
565: }
567: /*
568: Special version for direct calls from Fortran (Used in PETSc-fun3d)
569: */
570: #if defined(PETSC_HAVE_FORTRAN_CAPS)
571: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
572: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
573: #define matsetvaluesblocked4_ matsetvaluesblocked4
574: #endif
579: void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
580: {
581: Mat A = *AA;
582: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
583: PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
584: PetscInt *ai=a->i,*ailen=a->ilen;
585: PetscInt *aj=a->j,stepval,lastcol = -1;
586: const PetscScalar *value = v;
587: MatScalar *ap,*aa = a->a,*bap;
590: if (A->rmap.bs != 4) SETERRABORT(A->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
591: stepval = (n-1)*4;
592: for (k=0; k<m; k++) { /* loop over added rows */
593: row = im[k];
594: rp = aj + ai[row];
595: ap = aa + 16*ai[row];
596: nrow = ailen[row];
597: low = 0;
598: high = nrow;
599: for (l=0; l<n; l++) { /* loop over added columns */
600: col = in[l];
601: if (col <= lastcol) low = 0; else high = nrow;
602: lastcol = col;
603: value = v + k*(stepval+4 + l)*4;
604: while (high-low > 7) {
605: t = (low+high)/2;
606: if (rp[t] > col) high = t;
607: else low = t;
608: }
609: for (i=low; i<high; i++) {
610: if (rp[i] > col) break;
611: if (rp[i] == col) {
612: bap = ap + 16*i;
613: for (ii=0; ii<4; ii++,value+=stepval) {
614: for (jj=ii; jj<16; jj+=4) {
615: bap[jj] += *value++;
616: }
617: }
618: goto noinsert2;
619: }
620: }
621: N = nrow++ - 1;
622: high++; /* added new column index thus must search to one higher than before */
623: /* shift up all the later entries in this row */
624: for (ii=N; ii>=i; ii--) {
625: rp[ii+1] = rp[ii];
626: PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
627: }
628: if (N >= i) {
629: PetscMemzero(ap+16*i,16*sizeof(MatScalar));
630: }
631: rp[i] = col;
632: bap = ap + 16*i;
633: for (ii=0; ii<4; ii++,value+=stepval) {
634: for (jj=ii; jj<16; jj+=4) {
635: bap[jj] = *value++;
636: }
637: }
638: noinsert2:;
639: low = i;
640: }
641: ailen[row] = nrow;
642: }
643: PetscFunctionReturnVoid();
644: }
647: #if defined(PETSC_HAVE_FORTRAN_CAPS)
648: #define matsetvalues4_ MATSETVALUES4
649: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
650: #define matsetvalues4_ matsetvalues4
651: #endif
656: void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
657: {
658: Mat A = *AA;
659: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
660: PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
661: PetscInt *ai=a->i,*ailen=a->ilen;
662: PetscInt *aj=a->j,brow,bcol;
663: PetscInt ridx,cidx,lastcol = -1;
664: MatScalar *ap,value,*aa=a->a,*bap;
665:
667: for (k=0; k<m; k++) { /* loop over added rows */
668: row = im[k]; brow = row/4;
669: rp = aj + ai[brow];
670: ap = aa + 16*ai[brow];
671: nrow = ailen[brow];
672: low = 0;
673: high = nrow;
674: for (l=0; l<n; l++) { /* loop over added columns */
675: col = in[l]; bcol = col/4;
676: ridx = row % 4; cidx = col % 4;
677: value = v[l + k*n];
678: if (col <= lastcol) low = 0; else high = nrow;
679: lastcol = col;
680: while (high-low > 7) {
681: t = (low+high)/2;
682: if (rp[t] > bcol) high = t;
683: else low = t;
684: }
685: for (i=low; i<high; i++) {
686: if (rp[i] > bcol) break;
687: if (rp[i] == bcol) {
688: bap = ap + 16*i + 4*cidx + ridx;
689: *bap += value;
690: goto noinsert1;
691: }
692: }
693: N = nrow++ - 1;
694: high++; /* added new column thus must search to one higher than before */
695: /* shift up all the later entries in this row */
696: for (ii=N; ii>=i; ii--) {
697: rp[ii+1] = rp[ii];
698: PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
699: }
700: if (N>=i) {
701: PetscMemzero(ap+16*i,16*sizeof(MatScalar));
702: }
703: rp[i] = bcol;
704: ap[16*i + 4*cidx + ridx] = value;
705: noinsert1:;
706: low = i;
707: }
708: ailen[brow] = nrow;
709: }
710: PetscFunctionReturnVoid();
711: }
714: /* UGLY, ugly, ugly
715: When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
716: not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
717: inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
718: converts the entries into single precision and then calls ..._MatScalar() to put them
719: into the single precision data structures.
720: */
721: #if defined(PETSC_USE_MAT_SINGLE)
722: EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
723: #else
724: #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
725: #endif
727: #define CHUNKSIZE 10
729: /*
730: Checks for missing diagonals
731: */
734: PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A)
735: {
736: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
738: PetscInt *diag,*jj = a->j,i;
741: MatMarkDiagonal_SeqBAIJ(A);
742: diag = a->diag;
743: for (i=0; i<a->mbs; i++) {
744: if (jj[diag[i]] != i) {
745: SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i);
746: }
747: }
748: return(0);
749: }
753: PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
754: {
755: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
757: PetscInt i,j,m = a->mbs;
760: if (!a->diag) {
761: PetscMalloc(m*sizeof(PetscInt),&a->diag);
762: }
763: for (i=0; i<m; i++) {
764: a->diag[i] = a->i[i+1];
765: for (j=a->i[i]; j<a->i[i+1]; j++) {
766: if (a->j[j] == i) {
767: a->diag[i] = j;
768: break;
769: }
770: }
771: }
772: return(0);
773: }
776: EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
780: static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
781: {
782: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
784: PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs;
785: PetscInt *tia, *tja;
788: *nn = n;
789: if (!ia) return(0);
790: if (symmetric) {
791: MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);
792: } else {
793: tia = a->i; tja = a->j;
794: }
795:
796: if (!blockcompressed) {
797: /* malloc & create the natural set of indices */
798: PetscMalloc2((n+1)*bs,PetscInt,ia,nz*bs,PetscInt,ja);
799: for (i=0; i<n+1; i++) {
800: for (j=0; j<bs; j++) {
801: *ia[i*bs+j] = tia[i]*bs+j;
802: }
803: }
804: for (i=0; i<nz; i++) {
805: for (j=0; j<bs; j++) {
806: *ja[i*bs+j] = tia[i]*bs+j;
807: }
808: }
809: if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
810: PetscFree(tia);
811: PetscFree(tja);
812: }
813: } else {
814: *ia = tia;
815: *ja = tja;
816: }
817: if (oshift == 1) {
818: for (i=0; i<nz; i++) (*ja)[i]++;
819: for (i=0; i<n+1; i++) (*ia)[i]++;
820: }
821: return(0);
822: }
826: static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
827: {
828: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
830: PetscInt i,n = a->mbs,nz = a->i[n];
833: if (!ia) return(0);
834: if (!blockcompressed) {
835: PetscFree2(*ia,*ja);
836: }else if (symmetric) {
837: PetscFree(*ia);
838: PetscFree(*ja);
839: } else if (oshift == 1) { /* blockcompressed */
840: for (i=0; i<nz; i++) a->j[i]--;
841: for (i=0; i<n+1; i++) a->i[i]--;
842: }
843: return(0);
844: }
848: PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
849: {
850: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
854: #if defined(PETSC_USE_LOG)
855: PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz);
856: #endif
857: MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);
858: if (a->row) {
859: ISDestroy(a->row);
860: }
861: if (a->col) {
862: ISDestroy(a->col);
863: }
864: PetscFree(a->diag);
865: PetscFree(a->idiag);
866: PetscFree2(a->imax,a->ilen);
867: PetscFree(a->solve_work);
868: PetscFree(a->mult_work);
869: if (a->icol) {ISDestroy(a->icol);}
870: PetscFree(a->saved_values);
871: #if defined(PETSC_USE_MAT_SINGLE)
872: PetscFree(a->setvaluescopy);
873: #endif
874: PetscFree(a->xtoy);
875: if (a->compressedrow.use){PetscFree(a->compressedrow.i);}
877: PetscFree(a);
879: PetscObjectChangeTypeName((PetscObject)A,0);
880: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);
881: PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);
882: PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);
883: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);
884: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);
885: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);
886: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);
887: return(0);
888: }
892: PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op)
893: {
894: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
898: switch (op) {
899: case MAT_ROW_ORIENTED:
900: a->roworiented = PETSC_TRUE;
901: break;
902: case MAT_COLUMN_ORIENTED:
903: a->roworiented = PETSC_FALSE;
904: break;
905: case MAT_COLUMNS_SORTED:
906: a->sorted = PETSC_TRUE;
907: break;
908: case MAT_COLUMNS_UNSORTED:
909: a->sorted = PETSC_FALSE;
910: break;
911: case MAT_KEEP_ZEROED_ROWS:
912: a->keepzeroedrows = PETSC_TRUE;
913: break;
914: case MAT_NO_NEW_NONZERO_LOCATIONS:
915: a->nonew = 1;
916: break;
917: case MAT_NEW_NONZERO_LOCATION_ERR:
918: a->nonew = -1;
919: break;
920: case MAT_NEW_NONZERO_ALLOCATION_ERR:
921: a->nonew = -2;
922: break;
923: case MAT_YES_NEW_NONZERO_LOCATIONS:
924: a->nonew = 0;
925: break;
926: case MAT_ROWS_SORTED:
927: case MAT_ROWS_UNSORTED:
928: case MAT_YES_NEW_DIAGONALS:
929: case MAT_IGNORE_OFF_PROC_ENTRIES:
930: case MAT_USE_HASH_TABLE:
931: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
932: break;
933: case MAT_NO_NEW_DIAGONALS:
934: SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
935: case MAT_SYMMETRIC:
936: case MAT_STRUCTURALLY_SYMMETRIC:
937: case MAT_NOT_SYMMETRIC:
938: case MAT_NOT_STRUCTURALLY_SYMMETRIC:
939: case MAT_HERMITIAN:
940: case MAT_NOT_HERMITIAN:
941: case MAT_SYMMETRY_ETERNAL:
942: case MAT_NOT_SYMMETRY_ETERNAL:
943: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
944: break;
945: default:
946: SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
947: }
948: return(0);
949: }
953: PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
954: {
955: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
957: PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
958: MatScalar *aa,*aa_i;
959: PetscScalar *v_i;
962: bs = A->rmap.bs;
963: ai = a->i;
964: aj = a->j;
965: aa = a->a;
966: bs2 = a->bs2;
967:
968: if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
969:
970: bn = row/bs; /* Block number */
971: bp = row % bs; /* Block Position */
972: M = ai[bn+1] - ai[bn];
973: *nz = bs*M;
974:
975: if (v) {
976: *v = 0;
977: if (*nz) {
978: PetscMalloc((*nz)*sizeof(PetscScalar),v);
979: for (i=0; i<M; i++) { /* for each block in the block row */
980: v_i = *v + i*bs;
981: aa_i = aa + bs2*(ai[bn] + i);
982: for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
983: }
984: }
985: }
987: if (idx) {
988: *idx = 0;
989: if (*nz) {
990: PetscMalloc((*nz)*sizeof(PetscInt),idx);
991: for (i=0; i<M; i++) { /* for each block in the block row */
992: idx_i = *idx + i*bs;
993: itmp = bs*aj[ai[bn] + i];
994: for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
995: }
996: }
997: }
998: return(0);
999: }
1003: PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1004: {
1008: if (idx) {PetscFree(*idx);}
1009: if (v) {PetscFree(*v);}
1010: return(0);
1011: }
1015: PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
1016: {
1017: Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
1018: Mat C;
1020: PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1021: PetscInt *rows,*cols,bs2=a->bs2;
1022: PetscScalar *array;
1025: if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
1026: PetscMalloc((1+nbs)*sizeof(PetscInt),&col);
1027: PetscMemzero(col,(1+nbs)*sizeof(PetscInt));
1029: #if defined(PETSC_USE_MAT_SINGLE)
1030: PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);
1031: for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
1032: #else
1033: array = a->a;
1034: #endif
1036: for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1037: MatCreate(A->comm,&C);
1038: MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);
1039: MatSetType(C,A->type_name);
1040: MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);
1041: PetscFree(col);
1042: PetscMalloc(2*bs*sizeof(PetscInt),&rows);
1043: cols = rows + bs;
1044: for (i=0; i<mbs; i++) {
1045: cols[0] = i*bs;
1046: for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1047: len = ai[i+1] - ai[i];
1048: for (j=0; j<len; j++) {
1049: rows[0] = (*aj++)*bs;
1050: for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1051: MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);
1052: array += bs2;
1053: }
1054: }
1055: PetscFree(rows);
1056: #if defined(PETSC_USE_MAT_SINGLE)
1057: PetscFree(array);
1058: #endif
1059:
1060: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1061: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1062:
1063: if (B) {
1064: *B = C;
1065: } else {
1066: MatHeaderCopy(A,C);
1067: }
1068: return(0);
1069: }
1073: static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1074: {
1075: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1077: PetscInt i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2;
1078: int fd;
1079: PetscScalar *aa;
1080: FILE *file;
1083: PetscViewerBinaryGetDescriptor(viewer,&fd);
1084: PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);
1085: col_lens[0] = MAT_FILE_COOKIE;
1087: col_lens[1] = A->rmap.N;
1088: col_lens[2] = A->cmap.n;
1089: col_lens[3] = a->nz*bs2;
1091: /* store lengths of each row and write (including header) to file */
1092: count = 0;
1093: for (i=0; i<a->mbs; i++) {
1094: for (j=0; j<bs; j++) {
1095: col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1096: }
1097: }
1098: PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);
1099: PetscFree(col_lens);
1101: /* store column indices (zero start index) */
1102: PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);
1103: count = 0;
1104: for (i=0; i<a->mbs; i++) {
1105: for (j=0; j<bs; j++) {
1106: for (k=a->i[i]; k<a->i[i+1]; k++) {
1107: for (l=0; l<bs; l++) {
1108: jj[count++] = bs*a->j[k] + l;
1109: }
1110: }
1111: }
1112: }
1113: PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);
1114: PetscFree(jj);
1116: /* store nonzero values */
1117: PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);
1118: count = 0;
1119: for (i=0; i<a->mbs; i++) {
1120: for (j=0; j<bs; j++) {
1121: for (k=a->i[i]; k<a->i[i+1]; k++) {
1122: for (l=0; l<bs; l++) {
1123: aa[count++] = a->a[bs2*k + l*bs + j];
1124: }
1125: }
1126: }
1127: }
1128: PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);
1129: PetscFree(aa);
1131: PetscViewerBinaryGetInfoPointer(viewer,&file);
1132: if (file) {
1133: fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs);
1134: }
1135: return(0);
1136: }
1140: static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1141: {
1142: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1143: PetscErrorCode ierr;
1144: PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2;
1145: PetscViewerFormat format;
1148: PetscViewerGetFormat(viewer,&format);
1149: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1150: PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);
1151: } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1152: Mat aij;
1153: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);
1154: MatView(aij,viewer);
1155: MatDestroy(aij);
1156: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1157: return(0);
1158: } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1159: PetscViewerASCIIUseTabs(viewer,PETSC_NO);
1160: for (i=0; i<a->mbs; i++) {
1161: for (j=0; j<bs; j++) {
1162: PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1163: for (k=a->i[i]; k<a->i[i+1]; k++) {
1164: for (l=0; l<bs; l++) {
1165: #if defined(PETSC_USE_COMPLEX)
1166: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1167: PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1168: PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1169: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1170: PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1171: PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1172: } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1173: PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1174: }
1175: #else
1176: if (a->a[bs2*k + l*bs + j] != 0.0) {
1177: PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1178: }
1179: #endif
1180: }
1181: }
1182: PetscViewerASCIIPrintf(viewer,"\n");
1183: }
1184: }
1185: PetscViewerASCIIUseTabs(viewer,PETSC_YES);
1186: } else {
1187: PetscViewerASCIIUseTabs(viewer,PETSC_NO);
1188: for (i=0; i<a->mbs; i++) {
1189: for (j=0; j<bs; j++) {
1190: PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1191: for (k=a->i[i]; k<a->i[i+1]; k++) {
1192: for (l=0; l<bs; l++) {
1193: #if defined(PETSC_USE_COMPLEX)
1194: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1195: PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1196: PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1197: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1198: PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1199: PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1200: } else {
1201: PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1202: }
1203: #else
1204: PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1205: #endif
1206: }
1207: }
1208: PetscViewerASCIIPrintf(viewer,"\n");
1209: }
1210: }
1211: PetscViewerASCIIUseTabs(viewer,PETSC_YES);
1212: }
1213: PetscViewerFlush(viewer);
1214: return(0);
1215: }
1219: static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1220: {
1221: Mat A = (Mat) Aa;
1222: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
1224: PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2;
1225: PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1226: MatScalar *aa;
1227: PetscViewer viewer;
1231: /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1232: PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);
1234: PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);
1236: /* loop over matrix elements drawing boxes */
1237: color = PETSC_DRAW_BLUE;
1238: for (i=0,row=0; i<mbs; i++,row+=bs) {
1239: for (j=a->i[i]; j<a->i[i+1]; j++) {
1240: y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1241: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1242: aa = a->a + j*bs2;
1243: for (k=0; k<bs; k++) {
1244: for (l=0; l<bs; l++) {
1245: if (PetscRealPart(*aa++) >= 0.) continue;
1246: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1247: }
1248: }
1249: }
1250: }
1251: color = PETSC_DRAW_CYAN;
1252: for (i=0,row=0; i<mbs; i++,row+=bs) {
1253: for (j=a->i[i]; j<a->i[i+1]; j++) {
1254: y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1255: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1256: aa = a->a + j*bs2;
1257: for (k=0; k<bs; k++) {
1258: for (l=0; l<bs; l++) {
1259: if (PetscRealPart(*aa++) != 0.) continue;
1260: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1261: }
1262: }
1263: }
1264: }
1266: color = PETSC_DRAW_RED;
1267: for (i=0,row=0; i<mbs; i++,row+=bs) {
1268: for (j=a->i[i]; j<a->i[i+1]; j++) {
1269: y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1270: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1271: aa = a->a + j*bs2;
1272: for (k=0; k<bs; k++) {
1273: for (l=0; l<bs; l++) {
1274: if (PetscRealPart(*aa++) <= 0.) continue;
1275: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1276: }
1277: }
1278: }
1279: }
1280: return(0);
1281: }
1285: static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1286: {
1288: PetscReal xl,yl,xr,yr,w,h;
1289: PetscDraw draw;
1290: PetscTruth isnull;
1294: PetscViewerDrawGetDraw(viewer,0,&draw);
1295: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
1297: PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
1298: xr = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0;
1299: xr += w; yr += h; xl = -w; yl = -h;
1300: PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
1301: PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
1302: PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);
1303: return(0);
1304: }
1308: PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1309: {
1311: PetscTruth iascii,isbinary,isdraw;
1314: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
1315: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
1316: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
1317: if (iascii){
1318: MatView_SeqBAIJ_ASCII(A,viewer);
1319: } else if (isbinary) {
1320: MatView_SeqBAIJ_Binary(A,viewer);
1321: } else if (isdraw) {
1322: MatView_SeqBAIJ_Draw(A,viewer);
1323: } else {
1324: Mat B;
1325: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
1326: MatView(B,viewer);
1327: MatDestroy(B);
1328: }
1329: return(0);
1330: }
1335: PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1336: {
1337: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1338: PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1339: PetscInt *ai = a->i,*ailen = a->ilen;
1340: PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2;
1341: MatScalar *ap,*aa = a->a,zero = 0.0;
1344: for (k=0; k<m; k++) { /* loop over rows */
1345: row = im[k]; brow = row/bs;
1346: if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1347: if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1348: rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1349: nrow = ailen[brow];
1350: for (l=0; l<n; l++) { /* loop over columns */
1351: if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1352: if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1353: col = in[l] ;
1354: bcol = col/bs;
1355: cidx = col%bs;
1356: ridx = row%bs;
1357: high = nrow;
1358: low = 0; /* assume unsorted */
1359: while (high-low > 5) {
1360: t = (low+high)/2;
1361: if (rp[t] > bcol) high = t;
1362: else low = t;
1363: }
1364: for (i=low; i<high; i++) {
1365: if (rp[i] > bcol)