Actual source code: baij.c

  1: #define PETSCMAT_DLL

  3: /*
  4:     Defines the basic matrix operations for the BAIJ (compressed row)
  5:   matrix storage format.
  6: */
 7:  #include src/mat/impls/baij/seq/baij.h
 8:  #include src/inline/spops.h
 9:  #include petscsys.h

 11:  #include src/inline/ilu.h

 15: /*@C
 16:   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.

 18:   Collective on Mat

 20:   Input Parameters:
 21: . mat - the matrix

 23:   Level: advanced
 24: @*/
 25: PetscErrorCode  MatSeqBAIJInvertBlockDiagonal(Mat mat)
 26: {
 27:   PetscErrorCode ierr,(*f)(Mat);

 31:   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
 32:   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

 34:   PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);
 35:   if (f) {
 36:     (*f)(mat);
 37:   } else {
 38:     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
 39:   }
 40:   return(0);
 41: }

 46: PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A)
 47: {
 48:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
 50:   PetscInt       *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs;
 51:   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;

 54:   if (a->idiagvalid) return(0);
 55:   MatMarkDiagonal_SeqBAIJ(A);
 56:   diag_offset = a->diag;
 57:   if (!a->idiag) {
 58:     PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);
 59:   }
 60:   diag  = a->idiag;
 61:   mdiag = a->idiag+bs*bs*mbs;
 62:   /* factor and invert each block */
 63:   switch (bs){
 64:     case 2:
 65:       for (i=0; i<mbs; i++) {
 66:         odiag   = v + 4*diag_offset[i];
 67:         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
 68:         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
 69:         Kernel_A_gets_inverse_A_2(diag);
 70:         diag    += 4;
 71:         mdiag   += 4;
 72:       }
 73:       break;
 74:     case 3:
 75:       for (i=0; i<mbs; i++) {
 76:         odiag    = v + 9*diag_offset[i];
 77:         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
 78:         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
 79:         diag[8]  = odiag[8];
 80:         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
 81:         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
 82:         mdiag[8] = odiag[8];
 83:         Kernel_A_gets_inverse_A_3(diag);
 84:         diag    += 9;
 85:         mdiag   += 9;
 86:       }
 87:       break;
 88:     case 4:
 89:       for (i=0; i<mbs; i++) {
 90:         odiag  = v + 16*diag_offset[i];
 91:         PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));
 92:         PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));
 93:         Kernel_A_gets_inverse_A_4(diag);
 94:         diag  += 16;
 95:         mdiag += 16;
 96:       }
 97:       break;
 98:     case 5:
 99:       for (i=0; i<mbs; i++) {
100:         odiag = v + 25*diag_offset[i];
101:         PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));
102:         PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));
103:         Kernel_A_gets_inverse_A_5(diag);
104:         diag  += 25;
105:         mdiag += 25;
106:       }
107:       break;
108:     default:
109:       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
110:   }
111:   a->idiagvalid = PETSC_TRUE;
112:   return(0);
113: }

118: PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
119: {
120:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
121:   PetscScalar        *x,x1,x2,s1,s2;
122:   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
123:   PetscErrorCode     ierr;
124:   PetscInt           m = a->mbs,i,i2,nz,idx;
125:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

128:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
129:   its = its*lits;
130:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
131:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
132:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
133:   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
134:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

136:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

138:   diag  = a->diag;
139:   idiag = a->idiag;
140:   VecGetArray(xx,&x);
141:   VecGetArray(bb,(PetscScalar**)&b);

143:   if (flag & SOR_ZERO_INITIAL_GUESS) {
144:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
145:       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
146:       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
147:       i2     = 2;
148:       idiag += 4;
149:       for (i=1; i<m; i++) {
150:         v     = aa + 4*ai[i];
151:         vi    = aj + ai[i];
152:         nz    = diag[i] - ai[i];
153:         s1    = b[i2]; s2 = b[i2+1];
154:         while (nz--) {
155:           idx  = 2*(*vi++);
156:           x1   = x[idx]; x2 = x[1+idx];
157:           s1  -= v[0]*x1 + v[2]*x2;
158:           s2  -= v[1]*x1 + v[3]*x2;
159:           v   += 4;
160:         }
161:         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
162:         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
163:         idiag   += 4;
164:         i2      += 2;
165:       }
166:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
167:       PetscLogFlops(4*(a->nz));
168:     }
169:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
170:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
171:       i2    = 0;
172:       mdiag = a->idiag+4*a->mbs;
173:       for (i=0; i<m; i++) {
174:         x1      = x[i2]; x2 = x[i2+1];
175:         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
176:         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
177:         mdiag  += 4;
178:         i2     += 2;
179:       }
180:       PetscLogFlops(6*m);
181:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
182:       PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));
183:     }
184:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
185:       idiag   = a->idiag+4*a->mbs - 4;
186:       i2      = 2*m - 2;
187:       x1      = x[i2]; x2 = x[i2+1];
188:       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
189:       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
190:       idiag -= 4;
191:       i2    -= 2;
192:       for (i=m-2; i>=0; i--) {
193:         v     = aa + 4*(diag[i]+1);
194:         vi    = aj + diag[i] + 1;
195:         nz    = ai[i+1] - diag[i] - 1;
196:         s1    = x[i2]; s2 = x[i2+1];
197:         while (nz--) {
198:           idx  = 2*(*vi++);
199:           x1   = x[idx]; x2 = x[1+idx];
200:           s1  -= v[0]*x1 + v[2]*x2;
201:           s2  -= v[1]*x1 + v[3]*x2;
202:           v   += 4;
203:         }
204:         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
205:         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
206:         idiag   -= 4;
207:         i2      -= 2;
208:       }
209:       PetscLogFlops(4*(a->nz));
210:     }
211:   } else {
212:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
213:   }
214:   VecRestoreArray(xx,&x);
215:   VecRestoreArray(bb,(PetscScalar**)&b);
216:   return(0);
217: }

221: PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
222: {
223:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
224:   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
225:   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
226:   PetscErrorCode     ierr;
227:   PetscInt           m = a->mbs,i,i2,nz,idx;
228:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

231:   its = its*lits;
232:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
233:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
234:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
235:   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
236:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

238:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

240:   diag  = a->diag;
241:   idiag = a->idiag;
242:   VecGetArray(xx,&x);
243:   VecGetArray(bb,(PetscScalar**)&b);

245:   if (flag & SOR_ZERO_INITIAL_GUESS) {
246:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
247:       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
248:       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
249:       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
250:       i2     = 3;
251:       idiag += 9;
252:       for (i=1; i<m; i++) {
253:         v     = aa + 9*ai[i];
254:         vi    = aj + ai[i];
255:         nz    = diag[i] - ai[i];
256:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
257:         while (nz--) {
258:           idx  = 3*(*vi++);
259:           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
260:           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
261:           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
262:           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
263:           v   += 9;
264:         }
265:         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
266:         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
267:         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
268:         idiag   += 9;
269:         i2      += 3;
270:       }
271:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
272:       PetscLogFlops(9*(a->nz));
273:     }
274:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
275:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
276:       i2    = 0;
277:       mdiag = a->idiag+9*a->mbs;
278:       for (i=0; i<m; i++) {
279:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
280:         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
281:         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
282:         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
283:         mdiag  += 9;
284:         i2     += 3;
285:       }
286:       PetscLogFlops(15*m);
287:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
288:       PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));
289:     }
290:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
291:       idiag   = a->idiag+9*a->mbs - 9;
292:       i2      = 3*m - 3;
293:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
294:       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
295:       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
296:       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
297:       idiag -= 9;
298:       i2    -= 3;
299:       for (i=m-2; i>=0; i--) {
300:         v     = aa + 9*(diag[i]+1);
301:         vi    = aj + diag[i] + 1;
302:         nz    = ai[i+1] - diag[i] - 1;
303:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
304:         while (nz--) {
305:           idx  = 3*(*vi++);
306:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
307:           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
308:           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
309:           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
310:           v   += 9;
311:         }
312:         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
313:         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
314:         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
315:         idiag   -= 9;
316:         i2      -= 3;
317:       }
318:       PetscLogFlops(9*(a->nz));
319:     }
320:   } else {
321:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
322:   }
323:   VecRestoreArray(xx,&x);
324:   VecRestoreArray(bb,(PetscScalar**)&b);
325:   return(0);
326: }

330: PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
331: {
332:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
333:   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
334:   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
335:   PetscErrorCode     ierr;
336:   PetscInt           m = a->mbs,i,i2,nz,idx;
337:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

340:   its = its*lits;
341:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
342:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
343:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
344:   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
345:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

347:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

349:   diag  = a->diag;
350:   idiag = a->idiag;
351:   VecGetArray(xx,&x);
352:   VecGetArray(bb,(PetscScalar**)&b);

354:   if (flag & SOR_ZERO_INITIAL_GUESS) {
355:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
356:       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
357:       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
358:       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
359:       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
360:       i2     = 4;
361:       idiag += 16;
362:       for (i=1; i<m; i++) {
363:         v     = aa + 16*ai[i];
364:         vi    = aj + ai[i];
365:         nz    = diag[i] - ai[i];
366:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
367:         while (nz--) {
368:           idx  = 4*(*vi++);
369:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
370:           s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
371:           s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
372:           s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
373:           s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
374:           v   += 16;
375:         }
376:         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
377:         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
378:         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
379:         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
380:         idiag   += 16;
381:         i2      += 4;
382:       }
383:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
384:       PetscLogFlops(16*(a->nz));
385:     }
386:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
387:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
388:       i2    = 0;
389:       mdiag = a->idiag+16*a->mbs;
390:       for (i=0; i<m; i++) {
391:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
392:         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
393:         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
394:         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
395:         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
396:         mdiag  += 16;
397:         i2     += 4;
398:       }
399:       PetscLogFlops(28*m);
400:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
401:       PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));
402:     }
403:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
404:       idiag   = a->idiag+16*a->mbs - 16;
405:       i2      = 4*m - 4;
406:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
407:       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
408:       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
409:       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
410:       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
411:       idiag -= 16;
412:       i2    -= 4;
413:       for (i=m-2; i>=0; i--) {
414:         v     = aa + 16*(diag[i]+1);
415:         vi    = aj + diag[i] + 1;
416:         nz    = ai[i+1] - diag[i] - 1;
417:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
418:         while (nz--) {
419:           idx  = 4*(*vi++);
420:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
421:           s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
422:           s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
423:           s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
424:           s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
425:           v   += 16;
426:         }
427:         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
428:         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
429:         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
430:         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
431:         idiag   -= 16;
432:         i2      -= 4;
433:       }
434:       PetscLogFlops(16*(a->nz));
435:     }
436:   } else {
437:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
438:   }
439:   VecRestoreArray(xx,&x);
440:   VecRestoreArray(bb,(PetscScalar**)&b);
441:   return(0);
442: }

446: PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
447: {
448:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
449:   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
450:   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
451:   PetscErrorCode     ierr;
452:   PetscInt           m = a->mbs,i,i2,nz,idx;
453:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

456:   its = its*lits;
457:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
458:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
459:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
460:   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
461:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

463:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

465:   diag  = a->diag;
466:   idiag = a->idiag;
467:   VecGetArray(xx,&x);
468:   VecGetArray(bb,(PetscScalar**)&b);

470:   if (flag & SOR_ZERO_INITIAL_GUESS) {
471:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
472:       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
473:       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
474:       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
475:       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
476:       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
477:       i2     = 5;
478:       idiag += 25;
479:       for (i=1; i<m; i++) {
480:         v     = aa + 25*ai[i];
481:         vi    = aj + ai[i];
482:         nz    = diag[i] - ai[i];
483:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
484:         while (nz--) {
485:           idx  = 5*(*vi++);
486:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
487:           s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
488:           s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
489:           s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
490:           s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
491:           s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
492:           v   += 25;
493:         }
494:         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
495:         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
496:         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
497:         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
498:         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
499:         idiag   += 25;
500:         i2      += 5;
501:       }
502:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
503:       PetscLogFlops(25*(a->nz));
504:     }
505:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
506:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
507:       i2    = 0;
508:       mdiag = a->idiag+25*a->mbs;
509:       for (i=0; i<m; i++) {
510:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
511:         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
512:         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
513:         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
514:         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
515:         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
516:         mdiag  += 25;
517:         i2     += 5;
518:       }
519:       PetscLogFlops(45*m);
520:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
521:       PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));
522:     }
523:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
524:       idiag   = a->idiag+25*a->mbs - 25;
525:       i2      = 5*m - 5;
526:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
527:       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
528:       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
529:       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
530:       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
531:       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
532:       idiag -= 25;
533:       i2    -= 5;
534:       for (i=m-2; i>=0; i--) {
535:         v     = aa + 25*(diag[i]+1);
536:         vi    = aj + diag[i] + 1;
537:         nz    = ai[i+1] - diag[i] - 1;
538:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
539:         while (nz--) {
540:           idx  = 5*(*vi++);
541:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
542:           s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
543:           s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
544:           s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
545:           s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
546:           s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
547:           v   += 25;
548:         }
549:         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
550:         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
551:         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
552:         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
553:         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
554:         idiag   -= 25;
555:         i2      -= 5;
556:       }
557:       PetscLogFlops(25*(a->nz));
558:     }
559:   } else {
560:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
561:   }
562:   VecRestoreArray(xx,&x);
563:   VecRestoreArray(bb,(PetscScalar**)&b);
564:   return(0);
565: }

567: /*
568:     Special version for direct calls from Fortran (Used in PETSc-fun3d)
569: */
570: #if defined(PETSC_HAVE_FORTRAN_CAPS)
571: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
572: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
573: #define matsetvaluesblocked4_ matsetvaluesblocked4
574: #endif

579: void  matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
580: {
581:   Mat               A = *AA;
582:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
583:   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
584:   PetscInt          *ai=a->i,*ailen=a->ilen;
585:   PetscInt          *aj=a->j,stepval,lastcol = -1;
586:   const PetscScalar *value = v;
587:   MatScalar         *ap,*aa = a->a,*bap;

590:   if (A->rmap.bs != 4) SETERRABORT(A->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
591:   stepval = (n-1)*4;
592:   for (k=0; k<m; k++) { /* loop over added rows */
593:     row  = im[k];
594:     rp   = aj + ai[row];
595:     ap   = aa + 16*ai[row];
596:     nrow = ailen[row];
597:     low  = 0;
598:     high = nrow;
599:     for (l=0; l<n; l++) { /* loop over added columns */
600:       col = in[l];
601:       if (col <= lastcol) low = 0; else high = nrow;
602:       lastcol = col;
603:       value = v + k*(stepval+4 + l)*4;
604:       while (high-low > 7) {
605:         t = (low+high)/2;
606:         if (rp[t] > col) high = t;
607:         else             low  = t;
608:       }
609:       for (i=low; i<high; i++) {
610:         if (rp[i] > col) break;
611:         if (rp[i] == col) {
612:           bap  = ap +  16*i;
613:           for (ii=0; ii<4; ii++,value+=stepval) {
614:             for (jj=ii; jj<16; jj+=4) {
615:               bap[jj] += *value++;
616:             }
617:           }
618:           goto noinsert2;
619:         }
620:       }
621:       N = nrow++ - 1;
622:       high++; /* added new column index thus must search to one higher than before */
623:       /* shift up all the later entries in this row */
624:       for (ii=N; ii>=i; ii--) {
625:         rp[ii+1] = rp[ii];
626:         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
627:       }
628:       if (N >= i) {
629:         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
630:       }
631:       rp[i] = col;
632:       bap   = ap +  16*i;
633:       for (ii=0; ii<4; ii++,value+=stepval) {
634:         for (jj=ii; jj<16; jj+=4) {
635:           bap[jj] = *value++;
636:         }
637:       }
638:       noinsert2:;
639:       low = i;
640:     }
641:     ailen[row] = nrow;
642:   }
643:   PetscFunctionReturnVoid();
644: }

647: #if defined(PETSC_HAVE_FORTRAN_CAPS)
648: #define matsetvalues4_ MATSETVALUES4
649: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
650: #define matsetvalues4_ matsetvalues4
651: #endif

656: void  matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
657: {
658:   Mat         A = *AA;
659:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
660:   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
661:   PetscInt    *ai=a->i,*ailen=a->ilen;
662:   PetscInt    *aj=a->j,brow,bcol;
663:   PetscInt    ridx,cidx,lastcol = -1;
664:   MatScalar   *ap,value,*aa=a->a,*bap;
665: 
667:   for (k=0; k<m; k++) { /* loop over added rows */
668:     row  = im[k]; brow = row/4;
669:     rp   = aj + ai[brow];
670:     ap   = aa + 16*ai[brow];
671:     nrow = ailen[brow];
672:     low  = 0;
673:     high = nrow;
674:     for (l=0; l<n; l++) { /* loop over added columns */
675:       col = in[l]; bcol = col/4;
676:       ridx = row % 4; cidx = col % 4;
677:       value = v[l + k*n];
678:       if (col <= lastcol) low = 0; else high = nrow;
679:       lastcol = col;
680:       while (high-low > 7) {
681:         t = (low+high)/2;
682:         if (rp[t] > bcol) high = t;
683:         else              low  = t;
684:       }
685:       for (i=low; i<high; i++) {
686:         if (rp[i] > bcol) break;
687:         if (rp[i] == bcol) {
688:           bap  = ap +  16*i + 4*cidx + ridx;
689:           *bap += value;
690:           goto noinsert1;
691:         }
692:       }
693:       N = nrow++ - 1;
694:       high++; /* added new column thus must search to one higher than before */
695:       /* shift up all the later entries in this row */
696:       for (ii=N; ii>=i; ii--) {
697:         rp[ii+1] = rp[ii];
698:         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
699:       }
700:       if (N>=i) {
701:         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
702:       }
703:       rp[i]                    = bcol;
704:       ap[16*i + 4*cidx + ridx] = value;
705:       noinsert1:;
706:       low = i;
707:     }
708:     ailen[brow] = nrow;
709:   }
710:   PetscFunctionReturnVoid();
711: }

714: /*  UGLY, ugly, ugly
715:    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 
716:    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 
717:    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
718:    converts the entries into single precision and then calls ..._MatScalar() to put them
719:    into the single precision data structures.
720: */
721: #if defined(PETSC_USE_MAT_SINGLE)
722: EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
723: #else
724: #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
725: #endif

727: #define CHUNKSIZE  10

729: /*
730:      Checks for missing diagonals
731: */
734: PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A)
735: {
736:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
738:   PetscInt       *diag,*jj = a->j,i;

741:   MatMarkDiagonal_SeqBAIJ(A);
742:   diag = a->diag;
743:   for (i=0; i<a->mbs; i++) {
744:     if (jj[diag[i]] != i) {
745:       SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i);
746:     }
747:   }
748:   return(0);
749: }

753: PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
754: {
755:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
757:   PetscInt       i,j,m = a->mbs;

760:   if (!a->diag) {
761:     PetscMalloc(m*sizeof(PetscInt),&a->diag);
762:   }
763:   for (i=0; i<m; i++) {
764:     a->diag[i] = a->i[i+1];
765:     for (j=a->i[i]; j<a->i[i+1]; j++) {
766:       if (a->j[j] == i) {
767:         a->diag[i] = j;
768:         break;
769:       }
770:     }
771:   }
772:   return(0);
773: }


776: EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);

780: static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
781: {
782:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
784:   PetscInt     i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs;
785:   PetscInt     *tia, *tja;

788:   *nn = n;
789:   if (!ia) return(0);
790:   if (symmetric) {
791:     MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);
792:   } else {
793:     tia = a->i; tja = a->j;
794:   }
795: 
796:   if (!blockcompressed) {
797:     /* malloc & create the natural set of indices */
798:     PetscMalloc2((n+1)*bs,PetscInt,ia,nz*bs,PetscInt,ja);
799:     for (i=0; i<n+1; i++) {
800:       for (j=0; j<bs; j++) {
801:         *ia[i*bs+j] = tia[i]*bs+j;
802:       }
803:     }
804:     for (i=0; i<nz; i++) {
805:       for (j=0; j<bs; j++) {
806:         *ja[i*bs+j] = tia[i]*bs+j;
807:       }
808:     }
809:     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
810:       PetscFree(tia);
811:       PetscFree(tja);
812:     }
813:   } else {
814:     *ia = tia;
815:     *ja = tja;
816:   }
817:   if (oshift == 1) {
818:     for (i=0; i<nz; i++) (*ja)[i]++;
819:     for (i=0; i<n+1; i++) (*ia)[i]++;
820:   }
821:   return(0);
822: }

826: static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
827: {
828:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
830:   PetscInt       i,n = a->mbs,nz = a->i[n];

833:   if (!ia) return(0);
834:   if (!blockcompressed) {
835:     PetscFree2(*ia,*ja);
836:   }else if (symmetric) {
837:     PetscFree(*ia);
838:     PetscFree(*ja);
839:   } else if (oshift == 1) { /* blockcompressed */
840:     for (i=0; i<nz; i++) a->j[i]--;
841:     for (i=0; i<n+1; i++) a->i[i]--;
842:   }
843:   return(0);
844: }

848: PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
849: {
850:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;

854: #if defined(PETSC_USE_LOG)
855:   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz);
856: #endif
857:   MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);
858:   if (a->row) {
859:     ISDestroy(a->row);
860:   }
861:   if (a->col) {
862:     ISDestroy(a->col);
863:   }
864:   PetscFree(a->diag);
865:   PetscFree(a->idiag);
866:   PetscFree2(a->imax,a->ilen);
867:   PetscFree(a->solve_work);
868:   PetscFree(a->mult_work);
869:   if (a->icol) {ISDestroy(a->icol);}
870:   PetscFree(a->saved_values);
871: #if defined(PETSC_USE_MAT_SINGLE)
872:   PetscFree(a->setvaluescopy);
873: #endif
874:   PetscFree(a->xtoy);
875:   if (a->compressedrow.use){PetscFree(a->compressedrow.i);}

877:   PetscFree(a);

879:   PetscObjectChangeTypeName((PetscObject)A,0);
880:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);
881:   PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);
882:   PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);
883:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);
884:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);
885:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);
886:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);
887:   return(0);
888: }

892: PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op)
893: {
894:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;

898:   switch (op) {
899:   case MAT_ROW_ORIENTED:
900:     a->roworiented    = PETSC_TRUE;
901:     break;
902:   case MAT_COLUMN_ORIENTED:
903:     a->roworiented    = PETSC_FALSE;
904:     break;
905:   case MAT_COLUMNS_SORTED:
906:     a->sorted         = PETSC_TRUE;
907:     break;
908:   case MAT_COLUMNS_UNSORTED:
909:     a->sorted         = PETSC_FALSE;
910:     break;
911:   case MAT_KEEP_ZEROED_ROWS:
912:     a->keepzeroedrows = PETSC_TRUE;
913:     break;
914:   case MAT_NO_NEW_NONZERO_LOCATIONS:
915:     a->nonew          = 1;
916:     break;
917:   case MAT_NEW_NONZERO_LOCATION_ERR:
918:     a->nonew          = -1;
919:     break;
920:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
921:     a->nonew          = -2;
922:     break;
923:   case MAT_YES_NEW_NONZERO_LOCATIONS:
924:     a->nonew          = 0;
925:     break;
926:   case MAT_ROWS_SORTED:
927:   case MAT_ROWS_UNSORTED:
928:   case MAT_YES_NEW_DIAGONALS:
929:   case MAT_IGNORE_OFF_PROC_ENTRIES:
930:   case MAT_USE_HASH_TABLE:
931:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
932:     break;
933:   case MAT_NO_NEW_DIAGONALS:
934:     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
935:   case MAT_SYMMETRIC:
936:   case MAT_STRUCTURALLY_SYMMETRIC:
937:   case MAT_NOT_SYMMETRIC:
938:   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
939:   case MAT_HERMITIAN:
940:   case MAT_NOT_HERMITIAN:
941:   case MAT_SYMMETRY_ETERNAL:
942:   case MAT_NOT_SYMMETRY_ETERNAL:
943:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
944:     break;
945:   default:
946:     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
947:   }
948:   return(0);
949: }

953: PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
954: {
955:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
957:   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
958:   MatScalar      *aa,*aa_i;
959:   PetscScalar    *v_i;

962:   bs  = A->rmap.bs;
963:   ai  = a->i;
964:   aj  = a->j;
965:   aa  = a->a;
966:   bs2 = a->bs2;
967: 
968:   if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
969: 
970:   bn  = row/bs;   /* Block number */
971:   bp  = row % bs; /* Block Position */
972:   M   = ai[bn+1] - ai[bn];
973:   *nz = bs*M;
974: 
975:   if (v) {
976:     *v = 0;
977:     if (*nz) {
978:       PetscMalloc((*nz)*sizeof(PetscScalar),v);
979:       for (i=0; i<M; i++) { /* for each block in the block row */
980:         v_i  = *v + i*bs;
981:         aa_i = aa + bs2*(ai[bn] + i);
982:         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
983:       }
984:     }
985:   }

987:   if (idx) {
988:     *idx = 0;
989:     if (*nz) {
990:       PetscMalloc((*nz)*sizeof(PetscInt),idx);
991:       for (i=0; i<M; i++) { /* for each block in the block row */
992:         idx_i = *idx + i*bs;
993:         itmp  = bs*aj[ai[bn] + i];
994:         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
995:       }
996:     }
997:   }
998:   return(0);
999: }

1003: PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1004: {

1008:   if (idx) {PetscFree(*idx);}
1009:   if (v)   {PetscFree(*v);}
1010:   return(0);
1011: }

1015: PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
1016: {
1017:   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1018:   Mat            C;
1020:   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1021:   PetscInt       *rows,*cols,bs2=a->bs2;
1022:   PetscScalar    *array;

1025:   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
1026:   PetscMalloc((1+nbs)*sizeof(PetscInt),&col);
1027:   PetscMemzero(col,(1+nbs)*sizeof(PetscInt));

1029: #if defined(PETSC_USE_MAT_SINGLE)
1030:   PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);
1031:   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
1032: #else
1033:   array = a->a;
1034: #endif

1036:   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1037:   MatCreate(A->comm,&C);
1038:   MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);
1039:   MatSetType(C,A->type_name);
1040:   MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);
1041:   PetscFree(col);
1042:   PetscMalloc(2*bs*sizeof(PetscInt),&rows);
1043:   cols = rows + bs;
1044:   for (i=0; i<mbs; i++) {
1045:     cols[0] = i*bs;
1046:     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1047:     len = ai[i+1] - ai[i];
1048:     for (j=0; j<len; j++) {
1049:       rows[0] = (*aj++)*bs;
1050:       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1051:       MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);
1052:       array += bs2;
1053:     }
1054:   }
1055:   PetscFree(rows);
1056: #if defined(PETSC_USE_MAT_SINGLE)
1057:   PetscFree(array);
1058: #endif
1059: 
1060:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1061:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1062: 
1063:   if (B) {
1064:     *B = C;
1065:   } else {
1066:     MatHeaderCopy(A,C);
1067:   }
1068:   return(0);
1069: }

1073: static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1074: {
1075:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1077:   PetscInt       i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2;
1078:   int            fd;
1079:   PetscScalar    *aa;
1080:   FILE           *file;

1083:   PetscViewerBinaryGetDescriptor(viewer,&fd);
1084:   PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);
1085:   col_lens[0] = MAT_FILE_COOKIE;

1087:   col_lens[1] = A->rmap.N;
1088:   col_lens[2] = A->cmap.n;
1089:   col_lens[3] = a->nz*bs2;

1091:   /* store lengths of each row and write (including header) to file */
1092:   count = 0;
1093:   for (i=0; i<a->mbs; i++) {
1094:     for (j=0; j<bs; j++) {
1095:       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1096:     }
1097:   }
1098:   PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);
1099:   PetscFree(col_lens);

1101:   /* store column indices (zero start index) */
1102:   PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);
1103:   count = 0;
1104:   for (i=0; i<a->mbs; i++) {
1105:     for (j=0; j<bs; j++) {
1106:       for (k=a->i[i]; k<a->i[i+1]; k++) {
1107:         for (l=0; l<bs; l++) {
1108:           jj[count++] = bs*a->j[k] + l;
1109:         }
1110:       }
1111:     }
1112:   }
1113:   PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);
1114:   PetscFree(jj);

1116:   /* store nonzero values */
1117:   PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);
1118:   count = 0;
1119:   for (i=0; i<a->mbs; i++) {
1120:     for (j=0; j<bs; j++) {
1121:       for (k=a->i[i]; k<a->i[i+1]; k++) {
1122:         for (l=0; l<bs; l++) {
1123:           aa[count++] = a->a[bs2*k + l*bs + j];
1124:         }
1125:       }
1126:     }
1127:   }
1128:   PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);
1129:   PetscFree(aa);

1131:   PetscViewerBinaryGetInfoPointer(viewer,&file);
1132:   if (file) {
1133:     fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs);
1134:   }
1135:   return(0);
1136: }

1140: static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1141: {
1142:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1143:   PetscErrorCode    ierr;
1144:   PetscInt          i,j,bs = A->rmap.bs,k,l,bs2=a->bs2;
1145:   PetscViewerFormat format;

1148:   PetscViewerGetFormat(viewer,&format);
1149:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1150:     PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);
1151:   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1152:     Mat aij;
1153:     MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);
1154:     MatView(aij,viewer);
1155:     MatDestroy(aij);
1156:   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1157:      return(0);
1158:   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1159:     PetscViewerASCIIUseTabs(viewer,PETSC_NO);
1160:     for (i=0; i<a->mbs; i++) {
1161:       for (j=0; j<bs; j++) {
1162:         PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1163:         for (k=a->i[i]; k<a->i[i+1]; k++) {
1164:           for (l=0; l<bs; l++) {
1165: #if defined(PETSC_USE_COMPLEX)
1166:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1167:               PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1168:                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1169:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1170:               PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1171:                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1172:             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1173:               PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1174:             }
1175: #else
1176:             if (a->a[bs2*k + l*bs + j] != 0.0) {
1177:               PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1178:             }
1179: #endif
1180:           }
1181:         }
1182:         PetscViewerASCIIPrintf(viewer,"\n");
1183:       }
1184:     }
1185:     PetscViewerASCIIUseTabs(viewer,PETSC_YES);
1186:   } else {
1187:     PetscViewerASCIIUseTabs(viewer,PETSC_NO);
1188:     for (i=0; i<a->mbs; i++) {
1189:       for (j=0; j<bs; j++) {
1190:         PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1191:         for (k=a->i[i]; k<a->i[i+1]; k++) {
1192:           for (l=0; l<bs; l++) {
1193: #if defined(PETSC_USE_COMPLEX)
1194:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1195:               PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1196:                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1197:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1198:               PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1199:                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1200:             } else {
1201:               PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1202:             }
1203: #else
1204:             PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1205: #endif
1206:           }
1207:         }
1208:         PetscViewerASCIIPrintf(viewer,"\n");
1209:       }
1210:     }
1211:     PetscViewerASCIIUseTabs(viewer,PETSC_YES);
1212:   }
1213:   PetscViewerFlush(viewer);
1214:   return(0);
1215: }

1219: static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1220: {
1221:   Mat            A = (Mat) Aa;
1222:   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1224:   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2;
1225:   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1226:   MatScalar      *aa;
1227:   PetscViewer    viewer;


1231:   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1232:   PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);

1234:   PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);

1236:   /* loop over matrix elements drawing boxes */
1237:   color = PETSC_DRAW_BLUE;
1238:   for (i=0,row=0; i<mbs; i++,row+=bs) {
1239:     for (j=a->i[i]; j<a->i[i+1]; j++) {
1240:       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1241:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1242:       aa = a->a + j*bs2;
1243:       for (k=0; k<bs; k++) {
1244:         for (l=0; l<bs; l++) {
1245:           if (PetscRealPart(*aa++) >=  0.) continue;
1246:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1247:         }
1248:       }
1249:     }
1250:   }
1251:   color = PETSC_DRAW_CYAN;
1252:   for (i=0,row=0; i<mbs; i++,row+=bs) {
1253:     for (j=a->i[i]; j<a->i[i+1]; j++) {
1254:       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1255:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1256:       aa = a->a + j*bs2;
1257:       for (k=0; k<bs; k++) {
1258:         for (l=0; l<bs; l++) {
1259:           if (PetscRealPart(*aa++) != 0.) continue;
1260:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1261:         }
1262:       }
1263:     }
1264:   }

1266:   color = PETSC_DRAW_RED;
1267:   for (i=0,row=0; i<mbs; i++,row+=bs) {
1268:     for (j=a->i[i]; j<a->i[i+1]; j++) {
1269:       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1270:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1271:       aa = a->a + j*bs2;
1272:       for (k=0; k<bs; k++) {
1273:         for (l=0; l<bs; l++) {
1274:           if (PetscRealPart(*aa++) <= 0.) continue;
1275:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1276:         }
1277:       }
1278:     }
1279:   }
1280:   return(0);
1281: }

1285: static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1286: {
1288:   PetscReal      xl,yl,xr,yr,w,h;
1289:   PetscDraw      draw;
1290:   PetscTruth     isnull;


1294:   PetscViewerDrawGetDraw(viewer,0,&draw);
1295:   PetscDrawIsNull(draw,&isnull); if (isnull) return(0);

1297:   PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
1298:   xr  = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0;
1299:   xr += w;    yr += h;  xl = -w;     yl = -h;
1300:   PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
1301:   PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
1302:   PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);
1303:   return(0);
1304: }

1308: PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1309: {
1311:   PetscTruth     iascii,isbinary,isdraw;

1314:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
1315:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
1316:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
1317:   if (iascii){
1318:     MatView_SeqBAIJ_ASCII(A,viewer);
1319:   } else if (isbinary) {
1320:     MatView_SeqBAIJ_Binary(A,viewer);
1321:   } else if (isdraw) {
1322:     MatView_SeqBAIJ_Draw(A,viewer);
1323:   } else {
1324:     Mat B;
1325:     MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
1326:     MatView(B,viewer);
1327:     MatDestroy(B);
1328:   }
1329:   return(0);
1330: }


1335: PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1336: {
1337:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1338:   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1339:   PetscInt    *ai = a->i,*ailen = a->ilen;
1340:   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2;
1341:   MatScalar   *ap,*aa = a->a,zero = 0.0;

1344:   for (k=0; k<m; k++) { /* loop over rows */
1345:     row  = im[k]; brow = row/bs;
1346:     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1347:     if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1348:     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1349:     nrow = ailen[brow];
1350:     for (l=0; l<n; l++) { /* loop over columns */
1351:       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1352:       if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1353:       col  = in[l] ;
1354:       bcol = col/bs;
1355:       cidx = col%bs;
1356:       ridx = row%bs;
1357:       high = nrow;
1358:       low  = 0; /* assume unsorted */
1359:       while (high-low > 5) {
1360:         t = (low+high)/2;
1361:         if (rp[t] > bcol) high = t;
1362:         else             low  = t;
1363:       }
1364:       for (i=low; i<high; i++) {
1365:         if (rp[i] > bcol)