Actual source code: mpisbaij.c

  1: #define PETSCMAT_DLL

 3:  #include src/mat/impls/baij/mpi/mpibaij.h
 4:  #include mpisbaij.h
 5:  #include src/mat/impls/sbaij/seq/sbaij.h

  7: EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
  8: EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
  9: EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
 10: EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
 11: EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
 12: EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
 13: EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
 14: EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
 15: EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
 16: EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
 17: EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
 18: EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
 19: EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
 20: EXTERN PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
 21: EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);

 23: /*  UGLY, ugly, ugly
 24:    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 
 25:    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 
 26:    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
 27:    converts the entries into single precision and then calls ..._MatScalar() to put them
 28:    into the single precision data structures.
 29: */
 30: #if defined(PETSC_USE_MAT_SINGLE)
 31: EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
 32: EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
 33: EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
 34: EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
 35: EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
 36: #else
 37: #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
 38: #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
 39: #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
 40: #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
 41: #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
 42: #endif

 47: PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
 48: {
 49:   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;

 53:   MatStoreValues(aij->A);
 54:   MatStoreValues(aij->B);
 55:   return(0);
 56: }

 62: PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
 63: {
 64:   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;

 68:   MatRetrieveValues(aij->A);
 69:   MatRetrieveValues(aij->B);
 70:   return(0);
 71: }


 75: #define CHUNKSIZE  10

 77: #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
 78: { \
 79:  \
 80:     brow = row/bs;  \
 81:     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
 82:     rmax = aimax[brow]; nrow = ailen[brow]; \
 83:       bcol = col/bs; \
 84:       ridx = row % bs; cidx = col % bs; \
 85:       low = 0; high = nrow; \
 86:       while (high-low > 3) { \
 87:         t = (low+high)/2; \
 88:         if (rp[t] > bcol) high = t; \
 89:         else              low  = t; \
 90:       } \
 91:       for (_i=low; _i<high; _i++) { \
 92:         if (rp[_i] > bcol) break; \
 93:         if (rp[_i] == bcol) { \
 94:           bap  = ap +  bs2*_i + bs*cidx + ridx; \
 95:           if (addv == ADD_VALUES) *bap += value;  \
 96:           else                    *bap  = value;  \
 97:           goto a_noinsert; \
 98:         } \
 99:       } \
100:       if (a->nonew == 1) goto a_noinsert; \
101:       if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
102:       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
103:       N = nrow++ - 1;  \
104:       /* shift up all the later entries in this row */ \
105:       for (ii=N; ii>=_i; ii--) { \
106:         rp[ii+1] = rp[ii]; \
107:         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar)); \
108:       } \
109:       if (N>=_i) { PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar)); }  \
110:       rp[_i]                      = bcol;  \
111:       ap[bs2*_i + bs*cidx + ridx] = value;  \
112:       a_noinsert:; \
113:     ailen[brow] = nrow; \
114: } 
115: #ifndef MatSetValues_SeqBAIJ_B_Private
116: #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
117: { \
118:     brow = row/bs;  \
119:     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
120:     rmax = bimax[brow]; nrow = bilen[brow]; \
121:       bcol = col/bs; \
122:       ridx = row % bs; cidx = col % bs; \
123:       low = 0; high = nrow; \
124:       while (high-low > 3) { \
125:         t = (low+high)/2; \
126:         if (rp[t] > bcol) high = t; \
127:         else              low  = t; \
128:       } \
129:       for (_i=low; _i<high; _i++) { \
130:         if (rp[_i] > bcol) break; \
131:         if (rp[_i] == bcol) { \
132:           bap  = ap +  bs2*_i + bs*cidx + ridx; \
133:           if (addv == ADD_VALUES) *bap += value;  \
134:           else                    *bap  = value;  \
135:           goto b_noinsert; \
136:         } \
137:       } \
138:       if (b->nonew == 1) goto b_noinsert; \
139:       if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
140:       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
141:       N = nrow++ - 1;  \
142:       /* shift up all the later entries in this row */ \
143:       for (ii=N; ii>=_i; ii--) { \
144:         rp[ii+1] = rp[ii]; \
145:         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar)); \
146:       } \
147:       if (N>=_i) { PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));}  \
148:       rp[_i]                      = bcol;  \
149:       ap[bs2*_i + bs*cidx + ridx] = value;  \
150:       b_noinsert:; \
151:     bilen[brow] = nrow; \
152: } 
153: #endif

155: #if defined(PETSC_USE_MAT_SINGLE)
158: PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
159: {
160:   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
162:   PetscInt       i,N = m*n;
163:   MatScalar      *vsingle;

166:   if (N > b->setvalueslen) {
167:     PetscFree(b->setvaluescopy);
168:     PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
169:     b->setvalueslen  = N;
170:   }
171:   vsingle = b->setvaluescopy;

173:   for (i=0; i<N; i++) {
174:     vsingle[i] = v[i];
175:   }
176:   MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);
177:   return(0);
178: }

182: PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
183: {
184:   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
186:   PetscInt       i,N = m*n*b->bs2;
187:   MatScalar      *vsingle;

190:   if (N > b->setvalueslen) {
191:     PetscFree(b->setvaluescopy);
192:     PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
193:     b->setvalueslen  = N;
194:   }
195:   vsingle = b->setvaluescopy;
196:   for (i=0; i<N; i++) {
197:     vsingle[i] = v[i];
198:   }
199:   MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);
200:   return(0);
201: }
202: #endif

204: /* Only add/insert a(i,j) with i<=j (blocks). 
205:    Any a(i,j) with i>j input by user is ingored. 
206: */
209: PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
210: {
211:   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
212:   MatScalar      value;
213:   PetscTruth     roworiented = baij->roworiented;
215:   PetscInt       i,j,row,col;
216:   PetscInt       rstart_orig=mat->rmap.rstart;
217:   PetscInt       rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart;
218:   PetscInt       cend_orig=mat->cmap.rend,bs=mat->rmap.bs;

220:   /* Some Variables required in the macro */
221:   Mat            A = baij->A;
222:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
223:   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
224:   MatScalar      *aa=a->a;

226:   Mat            B = baij->B;
227:   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
228:   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
229:   MatScalar     *ba=b->a;

231:   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
232:   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
233:   MatScalar     *ap,*bap;

235:   /* for stash */
236:   PetscInt      n_loc, *in_loc = PETSC_NULL;
237:   MatScalar     *v_loc = PETSC_NULL;


241:   if (!baij->donotstash){
242:     if (n > baij->n_loc) {
243:       PetscFree(baij->in_loc);
244:       PetscFree(baij->v_loc);
245:       PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);
246:       PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);
247:       baij->n_loc = n;
248:     }
249:     in_loc = baij->in_loc;
250:     v_loc  = baij->v_loc;
251:   }

253:   for (i=0; i<m; i++) {
254:     if (im[i] < 0) continue;
255: #if defined(PETSC_USE_DEBUG)
256:     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
257: #endif
258:     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
259:       row = im[i] - rstart_orig;              /* local row index */
260:       for (j=0; j<n; j++) {
261:         if (im[i]/bs > in[j]/bs){
262:           if (a->ignore_ltriangular){
263:             continue;    /* ignore lower triangular blocks */
264:           } else {
265:             SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)");
266:           }
267:         }
268:         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
269:           col = in[j] - cstart_orig;          /* local col index */
270:           brow = row/bs; bcol = col/bs;
271:           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
272:           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
273:           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
274:           /* MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv); */
275:         } else if (in[j] < 0) continue;
276: #if defined(PETSC_USE_DEBUG)
277:         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
278: #endif
279:         else {  /* off-diag entry (B) */
280:           if (mat->was_assembled) {
281:             if (!baij->colmap) {
282:               CreateColmap_MPIBAIJ_Private(mat);
283:             }
284: #if defined (PETSC_USE_CTABLE)
285:             PetscTableFind(baij->colmap,in[j]/bs + 1,&col);
286:             col  = col - 1;
287: #else
288:             col = baij->colmap[in[j]/bs] - 1;
289: #endif
290:             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
291:               DisAssemble_MPISBAIJ(mat);
292:               col =  in[j];
293:               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
294:               B = baij->B;
295:               b = (Mat_SeqBAIJ*)(B)->data;
296:               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
297:               ba=b->a;
298:             } else col += in[j]%bs;
299:           } else col = in[j];
300:           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
301:           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
302:           /* MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv); */
303:         }
304:       }
305:     } else {  /* off processor entry */
306:       if (!baij->donotstash) {
307:         n_loc = 0;
308:         for (j=0; j<n; j++){
309:           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
310:           in_loc[n_loc] = in[j];
311:           if (roworiented) {
312:             v_loc[n_loc] = v[i*n+j];
313:           } else {
314:             v_loc[n_loc] = v[j*m+i];
315:           }
316:           n_loc++;
317:         }
318:         MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);
319:       }
320:     }
321:   }
322:   return(0);
323: }

327: PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
328: {
329:   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
330:   const MatScalar *value;
331:   MatScalar       *barray=baij->barray;
332:   PetscTruth      roworiented = baij->roworiented;
333:   PetscErrorCode  ierr;
334:   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
335:   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
336:   PetscInt        cend=baij->rendbs,bs=mat->rmap.bs,bs2=baij->bs2;

339:   if(!barray) {
340:     PetscMalloc(bs2*sizeof(MatScalar),&barray);
341:     baij->barray = barray;
342:   }

344:   if (roworiented) {
345:     stepval = (n-1)*bs;
346:   } else {
347:     stepval = (m-1)*bs;
348:   }
349:   for (i=0; i<m; i++) {
350:     if (im[i] < 0) continue;
351: #if defined(PETSC_USE_DEBUG)
352:     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
353: #endif
354:     if (im[i] >= rstart && im[i] < rend) {
355:       row = im[i] - rstart;
356:       for (j=0; j<n; j++) {
357:         /* If NumCol = 1 then a copy is not required */
358:         if ((roworiented) && (n == 1)) {
359:           barray = (MatScalar*) v + i*bs2;
360:         } else if((!roworiented) && (m == 1)) {
361:           barray = (MatScalar*) v + j*bs2;
362:         } else { /* Here a copy is required */
363:           if (roworiented) {
364:             value = v + i*(stepval+bs)*bs + j*bs;
365:           } else {
366:             value = v + j*(stepval+bs)*bs + i*bs;
367:           }
368:           for (ii=0; ii<bs; ii++,value+=stepval) {
369:             for (jj=0; jj<bs; jj++) {
370:               *barray++  = *value++;
371:             }
372:           }
373:           barray -=bs2;
374:         }
375: 
376:         if (in[j] >= cstart && in[j] < cend){
377:           col  = in[j] - cstart;
378:           MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);
379:         }
380:         else if (in[j] < 0) continue;
381: #if defined(PETSC_USE_DEBUG)
382:         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
383: #endif
384:         else {
385:           if (mat->was_assembled) {
386:             if (!baij->colmap) {
387:               CreateColmap_MPIBAIJ_Private(mat);
388:             }

390: #if defined(PETSC_USE_DEBUG)
391: #if defined (PETSC_USE_CTABLE)
392:             { PetscInt data;
393:               PetscTableFind(baij->colmap,in[j]+1,&data);
394:               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
395:             }
396: #else
397:             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
398: #endif
399: #endif
400: #if defined (PETSC_USE_CTABLE)
401:             PetscTableFind(baij->colmap,in[j]+1,&col);
402:             col  = (col - 1)/bs;
403: #else
404:             col = (baij->colmap[in[j]] - 1)/bs;
405: #endif
406:             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
407:               DisAssemble_MPISBAIJ(mat);
408:               col =  in[j];
409:             }
410:           }
411:           else col = in[j];
412:           MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);
413:         }
414:       }
415:     } else {
416:       if (!baij->donotstash) {
417:         if (roworiented) {
418:           MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
419:         } else {
420:           MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
421:         }
422:       }
423:     }
424:   }
425:   return(0);
426: }

430: PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
431: {
432:   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
434:   PetscInt       bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend;
435:   PetscInt       bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data;

438:   for (i=0; i<m; i++) {
439:     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
440:     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
441:     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
442:       row = idxm[i] - bsrstart;
443:       for (j=0; j<n; j++) {
444:         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]);
445:         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
446:         if (idxn[j] >= bscstart && idxn[j] < bscend){
447:           col = idxn[j] - bscstart;
448:           MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);
449:         } else {
450:           if (!baij->colmap) {
451:             CreateColmap_MPIBAIJ_Private(mat);
452:           }
453: #if defined (PETSC_USE_CTABLE)
454:           PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);
455:           data --;
456: #else
457:           data = baij->colmap[idxn[j]/bs]-1;
458: #endif
459:           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
460:           else {
461:             col  = data + idxn[j]%bs;
462:             MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);
463:           }
464:         }
465:       }
466:     } else {
467:       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
468:     }
469:   }
470:  return(0);
471: }

475: PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
476: {
477:   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
479:   PetscReal      sum[2],*lnorm2;

482:   if (baij->size == 1) {
483:      MatNorm(baij->A,type,norm);
484:   } else {
485:     if (type == NORM_FROBENIUS) {
486:       PetscMalloc(2*sizeof(PetscReal),&lnorm2);
487:        MatNorm(baij->A,type,lnorm2);
488:       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
489:        MatNorm(baij->B,type,lnorm2);
490:       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
491:       MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);
492:       *norm = sqrt(sum[0] + 2*sum[1]);
493:       PetscFree(lnorm2);
494:     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
495:       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
496:       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
497:       PetscReal    *rsum,*rsum2,vabs;
498:       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
499:       PetscInt     brow,bcol,col,bs=baij->A->rmap.bs,row,grow,gcol,mbs=amat->mbs;
500:       MatScalar    *v;

502:       PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&rsum);
503:       rsum2 = rsum + mat->cmap.N;
504:       PetscMemzero(rsum,mat->cmap.N*sizeof(PetscReal));
505:       /* Amat */
506:       v = amat->a; jj = amat->j;
507:       for (brow=0; brow<mbs; brow++) {
508:         grow = bs*(rstart + brow);
509:         nz = amat->i[brow+1] - amat->i[brow];
510:         for (bcol=0; bcol<nz; bcol++){
511:           gcol = bs*(rstart + *jj); jj++;
512:           for (col=0; col<bs; col++){
513:             for (row=0; row<bs; row++){
514:               vabs = PetscAbsScalar(*v); v++;
515:               rsum[gcol+col] += vabs;
516:               /* non-diagonal block */
517:               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
518:             }
519:           }
520:         }
521:       }
522:       /* Bmat */
523:       v = bmat->a; jj = bmat->j;
524:       for (brow=0; brow<mbs; brow++) {
525:         grow = bs*(rstart + brow);
526:         nz = bmat->i[brow+1] - bmat->i[brow];
527:         for (bcol=0; bcol<nz; bcol++){
528:           gcol = bs*garray[*jj]; jj++;
529:           for (col=0; col<bs; col++){
530:             for (row=0; row<bs; row++){
531:               vabs = PetscAbsScalar(*v); v++;
532:               rsum[gcol+col] += vabs;
533:               rsum[grow+row] += vabs;
534:             }
535:           }
536:         }
537:       }
538:       MPI_Allreduce(rsum,rsum2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);
539:       *norm = 0.0;
540:       for (col=0; col<mat->cmap.N; col++) {
541:         if (rsum2[col] > *norm) *norm = rsum2[col];
542:       }
543:       PetscFree(rsum);
544:     } else {
545:       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
546:     }
547:   }
548:   return(0);
549: }

553: PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
554: {
555:   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
557:   PetscInt       nstash,reallocs;
558:   InsertMode     addv;

561:   if (baij->donotstash) {
562:     return(0);
563:   }

565:   /* make sure all processors are either in INSERTMODE or ADDMODE */
566:   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);
567:   if (addv == (ADD_VALUES|INSERT_VALUES)) {
568:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
569:   }
570:   mat->insertmode = addv; /* in case this processor had no cache */

572:   MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);
573:   MatStashScatterBegin_Private(&mat->bstash,baij->rangebs);
574:   MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
575:   PetscInfo2(0,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);
576:   MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
577:   PetscInfo2(0,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
578:   return(0);
579: }

583: PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
584: {
585:   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
586:   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
588:   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
589:   PetscInt       *row,*col,other_disassembled;
590:   PetscMPIInt    n;
591:   PetscTruth     r1,r2,r3;
592:   MatScalar      *val;
593:   InsertMode     addv = mat->insertmode;

595:   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */

598:   if (!baij->donotstash) {
599:     while (1) {
600:       MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
601:       if (!flg) break;

603:       for (i=0; i<n;) {
604:         /* Now identify the consecutive vals belonging to the same row */
605:         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
606:         if (j < n) ncols = j-i;
607:         else       ncols = n-i;
608:         /* Now assemble all these values with a single function call */
609:         MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);
610:         i = j;
611:       }
612:     }
613:     MatStashScatterEnd_Private(&mat->stash);
614:     /* Now process the block-stash. Since the values are stashed column-oriented,
615:        set the roworiented flag to column oriented, and after MatSetValues() 
616:        restore the original flags */
617:     r1 = baij->roworiented;
618:     r2 = a->roworiented;
619:     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
620:     baij->roworiented = PETSC_FALSE;
621:     a->roworiented    = PETSC_FALSE;
622:     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = PETSC_FALSE; /* b->roworinted */
623:     while (1) {
624:       MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);
625:       if (!flg) break;
626: 
627:       for (i=0; i<n;) {
628:         /* Now identify the consecutive vals belonging to the same row */
629:         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
630:         if (j < n) ncols = j-i;
631:         else       ncols = n-i;
632:         MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);
633:         i = j;
634:       }
635:     }
636:     MatStashScatterEnd_Private(&mat->bstash);
637:     baij->roworiented = r1;
638:     a->roworiented    = r2;
639:     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworinted */
640:   }

642:   MatAssemblyBegin(baij->A,mode);
643:   MatAssemblyEnd(baij->A,mode);

645:   /* determine if any processor has disassembled, if so we must 
646:      also disassemble ourselfs, in order that we may reassemble. */
647:   /*
648:      if nonzero structure of submatrix B cannot change then we know that
649:      no processor disassembled thus we can skip this stuff
650:   */
651:   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
652:     MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
653:     if (mat->was_assembled && !other_disassembled) {
654:       DisAssemble_MPISBAIJ(mat);
655:     }
656:   }

658:   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
659:     MatSetUpMultiply_MPISBAIJ(mat); /* setup Mvctx and sMvctx */
660:   }
661:   ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
662:   MatAssemblyBegin(baij->B,mode);
663:   MatAssemblyEnd(baij->B,mode);
664: 
665:   PetscFree(baij->rowvalues);
666:   baij->rowvalues = 0;

668:   return(0);
669: }

674: static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
675: {
676:   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
677:   PetscErrorCode    ierr;
678:   PetscInt          bs = mat->rmap.bs;
679:   PetscMPIInt       size = baij->size,rank = baij->rank;
680:   PetscTruth        iascii,isdraw;
681:   PetscViewer       sviewer;
682:   PetscViewerFormat format;

685:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
686:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
687:   if (iascii) {
688:     PetscViewerGetFormat(viewer,&format);
689:     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
690:       MatInfo info;
691:       MPI_Comm_rank(mat->comm,&rank);
692:       MatGetInfo(mat,MAT_LOCAL,&info);
693:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
694:               rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
695:               mat->rmap.bs,(PetscInt)info.memory);
696:       MatGetInfo(baij->A,MAT_LOCAL,&info);
697:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);
698:       MatGetInfo(baij->B,MAT_LOCAL,&info);
699:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);
700:       PetscViewerFlush(viewer);
701:       PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");
702:       VecScatterView(baij->Mvctx,viewer);
703:       return(0);
704:     } else if (format == PETSC_VIEWER_ASCII_INFO) {
705:       PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);
706:       return(0);
707:     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
708:       return(0);
709:     }
710:   }

712:   if (isdraw) {
713:     PetscDraw  draw;
714:     PetscTruth isnull;
715:     PetscViewerDrawGetDraw(viewer,0,&draw);
716:     PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
717:   }

719:   if (size == 1) {
720:     PetscObjectSetName((PetscObject)baij->A,mat->name);
721:     MatView(baij->A,viewer);
722:   } else {
723:     /* assemble the entire matrix onto first processor. */
724:     Mat          A;
725:     Mat_SeqSBAIJ *Aloc;
726:     Mat_SeqBAIJ  *Bloc;
727:     PetscInt     M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
728:     MatScalar    *a;

730:     /* Should this be the same type as mat? */
731:     MatCreate(mat->comm,&A);
732:     if (!rank) {
733:       MatSetSizes(A,M,N,M,N);
734:     } else {
735:       MatSetSizes(A,0,0,M,N);
736:     }
737:     MatSetType(A,MATMPISBAIJ);
738:     MatMPISBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);
739:     PetscLogObjectParent(mat,A);

741:     /* copy over the A part */
742:     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
743:     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
744:     PetscMalloc(bs*sizeof(PetscInt),&rvals);

746:     for (i=0; i<mbs; i++) {
747:       rvals[0] = bs*(baij->rstartbs + i);
748:       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
749:       for (j=ai[i]; j<ai[i+1]; j++) {
750:         col = (baij->cstartbs+aj[j])*bs;
751:         for (k=0; k<bs; k++) {
752:           MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);
753:           col++; a += bs;
754:         }
755:       }
756:     }
757:     /* copy over the B part */
758:     Bloc = (Mat_SeqBAIJ*)baij->B->data;
759:     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
760:     for (i=0; i<mbs; i++) {
761: 
762:       rvals[0] = bs*(baij->rstartbs + i);
763:       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
764:       for (j=ai[i]; j<ai[i+1]; j++) {
765:         col = baij->garray[aj[j]]*bs;
766:         for (k=0; k<bs; k++) {
767:           MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);
768:           col++; a += bs;
769:         }
770:       }
771:     }
772:     PetscFree(rvals);
773:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
774:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
775:     /* 
776:        Everyone has to call to draw the matrix since the graphics waits are
777:        synchronized across all processors that share the PetscDraw object
778:     */
779:     PetscViewerGetSingleton(viewer,&sviewer);
780:     if (!rank) {
781:       PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);
782:       MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);
783:     }
784:     PetscViewerRestoreSingleton(viewer,&sviewer);
785:     MatDestroy(A);
786:   }
787:   return(0);
788: }

792: PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
793: {
795:   PetscTruth     iascii,isdraw,issocket,isbinary;

798:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
799:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
800:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
801:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
802:   if (iascii || isdraw || issocket || isbinary) {
803:     MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);
804:   } else {
805:     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
806:   }
807:   return(0);
808: }

812: PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
813: {
814:   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;

818: #if defined(PETSC_USE_LOG)
819:   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N);
820: #endif
821:   MatStashDestroy_Private(&mat->stash);
822:   MatStashDestroy_Private(&mat->bstash);
823:   MatDestroy(baij->A);
824:   MatDestroy(baij->B);
825: #if defined (PETSC_USE_CTABLE)
826:   if (baij->colmap) {PetscTableDestroy(baij->colmap);}
827: #else
828:   PetscFree(baij->colmap);
829: #endif
830:   PetscFree(baij->garray);
831:   if (baij->lvec)   {VecDestroy(baij->lvec);}
832:   if (baij->Mvctx)  {VecScatterDestroy(baij->Mvctx);}
833:   if (baij->slvec0) {
834:     VecDestroy(baij->slvec0);
835:     VecDestroy(baij->slvec0b);
836:   }
837:   if (baij->slvec1) {
838:     VecDestroy(baij->slvec1);
839:     VecDestroy(baij->slvec1a);
840:     VecDestroy(baij->slvec1b);
841:   }
842:   if (baij->sMvctx)  {VecScatterDestroy(baij->sMvctx);}
843:   PetscFree(baij->rowvalues);
844:   PetscFree(baij->barray);
845:   PetscFree(baij->hd);
846: #if defined(PETSC_USE_MAT_SINGLE)
847:   PetscFree(baij->setvaluescopy);
848: #endif
849:   PetscFree(baij->in_loc);
850:   PetscFree(baij->v_loc);
851:   PetscFree(baij->rangebs);
852:   PetscFree(baij);

854:   PetscObjectChangeTypeName((PetscObject)mat,0);
855:   PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);
856:   PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);
857:   PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);
858:   PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);
859:   return(0);
860: }

864: PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
865: {
866:   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
868:   PetscInt       nt,mbs=a->mbs,bs=A->rmap.bs;
869:   PetscScalar    *x,*from,zero=0.0;
870: 
872:   VecGetLocalSize(xx,&nt);
873:   if (nt != A->cmap.n) {
874:     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
875:   }

877:   /* diagonal part */
878:   (*a->A->ops->mult)(a->A,xx,a->slvec1a);
879:   VecSet(a->slvec1b,zero);

881:   /* subdiagonal part */
882:   (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);
883:   CHKMEMQ;
884:   /* copy x into the vec slvec0 */
885:   VecGetArray(a->slvec0,&from);
886:   VecGetArray(xx,&x);
887:   CHKMEMQ;
888:   PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));
889:   CHKMEMQ;
890:   VecRestoreArray(a->slvec0,&from);
891: 
892:   CHKMEMQ;
893:   VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);
894:   CHKMEMQ;
895:   VecRestoreArray(xx,&x);
896:   CHKMEMQ;
897:   VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);
898:     CHKMEMQ;
899:   /* supperdiagonal part */
900:   (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);
901:     CHKMEMQ;
902:   return(0);
903: }

907: PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
908: {
909:   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
911:   PetscInt       nt;

914:   VecGetLocalSize(xx,&nt);
915:   if (nt != A->cmap.n) {
916:     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
917:   }
918:   VecGetLocalSize(yy,&nt);
919:   if (nt != A->rmap.N) {
920:     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
921:   }

923:   VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
924:   /* do diagonal part */
925:   (*a->A->ops->mult)(a->A,xx,yy);
926:   /* do supperdiagonal part */
927:   VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
928:   (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
929:   /* do subdiagonal part */
930:   (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
931:   VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
932:   VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);

934:   return(0);
935: }

939: PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
940: {
941:   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
943:   PetscInt       mbs=a->mbs,bs=A->rmap.bs;
944:   PetscScalar    *x,*from,zero=0.0;
945: 
947:   /*
948:   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
949:   PetscSynchronizedFlush(A->comm);
950:   */
951:   /* diagonal part */
952:   (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);
953:   VecSet(a->slvec1b,zero);

955:   /* subdiagonal part */
956:   (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);

958:   /* copy x into the vec slvec0 */
959:   VecGetArray(a->slvec0,&from);
960:   VecGetArray(xx,&x);
961:   PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));
962:   VecRestoreArray(a->slvec0,&from);
963: 
964:   VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);
965:   VecRestoreArray(xx,&x);
966:   VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);
967: 
968:   /* supperdiagonal part */
969:   (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);
970: 
971:   return(0);
972: }

976: PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
977: {
978:   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;

982:   VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
983:   /* do diagonal part */
984:   (*a->A->ops->multadd)(a->A,xx,yy,zz);
985:   /* do supperdiagonal part */
986:   VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
987:   (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);

989:   /* do subdiagonal part */
990:   (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
991:   VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
992:   VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);

994:   return(0);
995: }

997: /*
998:   This only works correctly for square matrices where the subblock A->A is the 
999:    diagonal block
1000: */
1003: PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1004: {
1005:   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;

1009:   /* if (a->rmap.N != a->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1010:   MatGetDiagonal(a->A,v);
1011:   return(0);
1012: }

1016: PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1017: {
1018:   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;

1022:   MatScale(a->A,aa);
1023:   MatScale(a->B,aa);
1024:   return(0);
1025: }

1029: PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1030: {
1031:   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1032:   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1034:   PetscInt       bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1035:   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend;
1036:   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

1039:   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1040:   mat->getrowactive = PETSC_TRUE;

1042:   if (!mat->rowvalues && (idx || v)) {
1043:     /*
1044:         allocate enough space to hold information from the longest row.
1045:     */
1046:     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1047:     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1048:     PetscInt     max = 1,mbs = mat->mbs,tmp;
1049:     for (i=0; i<mbs; i++) {
1050:       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1051:       if (max < tmp) { max = tmp; }
1052:     }
1053:     PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);
1054:     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1055:   }
1056: 
1057:   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1058:   lrow = row - brstart;  /* local row index */

1060:   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1061:   if (!v)   {pvA = 0; pvB = 0;}
1062:   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1063:   (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1064:   (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1065:   nztot = nzA + nzB;

1067:   cmap  = mat->garray;
1068:   if (v  || idx) {
1069:     if (nztot) {
1070:       /* Sort by increasing column numbers, assuming A and B already sorted */
1071:       PetscInt imark = -1;
1072:       if (v) {
1073:         *v = v_p = mat->rowvalues;
1074:         for (i=0; i<nzB; i++) {
1075:           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1076:           else break;
1077:         }
1078:         imark = i;
1079:         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1080:         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1081:       }
1082:       if (idx) {
1083:         *idx = idx_p = mat->rowindices;
1084:         if (imark > -1) {
1085:           for (i=0; i<imark; i++) {
1086:             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1087:           }
1088:         } else {
1089:           for (i=0; i<nzB; i++) {
1090:             if (cmap[cworkB[i]/bs] < cstart)
1091:               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1092:             else break;
1093:           }
1094:           imark = i;
1095:         }
1096:         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1097:         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1098:       }
1099:     } else {
1100:       if (idx) *idx = 0;
1101:       if (v)   *v   = 0;
1102:     }
1103:   }
1104:   *nz = nztot;
1105:   (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1106:   (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1107:   return(0);
1108: }

1112: PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1113: {
1114:   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

1117:   if (!baij->getrowactive) {
1118:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1119:   }
1120:   baij->getrowactive = PETSC_FALSE;
1121:   return(0);
1122: }

1126: PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1127: {
1128:   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1129:   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;

1132:   aA->getrow_utriangular = PETSC_TRUE;
1133:   return(0);
1134: }
1137: PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1138: {
1139:   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1140:   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;

1143:   aA->getrow_utriangular = PETSC_FALSE;
1144:   return(0);
1145: }

1149: PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1150: {
1151:   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;

1155:   MatRealPart(a->A);
1156:   MatRealPart(a->B);
1157:   return(0);
1158: }

1162: PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1163: {
1164:   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;

1168:   MatImaginaryPart(a->A);
1169:   MatImaginaryPart(a->B);
1170:   return(0);
1171: }

1175: PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1176: {
1177:   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;

1181:   MatZeroEntries(l->A);
1182:   MatZeroEntries(l->B);
1183:   return(0);
1184: }

1188: PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1189: {
1190:   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1191:   Mat            A = a->A,B = a->B;
1193:   PetscReal      isend[5],irecv[5];

1196:   info->block_size     = (PetscReal)matin->rmap.bs;
1197:   MatGetInfo(A,MAT_LOCAL,info);
1198:   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1199:   isend[3] = info->memory;  isend[4] = info->mallocs;
1200:   MatGetInfo(B,MAT_LOCAL,info);
1201:   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1202:   isend[3] += info->memory;  isend[4] += info->mallocs;
1203:   if (flag == MAT_LOCAL) {
1204:     info->nz_used      = isend[0];
1205:     info->nz_allocated = isend[1];
1206:     info->nz_unneeded  = isend[2];
1207:     info->memory       = isend[3];
1208:     info->mallocs      = isend[4];
1209:   } else if (flag == MAT_GLOBAL_MAX) {
1210:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);
1211:     info->nz_used      = irecv[0];
1212:     info->nz_allocated = irecv[1];