Actual source code: mpirowbs.c

  1: #define PETSCMAT_DLL

 3:  #include src/mat/impls/rowbs/mpi/mpirowbs.h

  5: #define CHUNCKSIZE_LOCAL   10

  9: static PetscErrorCode MatFreeRowbs_Private(Mat A,int n,int *i,PetscScalar *v)
 10: {

 14:   if (v) {
 15: #if defined(PETSC_USE_LOG)
 16:     int len = -n*(sizeof(int)+sizeof(PetscScalar));
 17: #endif
 18:     PetscFree(v);
 19:     PetscLogObjectMemory(A,len);
 20:   }
 21:   return(0);
 22: }

 26: static PetscErrorCode MatMallocRowbs_Private(Mat A,int n,int **i,PetscScalar **v)
 27: {
 29:   int len;

 32:   if (!n) {
 33:     *i = 0; *v = 0;
 34:   } else {
 35:     len = n*(sizeof(int) + sizeof(PetscScalar));
 36:     PetscMalloc(len,v);
 37:     PetscLogObjectMemory(A,len);
 38:     *i = (int*)(*v + n);
 39:   }
 40:   return(0);
 41: }

 45: PetscErrorCode MatScale_MPIRowbs(Mat inA,PetscScalar alpha)
 46: {
 47:   Mat_MPIRowbs   *a = (Mat_MPIRowbs*)inA->data;
 48:   BSspmat        *A = a->A;
 49:   BSsprow        *vs;
 50:   PetscScalar    *ap;
 51:   int            i,m = inA->rmap.n,nrow,j;

 55:   for (i=0; i<m; i++) {
 56:     vs   = A->rows[i];
 57:     nrow = vs->length;
 58:     ap   = vs->nz;
 59:     for (j=0; j<nrow; j++) {
 60:       ap[j] *= alpha;
 61:     }
 62:   }
 63:   PetscLogFlops(a->nz);
 64:   return(0);
 65: }

 67: /* ----------------------------------------------------------------- */
 70: static PetscErrorCode MatCreateMPIRowbs_local(Mat A,int nz,const int nnz[])
 71: {
 72:   Mat_MPIRowbs *bsif = (Mat_MPIRowbs*)A->data;
 74:   int   i,len,m = A->rmap.n,*tnnz;
 75:   BSspmat      *bsmat;
 76:   BSsprow      *vs;

 79:   PetscMalloc((m+1)*sizeof(int),&tnnz);
 80:   if (!nnz) {
 81:     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
 82:     if (nz <= 0)             nz = 1;
 83:     for (i=0; i<m; i++) tnnz[i] = nz;
 84:     nz      = nz*m;
 85:   } else {
 86:     nz = 0;
 87:     for (i=0; i<m; i++) {
 88:       if (nnz[i] <= 0) tnnz[i] = 1;
 89:       else             tnnz[i] = nnz[i];
 90:       nz += tnnz[i];
 91:     }
 92:   }

 94:   /* Allocate BlockSolve matrix context */
 95:   PetscNew(BSspmat,&bsif->A);
 96:   bsmat = bsif->A;
 97:   BSset_mat_icc_storage(bsmat,PETSC_FALSE);
 98:   BSset_mat_symmetric(bsmat,PETSC_FALSE);
 99:   len                    = m*(sizeof(BSsprow*)+ sizeof(BSsprow)) + 1;
100:   PetscMalloc(len,&bsmat->rows);
101:   bsmat->num_rows        = m;
102:   bsmat->global_num_rows = A->rmap.N;
103:   bsmat->map             = bsif->bsmap;
104:   vs                     = (BSsprow*)(bsmat->rows + m);
105:   for (i=0; i<m; i++) {
106:     bsmat->rows[i]  = vs;
107:     bsif->imax[i]   = tnnz[i];
108:     vs->diag_ind    = -1;
109:     MatMallocRowbs_Private(A,tnnz[i],&(vs->col),&(vs->nz));
110:     /* put zero on diagonal */
111:     /*vs->length            = 1;
112:     vs->col[0]      = i + bsif->rstart;
113:     vs->nz[0]       = 0.0;*/
114:     vs->length = 0;
115:     vs++;
116:   }
117:   PetscLogObjectMemory(A,sizeof(BSspmat) + len);
118:   bsif->nz               = 0;
119:   bsif->maxnz            = nz;
120:   bsif->sorted           = 0;
121:   bsif->roworiented      = PETSC_TRUE;
122:   bsif->nonew            = 0;
123:   bsif->bs_color_single  = 0;

125:   PetscFree(tnnz);
126:   return(0);
127: }

131: static PetscErrorCode MatSetValues_MPIRowbs_local(Mat AA,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
132: {
133:   Mat_MPIRowbs *mat = (Mat_MPIRowbs*)AA->data;
134:   BSspmat      *A = mat->A;
135:   BSsprow      *vs;
137:   int          *rp,k,a,b,t,ii,row,nrow,i,col,l,rmax;
138:   int          *imax = mat->imax,nonew = mat->nonew,sorted = mat->sorted;
139:   PetscScalar  *ap,value;

142:   for (k=0; k<m; k++) { /* loop over added rows */
143:     row = im[k];
144:     if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row);
145:     if (row >= AA->rmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,AA->rmap.n-1);
146:     vs   = A->rows[row];
147:     ap   = vs->nz; rp = vs->col;
148:     rmax = imax[row]; nrow = vs->length;
149:     a    = 0;
150:     for (l=0; l<n; l++) { /* loop over added columns */
151:       if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative col: %d",in[l]);
152:       if (in[l] >= AA->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],AA->cmap.N-1);
153:       col = in[l]; value = *v++;
154:       if (!sorted) a = 0; b = nrow;
155:       while (b-a > 5) {
156:         t = (b+a)/2;
157:         if (rp[t] > col) b = t;
158:         else             a = t;
159:       }
160:       for (i=a; i<b; i++) {
161:         if (rp[i] > col) break;
162:         if (rp[i] == col) {
163:           if (addv == ADD_VALUES) ap[i] += value;
164:           else                    ap[i] = value;
165:           goto noinsert;
166:         }
167:       }
168:       if (nonew) goto noinsert;
169:       if (nrow >= rmax) {
170:         /* there is no extra room in row, therefore enlarge */
171:         int    *itemp,*iout,*iin = vs->col;
172:         PetscScalar *vout,*vin = vs->nz,*vtemp;

174:         /* malloc new storage space */
175:         imax[row] += CHUNCKSIZE_LOCAL;
176:         MatMallocRowbs_Private(AA,imax[row],&itemp,&vtemp);
177:         vout = vtemp; iout = itemp;
178:         for (ii=0; ii<i; ii++) {
179:           vout[ii] = vin[ii];
180:           iout[ii] = iin[ii];
181:         }
182:         vout[i] = value;
183:         iout[i] = col;
184:         for (ii=i+1; ii<=nrow; ii++) {
185:           vout[ii] = vin[ii-1];
186:           iout[ii] = iin[ii-1];
187:         }
188:         /* free old row storage */
189:         if (rmax > 0) {
190:           MatFreeRowbs_Private(AA,rmax,vs->col,vs->nz);
191:         }
192:         vs->col           =  iout; vs->nz = vout;
193:         rmax              =  imax[row];
194:         mat->maxnz        += CHUNCKSIZE_LOCAL;
195:         mat->reallocs++;
196:       } else {
197:         /* shift higher columns over to make room for newie */
198:         for (ii=nrow-1; ii>=i; ii--) {
199:           rp[ii+1] = rp[ii];
200:           ap[ii+1] = ap[ii];
201:         }
202:         rp[i] = col;
203:         ap[i] = value;
204:       }
205:       nrow++;
206:       mat->nz++;
207:       AA->same_nonzero = PETSC_FALSE;
208:       noinsert:;
209:       a = i + 1;
210:     }
211:     vs->length = nrow;
212:   }
213:   return(0);
214: }


219: static PetscErrorCode MatAssemblyBegin_MPIRowbs_local(Mat A,MatAssemblyType mode)
220: {
222:   return(0);
223: }

227: static PetscErrorCode MatAssemblyEnd_MPIRowbs_local(Mat AA,MatAssemblyType mode)
228: {
229:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)AA->data;
230:   BSspmat      *A = a->A;
231:   BSsprow      *vs;
232:   int          i,j,rstart = AA->rmap.rstart;

235:   if (mode == MAT_FLUSH_ASSEMBLY) return(0);

237:   /* Mark location of diagonal */
238:   for (i=0; i<AA->rmap.n; i++) {
239:     vs = A->rows[i];
240:     for (j=0; j<vs->length; j++) {
241:       if (vs->col[j] == i + rstart) {
242:         vs->diag_ind = j;
243:         break;
244:       }
245:     }
246:     if (vs->diag_ind == -1) {
247:       SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"no diagonal entry");
248:     }
249:   }
250:   return(0);
251: }

255: static PetscErrorCode MatZeroRows_MPIRowbs_local(Mat A,PetscInt N,const PetscInt rz[],PetscScalar diag)
256: {
257:   Mat_MPIRowbs   *a = (Mat_MPIRowbs*)A->data;
258:   BSspmat        *l = a->A;
260:   int            i,m = A->rmap.n - 1,col,base=A->rmap.rstart;

263:   if (a->keepzeroedrows) {
264:     for (i=0; i<N; i++) {
265:       if (rz[i] < 0 || rz[i] > m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"row out of range");
266:       PetscMemzero(l->rows[rz[i]]->nz,l->rows[rz[i]]->length*sizeof(PetscScalar));
267:       if (diag != 0.0) {
268:         col=rz[i]+base;
269:         MatSetValues_MPIRowbs_local(A,1,&rz[i],1,&col,&diag,INSERT_VALUES);
270:       }
271:     }
272:   } else {
273:     if (diag != 0.0) {
274:       for (i=0; i<N; i++) {
275:         if (rz[i] < 0 || rz[i] > m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Out of range");
276:         if (l->rows[rz[i]]->length > 0) { /* in case row was completely empty */
277:           l->rows[rz[i]]->length = 1;
278:           l->rows[rz[i]]->nz[0]  = diag;
279:           l->rows[rz[i]]->col[0] = A->rmap.rstart + rz[i];
280:         } else {
281:           col=rz[i]+base;
282:           MatSetValues_MPIRowbs_local(A,1,&rz[i],1,&col,&diag,INSERT_VALUES);
283:         }
284:       }
285:     } else {
286:       for (i=0; i<N; i++) {
287:         if (rz[i] < 0 || rz[i] > m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Out of range");
288:         l->rows[rz[i]]->length = 0;
289:       }
290:     }
291:     A->same_nonzero = PETSC_FALSE;
292:   }
293:   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
294:   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
295:   return(0);
296: }

300: static PetscErrorCode MatNorm_MPIRowbs_local(Mat A,NormType type,PetscReal *norm)
301: {
302:   Mat_MPIRowbs *mat = (Mat_MPIRowbs*)A->data;
303:   BSsprow      *vs,**rs;
304:   PetscScalar  *xv;
305:   PetscReal    sum = 0.0;
307:   int          *xi,nz,i,j;

310:   rs = mat->A->rows;
311:   if (type == NORM_FROBENIUS) {
312:     for (i=0; i<A->rmap.n; i++) {
313:       vs = *rs++;
314:       nz = vs->length;
315:       xv = vs->nz;
316:       while (nz--) {
317: #if defined(PETSC_USE_COMPLEX)
318:         sum += PetscRealPart(PetscConj(*xv)*(*xv)); xv++;
319: #else
320:         sum += (*xv)*(*xv); xv++;
321: #endif
322:       }
323:     }
324:     *norm = sqrt(sum);
325:   } else if (type == NORM_1) { /* max column norm */
326:     PetscReal *tmp;
327:     PetscMalloc(A->cmap.n*sizeof(PetscReal),&tmp);
328:     PetscMemzero(tmp,A->cmap.n*sizeof(PetscReal));
329:     *norm = 0.0;
330:     for (i=0; i<A->rmap.n; i++) {
331:       vs = *rs++;
332:       nz = vs->length;
333:       xi = vs->col;
334:       xv = vs->nz;
335:       while (nz--) {
336:         tmp[*xi] += PetscAbsScalar(*xv);
337:         xi++; xv++;
338:       }
339:     }
340:     for (j=0; j<A->rmap.n; j++) {
341:       if (tmp[j] > *norm) *norm = tmp[j];
342:     }
343:     PetscFree(tmp);
344:   } else if (type == NORM_INFINITY) { /* max row norm */
345:     *norm = 0.0;
346:     for (i=0; i<A->rmap.n; i++) {
347:       vs = *rs++;
348:       nz = vs->length;
349:       xv = vs->nz;
350:       sum = 0.0;
351:       while (nz--) {
352:         sum += PetscAbsScalar(*xv); xv++;
353:       }
354:       if (sum > *norm) *norm = sum;
355:     }
356:   } else {
357:     SETERRQ(PETSC_ERR_SUP,"No support for the two norm");
358:   }
359:   return(0);
360: }

362: /* ----------------------------------------------------------------- */

366: PetscErrorCode MatSetValues_MPIRowbs(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode av)
367: {
368:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
370:   int   i,j,row,col,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
371:   PetscTruth   roworiented = a->roworiented;

374:   /* Note:  There's no need to "unscale" the matrix, since scaling is
375:      confined to a->pA, and we're working with a->A here */
376:   for (i=0; i<m; i++) {
377:     if (im[i] < 0) continue;
378:     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->rmap.N-1);
379:     if (im[i] >= rstart && im[i] < rend) {
380:       row = im[i] - rstart;
381:       for (j=0; j<n; j++) {
382:         if (in[j] < 0) continue;
383:         if (in[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->cmap.N-1);
384:         if (in[j] >= 0 && in[j] < mat->cmap.N){
385:           col = in[j];
386:           if (roworiented) {
387:             MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,v+i*n+j,av);
388:           } else {
389:             MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,v+i+j*m,av);
390:           }
391:         } else {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid column");}
392:       }
393:     } else {
394:       if (!a->donotstash) {
395:         if (roworiented) {
396:           MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);
397:         } else {
398:           MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);
399:         }
400:       }
401:     }
402:   }
403:   return(0);
404: }

408: PetscErrorCode MatAssemblyBegin_MPIRowbs(Mat mat,MatAssemblyType mode)
409: {
410:   MPI_Comm      comm = mat->comm;
412:   int         nstash,reallocs;
413:   InsertMode    addv;

416:   /* Note:  There's no need to "unscale" the matrix, since scaling is
417:             confined to a->pA, and we're working with a->A here */

419:   /* make sure all processors are either in INSERTMODE or ADDMODE */
420:   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
421:   if (addv == (ADD_VALUES|INSERT_VALUES)) {
422:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some procs inserted; others added");
423:   }
424:   mat->insertmode = addv; /* in case this processor had no cache */

426:   MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);
427:   MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
428:   PetscInfo2(0,"Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
429:   return(0);
430: }

434: static PetscErrorCode MatView_MPIRowbs_ASCII(Mat mat,PetscViewer viewer)
435: {
436:   Mat_MPIRowbs      *a = (Mat_MPIRowbs*)mat->data;
438:   int               i,j;
439:   PetscTruth        iascii;
440:   BSspmat           *A = a->A;
441:   BSsprow           **rs = A->rows;
442:   PetscViewerFormat format;

445:   PetscViewerGetFormat(viewer,&format);
446:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);

448:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
449:     int ind_l,ind_g,clq_l,clq_g,color;
450:     ind_l = BSlocal_num_inodes(a->pA);CHKERRBS(0);
451:     ind_g = BSglobal_num_inodes(a->pA);CHKERRBS(0);
452:     clq_l = BSlocal_num_cliques(a->pA);CHKERRBS(0);
453:     clq_g = BSglobal_num_cliques(a->pA);CHKERRBS(0);
454:     color = BSnum_colors(a->pA);CHKERRBS(0);
455:     PetscViewerASCIIPrintf(viewer,"  %d global inode(s), %d global clique(s), %d color(s)\n",ind_g,clq_g,color);
456:     PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d local inode(s), %d local clique(s)\n",a->rank,ind_l,clq_l);
457:   } else  if (format == PETSC_VIEWER_ASCII_COMMON) {
458:     for (i=0; i<A->num_rows; i++) {
459:       PetscViewerASCIISynchronizedPrintf(viewer,"row %d:",i+mat->rmap.rstart);
460:       for (j=0; j<rs[i]->length; j++) {
461:         if (rs[i]->nz[j]) {PetscViewerASCIISynchronizedPrintf(viewer," %d %g ",rs[i]->col[j],rs[i]->nz[j]);}
462:       }
463:       PetscViewerASCIISynchronizedPrintf(viewer,"\n");
464:     }
465:   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
466:     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
467:   } else {
468:     PetscViewerASCIIUseTabs(viewer,PETSC_NO);
469:     for (i=0; i<A->num_rows; i++) {
470:       PetscViewerASCIISynchronizedPrintf(viewer,"row %d:",i+mat->rmap.rstart);
471:       for (j=0; j<rs[i]->length; j++) {
472:         PetscViewerASCIISynchronizedPrintf(viewer," %d %g ",rs[i]->col[j],rs[i]->nz[j]);
473:       }
474:       PetscViewerASCIISynchronizedPrintf(viewer,"\n");
475:     }
476:     PetscViewerASCIIUseTabs(viewer,PETSC_YES);
477:   }
478:   PetscViewerFlush(viewer);
479:   return(0);
480: }

484: static PetscErrorCode MatView_MPIRowbs_Binary(Mat mat,PetscViewer viewer)
485: {
486:   Mat_MPIRowbs   *a = (Mat_MPIRowbs*)mat->data;
488:   PetscMPIInt    rank,size;
489:   PetscInt       i,M,m,*sbuff,*rowlengths;
490:   PetscInt       *recvcts,*recvdisp,fd,*cols,maxnz,nz,j;
491:   BSspmat        *A = a->A;
492:   BSsprow        **rs = A->rows;
493:   MPI_Comm       comm = mat->comm;
494:   MPI_Status     status;
495:   PetscScalar    *vals;
496:   MatInfo        info;

499:   MPI_Comm_size(comm,&size);
500:   MPI_Comm_rank(comm,&rank);

502:   M = mat->rmap.N; m = mat->rmap.n;
503:   /* First gather together on the first processor the lengths of 
504:      each row, and write them out to the file */
505:   PetscMalloc(m*sizeof(int),&sbuff);
506:   for (i=0; i<A->num_rows; i++) {
507:     sbuff[i] = rs[i]->length;
508:   }
509:   MatGetInfo(mat,MAT_GLOBAL_SUM,&info);
510:   if (!rank) {
511:     PetscViewerBinaryGetDescriptor(viewer,&fd);
512:     PetscMalloc((4+M)*sizeof(int),&rowlengths);
513:     PetscMalloc(size*sizeof(int),&recvcts);
514:     recvdisp = mat->rmap.range;
515:     for (i=0; i<size; i++) {
516:       recvcts[i] = recvdisp[i+1] - recvdisp[i];
517:     }
518:     /* first four elements of rowlength are the header */
519:     rowlengths[0] = mat->cookie;
520:     rowlengths[1] = mat->rmap.N;
521:     rowlengths[2] = mat->cmap.N;
522:     rowlengths[3] = (int)info.nz_used;
523:     MPI_Gatherv(sbuff,m,MPI_INT,rowlengths+4,recvcts,recvdisp,MPI_INT,0,comm);
524:     PetscFree(sbuff);
525:     PetscBinaryWrite(fd,rowlengths,4+M,PETSC_INT,PETSC_FALSE);
526:     /* count the number of nonzeros on each processor */
527:     PetscMemzero(recvcts,size*sizeof(int));
528:     for (i=0; i<size; i++) {
529:       for (j=recvdisp[i]; j<recvdisp[i+1]; j++) {
530:         recvcts[i] += rowlengths[j+3];
531:       }
532:     }
533:     /* allocate buffer long enough to hold largest one */
534:     maxnz = 0;
535:     for (i=0; i<size; i++) {
536:       maxnz = PetscMax(maxnz,recvcts[i]);
537:     }
538:     PetscFree(rowlengths);
539:     PetscFree(recvcts);
540:     PetscMalloc(maxnz*sizeof(int),&cols);

542:     /* binary store column indices for 0th processor */
543:     nz = 0;
544:     for (i=0; i<A->num_rows; i++) {
545:       for (j=0; j<rs[i]->length; j++) {
546:         cols[nz++] = rs[i]->col[j];
547:       }
548:     }
549:     PetscBinaryWrite(fd,cols,nz,PETSC_INT,PETSC_FALSE);

551:     /* receive and store column indices for all other processors */
552:     for (i=1; i<size; i++) {
553:       /* should tell processor that I am now ready and to begin the send */
554:       MPI_Recv(cols,maxnz,MPI_INT,i,mat->tag,comm,&status);
555:       MPI_Get_count(&status,MPI_INT,&nz);
556:       PetscBinaryWrite(fd,cols,nz,PETSC_INT,PETSC_FALSE);
557:     }
558:     PetscFree(cols);
559:     PetscMalloc(maxnz*sizeof(PetscScalar),&vals);

561:     /* binary store values for 0th processor */
562:     nz = 0;
563:     for (i=0; i<A->num_rows; i++) {
564:       for (j=0; j<rs[i]->length; j++) {
565:         vals[nz++] = rs[i]->nz[j];
566:       }
567:     }
568:     PetscBinaryWrite(fd,vals,nz,PETSC_SCALAR,PETSC_FALSE);

570:     /* receive and store nonzeros for all other processors */
571:     for (i=1; i<size; i++) {
572:       /* should tell processor that I am now ready and to begin the send */
573:       MPI_Recv(vals,maxnz,MPIU_SCALAR,i,mat->tag,comm,&status);
574:       MPI_Get_count(&status,MPIU_SCALAR,&nz);
575:       PetscBinaryWrite(fd,vals,nz,PETSC_SCALAR,PETSC_FALSE);
576:     }
577:     PetscFree(vals);
578:   } else {
579:     MPI_Gatherv(sbuff,m,MPI_INT,0,0,0,MPI_INT,0,comm);
580:     PetscFree(sbuff);

582:     /* count local nonzeros */
583:     nz = 0;
584:     for (i=0; i<A->num_rows; i++) {
585:       for (j=0; j<rs[i]->length; j++) {
586:         nz++;
587:       }
588:     }
589:     /* copy into buffer column indices */
590:     PetscMalloc(nz*sizeof(int),&cols);
591:     nz = 0;
592:     for (i=0; i<A->num_rows; i++) {
593:       for (j=0; j<rs[i]->length; j++) {
594:         cols[nz++] = rs[i]->col[j];
595:       }
596:     }
597:     /* send */  /* should wait until processor zero tells me to go */
598:     MPI_Send(cols,nz,MPI_INT,0,mat->tag,comm);
599:     PetscFree(cols);

601:     /* copy into buffer column values */
602:     PetscMalloc(nz*sizeof(PetscScalar),&vals);
603:     nz   = 0;
604:     for (i=0; i<A->num_rows; i++) {
605:       for (j=0; j<rs[i]->length; j++) {
606:         vals[nz++] = rs[i]->nz[j];
607:       }
608:     }
609:     /* send */  /* should wait until processor zero tells me to go */
610:     MPI_Send(vals,nz,MPIU_SCALAR,0,mat->tag,comm);
611:     PetscFree(vals);
612:   }

614:   return(0);
615: }

619: PetscErrorCode MatView_MPIRowbs(Mat mat,PetscViewer viewer)
620: {
621:   Mat_MPIRowbs *bsif = (Mat_MPIRowbs*)mat->data;
623:   PetscTruth   iascii,isbinary;

626:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
627:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
628:   if (!bsif->blocksolveassembly) {
629:     MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
630:   }
631:   if (iascii) {
632:     MatView_MPIRowbs_ASCII(mat,viewer);
633:   } else if (isbinary) {
634:     MatView_MPIRowbs_Binary(mat,viewer);
635:   } else {
636:     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIRowbs matrices",((PetscObject)viewer)->type_name);
637:   }
638:   return(0);
639: }
640: 
643: static PetscErrorCode MatAssemblyEnd_MPIRowbs_MakeSymmetric(Mat mat)
644: {
645:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
646:   BSspmat      *A = a->A;
647:   BSsprow      *vs;
648:   int          size,rank,M,rstart,tag,i,j,*rtable,*w1,*w3,*w4,len,proc,nrqs;
649:   int          msz,*pa,bsz,nrqr,**rbuf1,**sbuf1,**ptr,*tmp,*ctr,col,idx,row;
651:   int          ctr_j,*sbuf1_j,k;
652:   PetscScalar  val=0.0;
653:   MPI_Comm     comm;
654:   MPI_Request  *s_waits1,*r_waits1;
655:   MPI_Status   *s_status,*r_status;

658:   comm   = mat->comm;
659:   tag    = mat->tag;
660:   size   = a->size;
661:   rank   = a->rank;
662:   M      = mat->rmap.N;
663:   rstart = mat->rmap.rstart;

665:   PetscMalloc(M*sizeof(int),&rtable);
666:   /* Create hash table for the mapping :row -> proc */
667:   for (i=0,j=0; i<size; i++) {
668:     len = mat->rmap.range[i+1];
669:     for (; j<len; j++) {
670:       rtable[j] = i;
671:     }
672:   }

674:   /* Evaluate communication - mesg to whom, length of mesg, and buffer space
675:      required. Based on this, buffers are allocated, and data copied into them. */
676:   PetscMalloc(size*4*sizeof(int),&w1);/*  mesg size */
677:   w3   = w1 + 2*size;       /* no of IS that needs to be sent to proc i */
678:   w4   = w3 + size;       /* temp work space used in determining w1,  w3 */
679:   PetscMemzero(w1,size*3*sizeof(int)); /* initialize work vector */

681:   for (i=0;  i<mat->rmap.n; i++) {
682:     PetscMemzero(w4,size*sizeof(int)); /* initialize work vector */
683:     vs = A->rows[i];
684:     for (j=0; j<vs->length; j++) {
685:       proc = rtable[vs->col[j]];
686:       w4[proc]++;
687:     }
688:     for (j=0; j<size; j++) {
689:       if (w4[j]) { w1[2*j] += w4[j]; w3[j]++;}
690:     }
691:   }
692: 
693:   nrqs       = 0;              /* number of outgoing messages */
694:   msz        = 0;              /* total mesg length (for all proc */
695:   w1[2*rank] = 0;              /* no mesg sent to itself */
696:   w3[rank]   = 0;
697:   for (i=0; i<size; i++) {
698:     if (w1[2*i])  {w1[2*i+1] = 1; nrqs++;} /* there exists a message to proc i */
699:   }
700:   /* pa - is list of processors to communicate with */
701:   PetscMalloc((nrqs+1)*sizeof(int),&pa);
702:   for (i=0,j=0; i<size; i++) {
703:     if (w1[2*i]) {pa[j] = i; j++;}
704:   }

706:   /* Each message would have a header = 1 + 2*(no of ROWS) + data */
707:   for (i=0; i<nrqs; i++) {
708:     j       = pa[i];
709:     w1[2*j] += w1[2*j+1] + 2*w3[j];
710:     msz     += w1[2*j];
711:   }
712: 
713:   /* Do a global reduction to determine how many messages to expect */
714:   PetscMaxSum(comm,w1,&bsz,&nrqr);

716:   /* Allocate memory for recv buffers . Prob none if nrqr = 0 ???? */
717:   len      = (nrqr+1)*sizeof(int*) + nrqr*bsz*sizeof(int);
718:   PetscMalloc(len,&rbuf1);
719:   rbuf1[0] = (int*)(rbuf1 + nrqr);
720:   for (i=1; i<nrqr; ++i) rbuf1[i] = rbuf1[i-1] + bsz;

722:   /* Post the receives */
723:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&r_waits1);
724:   for (i=0; i<nrqr; ++i){
725:     MPI_Irecv(rbuf1[i],bsz,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits1+i);
726:   }
727: 
728:   /* Allocate Memory for outgoing messages */
729:   len   = 2*size*sizeof(int*) + (size+msz)*sizeof(int);
730:   PetscMalloc(len,&sbuf1);
731:   ptr   = sbuf1 + size;     /* Pointers to the data in outgoing buffers */
732:   PetscMemzero(sbuf1,2*size*sizeof(int*));
733:   tmp   = (int*)(sbuf1 + 2*size);
734:   ctr   = tmp + msz;

736:   {
737:     int *iptr = tmp,ict  = 0;
738:     for (i=0; i<nrqs; i++) {
739:       j        = pa[i];
740:       iptr    += ict;
741:       sbuf1[j] = iptr;
742:       ict      = w1[2*j];
743:     }
744:   }

746:   /* Form the outgoing messages */
747:   /* Clean up the header space */
748:   for (i=0; i<nrqs; i++) {
749:     j           = pa[i];
750:     sbuf1[j][0] = 0;
751:     PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(int));
752:     ptr[j]      = sbuf1[j] + 2*w3[j] + 1;
753:   }

755:   /* Parse the matrix and copy the data into sbuf1 */
756:   for (i=0; i<mat->rmap.n; i++) {
757:     PetscMemzero(ctr,size*sizeof(int));
758:     vs = A->rows[i];
759:     for (j=0; j<vs->length; j++) {
760:       col  = vs->col[j];
761:       proc = rtable[col];
762:       if (proc != rank) { /* copy to the outgoing buffer */
763:         ctr[proc]++;
764:           *ptr[proc] = col;
765:           ptr[proc]++;
766:       } else {
767:         row = col - rstart;
768:         col = i + rstart;
769:         MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,&val,ADD_VALUES);
770:       }
771:     }
772:     /* Update the headers for the current row */
773:     for (j=0; j<size; j++) { /* Can Optimise this loop by using pa[] */
774:       if ((ctr_j = ctr[j])) {
775:         sbuf1_j        = sbuf1[j];
776:         k               = ++sbuf1_j[0];
777:         sbuf1_j[2*k]   = ctr_j;
778:         sbuf1_j[2*k-1] = i + rstart;
779:       }
780:     }
781:   }
782:    /* Check Validity of the outgoing messages */
783:   {
784:     int sum;
785:     for (i=0 ; i<nrqs ; i++) {
786:       j = pa[i];
787:       if (w3[j] != sbuf1[j][0]) {SETERRQ(PETSC_ERR_PLIB,"Blew it! Header[1] mismatch!\n"); }
788:     }

790:     for (i=0 ; i<nrqs ; i++) {
791:       j = pa[i];
792:       sum = 1;
793:       for (k = 1; k <= w3[j]; k++) sum += sbuf1[j][2*k]+2;
794:       if (sum != w1[2*j]) { SETERRQ(PETSC_ERR_PLIB,"Blew it! Header[2-n] mismatch!\n"); }
795:     }
796:   }
797: 
798:   /* Now post the sends */
799:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);
800:   for (i=0; i<nrqs; ++i) {
801:     j    = pa[i];
802:     MPI_Isend(sbuf1[j],w1[2*j],MPI_INT,j,tag,comm,s_waits1+i);
803:   }
804: 
805:   /* Receive messages*/
806:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status);
807:   for (i=0; i<nrqr; ++i) {
808:     MPI_Waitany(nrqr,r_waits1,&idx,r_status+i);
809:     /* Process the Message */
810:     {
811:       int    *rbuf1_i,n_row,ct1;

813:       rbuf1_i = rbuf1[idx];
814:       n_row   = rbuf1_i[0];
815:       ct1     = 2*n_row+1;
816:       val     = 0.0;
817:       /* Optimise this later */
818:       for (j=1; j<=n_row; j++) {
819:         col = rbuf1_i[2*j-1];
820:         for (k=0; k<rbuf1_i[2*j]; k++,ct1++) {
821:           row = rbuf1_i[ct1] - rstart;
822:           MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,&val,ADD_VALUES);
823:         }
824:       }
825:     }
826:   }

828:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status);
829:   if (nrqs) {MPI_Waitall(nrqs,s_waits1,s_status);}

831:   PetscFree(rtable);
832:   PetscFree(w1);
833:   PetscFree(pa);
834:   PetscFree(rbuf1);
835:   PetscFree(sbuf1);
836:   PetscFree(r_waits1);
837:   PetscFree(s_waits1);
838:   PetscFree(r_status);
839:   PetscFree(s_status);
840:   return(0);
841: }

843: /*
844:      This does the BlockSolve portion of the matrix assembly.
845:    It is provided in a separate routine so that users can
846:    operate on the matrix (using MatScale(), MatShift() etc.) after 
847:    the matrix has been assembled but before BlockSolve has sucked it
848:    in and devoured it.
849: */
852: PetscErrorCode MatAssemblyEnd_MPIRowbs_ForBlockSolve(Mat mat)
853: {
854:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
856:   int          ldim,low,high,i;
857:   PetscScalar  *diag;

860:   if ((mat->was_assembled) && (!mat->same_nonzero)) {  /* Free the old info */
861:     if (a->pA)       {BSfree_par_mat(a->pA);CHKERRBS(0);}
862:     if (a->comm_pA)  {BSfree_comm(a->comm_pA);CHKERRBS(0);}
863:   }

865:   if ((!mat->same_nonzero) || (!mat->was_assembled)) {
866:     /* Indicates bypassing cliques in coloring */
867:     if (a->bs_color_single) {
868:       BSctx_set_si(a->procinfo,100);
869:     }
870:     /* Form permuted matrix for efficient parallel execution */
871:     a->pA = BSmain_perm(a->procinfo,a->A);CHKERRBS(0);
872:     /* Set up the communication */
873:     a->comm_pA = BSsetup_forward(a->pA,a->procinfo);CHKERRBS(0);
874:   } else {
875:     /* Repermute the matrix */
876:     BSmain_reperm(a->procinfo,a->A,a->pA);CHKERRBS(0);
877:   }

879:   /* Symmetrically scale the matrix by the diagonal */
880:   BSscale_diag(a->pA,a->pA->diag,a->procinfo);CHKERRBS(0);

882:   /* Store inverse of square root of permuted diagonal scaling matrix */
883:   VecGetLocalSize(a->diag,&ldim);
884:   VecGetOwnershipRange(a->diag,&low,&high);
885:   VecGetArray(a->diag,&diag);
886:   for (i=0; i<ldim; i++) {
887:     if (a->pA->scale_diag[i] != 0.0) {
888:       diag[i] = 1.0/sqrt(PetscAbsScalar(a->pA->scale_diag[i]));
889:     } else {
890:       diag[i] = 1.0;
891:     }
892:   }
893:   VecRestoreArray(a->diag,&diag);
894:   a->assembled_icc_storage = a->A->icc_storage;
895:   a->blocksolveassembly = 1;
896:   mat->was_assembled    = PETSC_TRUE;
897:   mat->same_nonzero     = PETSC_TRUE;
898:   PetscInfo(mat,"Completed BlockSolve95 matrix assembly\n");
899:   return(0);
900: }

904: PetscErrorCode MatAssemblyEnd_MPIRowbs(Mat mat,MatAssemblyType mode)
905: {
906:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
908:   int          i,n,row,col,*rows,*cols,rstart,nzcount,flg,j,ncols;
909:   PetscScalar  *vals,val;
910:   InsertMode   addv = mat->insertmode;

913:   while (1) {
914:     MatStashScatterGetMesg_Private(&mat->stash,&n,&rows,&cols,&vals,&flg);
915:     if (!flg) break;
916: 
917:     for (i=0; i<n;) {
918:       /* Now identify the consecutive vals belonging to the same row */
919:       for (j=i,rstart=rows[j]; j<n; j++) { if (rows[j] != rstart) break; }
920:       if (j < n) ncols = j-i;
921:       else       ncols = n-i;
922:       /* Now assemble all these values with a single function call */
923:       MatSetValues_MPIRowbs(mat,1,rows+i,ncols,cols+i,vals+i,addv);
924:       i = j;
925:     }
926:   }
927:   MatStashScatterEnd_Private(&mat->stash);

929:   rstart = mat->rmap.rstart;
930:   nzcount = a->nz; /* This is the number of nonzeros entered by the user */
931:   /* BlockSolve requires that the matrix is structurally symmetric */
932:   if (mode == MAT_FINAL_ASSEMBLY && !mat->structurally_symmetric) {
933:     MatAssemblyEnd_MPIRowbs_MakeSymmetric(mat);
934:   }
935: 
936:   /* BlockSolve requires that all the diagonal elements are set */
937:   val  = 0.0;
938:   for (i=0; i<mat->rmap.n; i++) {
939:     row = i; col = i + rstart;
940:     MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,&val,ADD_VALUES);
941:   }
942: 
943:   MatAssemblyBegin_MPIRowbs_local(mat,mode);
944:   MatAssemblyEnd_MPIRowbs_local(mat,mode);
945: 
946:   a->blocksolveassembly = 0;
947:   PetscInfo4(mat,"Matrix size: %d X %d; storage space: %d unneeded,%d used\n",mat->rmap.n,mat->cmap.n,a->maxnz-a->nz,a->nz);
948:   PetscInfo2(mat,"User entered %d nonzeros, PETSc added %d\n",nzcount,a->nz-nzcount);
949:   PetscInfo1(mat,"Number of mallocs during MatSetValues is %d\n",a->reallocs);
950:   return(0);
951: }

955: PetscErrorCode MatZeroEntries_MPIRowbs(Mat mat)
956: {
957:   Mat_MPIRowbs *l = (Mat_MPIRowbs*)mat->data;
958:   BSspmat      *A = l->A;
959:   BSsprow      *vs;
960:   int          i,j;

963:   for (i=0; i <mat->rmap.n; i++) {
964:     vs = A->rows[i];
965:     for (j=0; j< vs->length; j++) vs->nz[j] = 0.0;
966:   }
967:   return(0);
968: }

970: /* the code does not do the diagonal entries correctly unless the 
971:    matrix is square and the column and row owerships are identical.
972:    This is a BUG.
973: */

977: PetscErrorCode MatZeroRows_MPIRowbs(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
978: {
979:   Mat_MPIRowbs   *l = (Mat_MPIRowbs*)A->data;
981:   int            i,*owners = A->rmap.range,size = l->size;
982:   int            *nprocs,j,idx,nsends;
983:   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
984:   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
985:   int            *lens,imdex,*lrows,*values;
986:   MPI_Comm       comm = A->comm;
987:   MPI_Request    *send_waits,*recv_waits;
988:   MPI_Status     recv_status,*send_status;
989:   PetscTruth     found;

992:   /*  first count number of contributors to each processor */
993:   PetscMalloc(2*size*sizeof(int),&nprocs);
994:   PetscMemzero(nprocs,2*size*sizeof(int));
995:   PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
996:   for (i=0; i<N; i++) {
997:     idx   = rows[i];
998:     found = PETSC_FALSE;
999:     for (j=0; j<size; j++) {
1000:       if (idx >= owners[j] && idx < owners[j+1]) {
1001:         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
1002:       }
1003:     }
1004:     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
1005:   }
1006:   nsends = 0;  for (i=0; i<size; i++) {nsends += nprocs[2*i+1];}

1008:   /* inform other processors of number of messages and max length*/
1009:   PetscMaxSum(comm,nprocs,&nmax,&nrecvs);

1011:   /* post receives:   */
1012:   PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
1013:   PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
1014:   for (i=0; i<nrecvs; i++) {
1015:     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
1016:   }

1018:   /* do sends:
1019:       1) starts[i] gives the starting index in svalues for stuff going to 
1020:          the ith processor
1021:   */
1022:   PetscMalloc((N+1)*sizeof(int),&svalues);
1023:   PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
1024:   PetscMalloc((size+1)*sizeof(int),&starts);
1025:   starts[0] = 0;
1026:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1027:   for (i=0; i<N; i++) {
1028:     svalues[starts[owner[i]]++] = rows[i];
1029:   }

1031:   starts[0] = 0;
1032:   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1033:   count = 0;
1034:   for (i=0; i<size; i++) {
1035:     if (nprocs[2*i+1]) {
1036:       MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);
1037:     }
1038:   }
1039:   PetscFree(starts);

1041:   base = owners[rank];

1043:   /*  wait on receives */
1044:   PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
1045:   source = lens + nrecvs;
1046:   count = nrecvs; slen = 0;
1047:   while (count) {
1048:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
1049:     /* unpack receives into our local space */
1050:     MPI_Get_count(&recv_status,MPI_INT,&n);
1051:     source[imdex]  = recv_status.MPI_SOURCE;
1052:     lens[imdex]    = n;
1053:     slen           += n;
1054:     count--;
1055:   }
1056:   PetscFree(recv_waits);
1057: 
1058:   /* move the data into the send scatter */
1059:   PetscMalloc((slen+1)*sizeof(int),&lrows);
1060:   count = 0;
1061:   for (i=0; i<nrecvs; i++) {
1062:     values = rvalues + i*nmax;
1063:     for (j=0; j<lens[i]; j++) {
1064:       lrows[count++] = values[j] - base;
1065:     }
1066:   }
1067:   PetscFree(rvalues);
1068:   PetscFree(lens);
1069:   PetscFree(owner);
1070:   PetscFree(nprocs);
1071: 
1072:   /* actually zap the local rows */
1073:   MatZeroRows_MPIRowbs_local(A,slen,lrows,diag);
1074:   PetscFree(lrows);

1076:   /* wait on sends */
1077:   if (nsends) {
1078:     PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
1079:     MPI_Waitall(nsends,send_waits,send_status);
1080:     PetscFree(send_status);
1081:   }
1082:   PetscFree(send_waits);
1083:   PetscFree(svalues);

1085:   return(0);
1086: }

1090: PetscErrorCode MatNorm_MPIRowbs(Mat mat,NormType type,PetscReal *norm)
1091: {
1092:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1093:   BSsprow      *vs,**rs;
1094:   PetscScalar  *xv;
1095:   PetscReal    sum = 0.0;
1097:   int          *xi,nz,i,j;

1100:   if (a->size == 1) {
1101:     MatNorm_MPIRowbs_local(mat,type,norm);
1102:   } else {
1103:     rs = a->A->rows;
1104:     if (type == NORM_FROBENIUS) {
1105:       for (i=0; i<mat->rmap.n; i++) {
1106:         vs = *rs++;
1107:         nz = vs->length;
1108:         xv = vs->nz;
1109:         while (nz--) {
1110: #if defined(PETSC_USE_COMPLEX)
1111:           sum += PetscRealPart(PetscConj(*xv)*(*xv)); xv++;
1112: #else
1113:           sum += (*xv)*(*xv); xv++;
1114: #endif
1115:         }
1116:       }
1117:       MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);
1118:       *norm = sqrt(*norm);
1119:     } else if (type == NORM_1) { /* max column norm */
1120:       PetscReal *tmp,*tmp2;
1121:       PetscMalloc(mat->cmap.n*sizeof(PetscReal),&tmp);
1122:       PetscMalloc(mat->cmap.n*sizeof(PetscReal),&tmp2);
1123:       PetscMemzero(tmp,mat->cmap.n*sizeof(PetscReal));
1124:       *norm = 0.0;
1125:       for (i=0; i<mat->rmap.n; i++) {
1126:         vs = *rs++;
1127:         nz = vs->length;
1128:         xi = vs->col;
1129:         xv = vs->nz;
1130:         while (nz--) {
1131:           tmp[*xi] += PetscAbsScalar(*xv);
1132:           xi++; xv++;
1133:         }
1134:       }
1135:       MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);
1136:       for (j=0; j<mat->cmap.n; j++) {
1137:         if (tmp2[j] > *norm) *norm = tmp2[j];
1138:       }
1139:       PetscFree(tmp);
1140:       PetscFree(tmp2);
1141:     } else if (type == NORM_INFINITY) { /* max row norm */
1142:       PetscReal ntemp = 0.0;
1143:       for (i=0; i<mat->rmap.n; i++) {
1144:         vs = *rs++;
1145:         nz = vs->length;
1146:         xv = vs->nz;
1147:         sum = 0.0;
1148:         while (nz--) {
1149:           sum += PetscAbsScalar(*xv); xv++;
1150:         }
1151:         if (sum > ntemp) ntemp = sum;
1152:       }
1153:       MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);
1154:     } else {
1155:       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1156:     }
1157:   }
1158:   return(0);
1159: }

1163: PetscErrorCode MatMult_MPIRowbs(Mat mat,Vec xx,Vec yy)
1164: {
1165:   Mat_MPIRowbs *bsif = (Mat_MPIRowbs*)mat->data;
1166:   BSprocinfo   *bspinfo = bsif->procinfo;
1167:   PetscScalar  *xxa,*xworka,*yya;

1171:   if (!bsif->blocksolveassembly) {
1172:     MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
1173:   }

1175:   /* Permute and apply diagonal scaling:  [ xwork = D^{1/2} * x ] */
1176:   if (!bsif->vecs_permscale) {
1177:     VecGetArray(bsif->xwork,&xworka);
1178:     VecGetArray(xx,&xxa);
1179:     BSperm_dvec(xxa,xworka,bsif->pA->perm);CHKERRBS(0);
1180:     VecRestoreArray(bsif->xwork,&xworka);
1181:     VecRestoreArray(xx,&xxa);
1182:     VecPointwiseDivide(xx,bsif->xwork,bsif->diag);
1183:   }

1185:   VecGetArray(xx,&xxa);
1186:   VecGetArray(yy,&yya);
1187:   /* Do lower triangular multiplication:  [ y = L * xwork ] */
1188:   if (bspinfo->single) {
1189:     BSforward1(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1190:   }  else {
1191:     BSforward(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1192:   }
1193: 
1194:   /* Do upper triangular multiplication:  [ y = y + L^{T} * xwork ] */
1195:   if (mat->symmetric) {
1196:     if (bspinfo->single){
1197:       BSbackward1(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1198:     } else {
1199:       BSbackward(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1200:     }
1201:   }
1202:   /* not needed for ILU version since forward does it all */
1203:   VecRestoreArray(xx,&xxa);
1204:   VecRestoreArray(yy,&yya);

1206:   /* Apply diagonal scaling to vector:  [  y = D^{1/2} * y ] */
1207:   if (!bsif->vecs_permscale) {
1208:     VecGetArray(bsif->xwork,&xworka);
1209:     VecGetArray(xx,&xxa);
1210:     BSiperm_dvec(xworka,xxa,bsif->pA->perm);CHKERRBS(0);
1211:     VecRestoreArray(bsif->xwork,&xworka);
1212:     VecRestoreArray(xx,&xxa);
1213:     VecPointwiseDivide(bsif->xwork,yy,bsif->diag);
1214:     VecGetArray(bsif->xwork,&xworka);
1215:     VecGetArray(yy,&yya);
1216:     BSiperm_dvec(xworka,yya,bsif->pA->perm);CHKERRBS(0);
1217:     VecRestoreArray(bsif->xwork,&xworka);
1218:     VecRestoreArray(yy,&yya);
1219:   }
1220:   PetscLogFlops(2*bsif->nz - mat->cmap.n);

1222:   return(0);
1223: }

1227: PetscErrorCode MatMultAdd_MPIRowbs(Mat mat,Vec xx,Vec yy,Vec zz)
1228: {
1230:   PetscScalar  one = 1.0;

1233:   (*mat->ops->mult)(mat,xx,zz);
1234:   VecAXPY(zz,one,yy);
1235:   return(0);
1236: }

1240: PetscErrorCode MatGetInfo_MPIRowbs(Mat A,MatInfoType flag,MatInfo *info)
1241: {
1242:   Mat_MPIRowbs *mat = (Mat_MPIRowbs*)A->data;
1243:   PetscReal    isend[5],irecv[5];

1247:   info->rows_global    = (double)A->rmap.N;
1248:   info->columns_global = (double)A->cmap.N;
1249:   info->rows_local     = (double)A->cmap.n;
1250:   info->columns_local  = (double)A->rmap.n;
1251:   info->block_size     = 1.0;
1252:   info->mallocs        = (double)mat->reallocs;
1253:   isend[0] = mat->nz; isend[1] = mat->maxnz; isend[2] =  mat->maxnz -  mat->nz;
1254:   isend[3] = A->mem;  isend[4] = info->mallocs;

1256:   if (flag == MAT_LOCAL) {
1257:     info->nz_used      = isend[0];
1258:     info->nz_allocated = isend[1];
1259:     info->nz_unneeded  = isend[2];
1260:     info->memory       = isend[3];
1261:     info->mallocs      = isend[4];
1262:   } else if (flag == MAT_GLOBAL_MAX) {
1263:     MPI_Allreduce(isend,irecv,3,MPIU_REAL,MPI_MAX,A->comm);
1264:     info->nz_used      = irecv[0];
1265:     info->nz_allocated = irecv[1];
1266:     info->nz_unneeded  = irecv[2];
1267:     info->memory       = irecv[3];
1268:     info->mallocs      = irecv[4];
1269:   } else if (flag == MAT_GLOBAL_SUM) {
1270:     MPI_Allreduce(isend,irecv,3,MPIU_REAL,MPI_SUM,A->comm);
1271:     info->nz_used      = irecv[0];
1272:     info->nz_allocated = irecv[1];
1273:     info->nz_unneeded  = irecv[2];
1274:     info->memory       = irecv[3];
1275:     info->mallocs      = irecv[4];
1276:   }
1277:   return(0);
1278: }

1282: PetscErrorCode MatGetDiagonal_MPIRowbs(Mat mat,Vec v)
1283: {
1284:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1285:   BSsprow      **rs = a->A->rows;
1287:   int          i,n;
1288:   PetscScalar  *x,zero = 0.0;

1291:   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1292:   if (!a->blocksolveassembly) {
1293:     MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
1294:   }

1296:   VecSet(v,zero);
1297:   VecGetLocalSize(v,&n);
1298:   if (n != mat->rmap.n) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
1299:   VecGetArray(v,&x);
1300:   for (i=0; i<mat->rmap.n; i++) {
1301:     x[i] = rs[i]->nz[rs[i]->diag_ind];
1302:   }
1303:   VecRestoreArray(v,&x);
1304:   return(0);
1305: }

1309: PetscErrorCode MatDestroy_MPIRowbs(Mat mat)
1310: {
1311:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1312:   BSspmat      *A = a->A;
1313:   BSsprow      *vs;
1315:   int          i;

1318: #if defined(PETSC_USE_LOG)
1319:   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->rmap.N,mat->cmap.N);
1320: #endif
1321:   MatStashDestroy_Private(&mat->stash);
1322:   if (a->bsmap) {
1323:     PetscFree(a->bsmap->vlocal2global);
1324:     PetscFree(a->bsmap->vglobal2local);
1325:     if (a->bsmap->vglobal2proc)  (*a->bsmap->free_g2p)(a->bsmap->vglobal2proc);
1326:     PetscFree(a->bsmap);
1327:   }

1329:   if (A) {
1330:     for (i=0; i<mat->rmap.n; i++) {
1331:       vs = A->rows[i];
1332:       MatFreeRowbs_Private(mat,vs->length,vs->col,vs->nz);
1333:     }
1334:     /* Note: A->map = a->bsmap is freed above */
1335:     PetscFree(A->rows);
1336:     PetscFree(A);
1337:   }
1338:   if (a->procinfo) {BSfree_ctx(a->procinfo);CHKERRBS(0);}
1339:   if (a->diag)     {VecDestroy(a->diag);}
1340:   if (a->xwork)    {VecDestroy(a->xwork);}
1341:   if (a->pA)       {BSfree_par_mat(a->pA);CHKERRBS(0);}
1342:   if (a->fpA)      {BSfree_copy_par_mat(a->fpA);CHKERRBS(0);}
1343:   if (a->comm_pA)  {BSfree_comm(a->comm_pA);CHKERRBS(0);}
1344:   if (a->comm_fpA) {BSfree_comm(a->comm_fpA);CHKERRBS(0);}
1345:   PetscFree(a->imax);
1346:   MPI_Comm_free(&(a->comm_mpirowbs));
1347:   PetscFree(a);

1349:   PetscObjectChangeTypeName((PetscObject)mat,0);
1350:   PetscObjectComposeFunction((PetscObject)mat,"MatMPIRowbsSetPreallocation_C","",PETSC_NULL);
1351:   return(0);
1352: }

1356: PetscErrorCode MatSetOption_MPIRowbs(Mat A,MatOption op)
1357: {
1358:   Mat_MPIRowbs   *a = (Mat_MPIRowbs*)A->data;

1362:   switch (op) {
1363:   case MAT_ROW_ORIENTED:
1364:     a->roworiented = PETSC_TRUE;
1365:     break;
1366:   case MAT_COLUMN_ORIENTED:
1367:     a->roworiented = PETSC_FALSE;
1368:     break;
1369:   case MAT_COLUMNS_SORTED:
1370:     a->sorted      = 1;
1371:     break;
1372:   case MAT_COLUMNS_UNSORTED:
1373:     a->sorted      = 0;
1374:     break;
1375:   case MAT_NO_NEW_NONZERO_LOCATIONS:
1376:     a->nonew       = 1;
1377:     break;
1378:   case MAT_YES_NEW_NONZERO_LOCATIONS:
1379:     a->nonew       = 0;
1380:     break;
1381:   case MAT_DO_NOT_USE_INODES:
1382:     a->bs_color_single = 1;
1383:     break;
1384:   case MAT_YES_NEW_DIAGONALS:
1385:   case MAT_ROWS_SORTED:
1386:   case MAT_NEW_NONZERO_LOCATION_ERR:
1387:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1388:   case MAT_ROWS_UNSORTED:
1389:   case MAT_USE_HASH_TABLE:
1390:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1391:     break;
1392:   case MAT_IGNORE_OFF_PROC_ENTRIES:
1393:     a->donotstash = PETSC_TRUE;
1394:     break;
1395:   case MAT_NO_NEW_DIAGONALS:
1396:     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1397:     break;
1398:   case MAT_KEEP_ZEROED_ROWS:
1399:     a->keepzeroedrows    = PETSC_TRUE;
1400:     break;
1401:   case MAT_SYMMETRIC:
1402:     BSset_mat_symmetric(a->A,PETSC_TRUE);CHKERRBS(0);
1403:     break;
1404:   case MAT_STRUCTURALLY_SYMMETRIC:
1405:   case MAT_NOT_SYMMETRIC:
1406:   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1407:   case MAT_HERMITIAN:
1408:   case MAT_NOT_HERMITIAN:
1409:   case MAT_SYMMETRY_ETERNAL:
1410:   case MAT_NOT_SYMMETRY_ETERNAL:
1411:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1412:     break;
1413:   default:
1414:     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1415:     break;
1416:   }
1417:   return(0);
1418: }

1422: PetscErrorCode MatGetRow_MPIRowbs(Mat AA,int row,int *nz,int **idx,PetscScalar **v)
1423: {
1424:   Mat_MPIRowbs *mat = (Mat_MPIRowbs*)AA->data;
1425:   BSspmat      *A = mat->A;
1426:   BSsprow      *rs;
1427: 
1429:   if (row < AA->rmap.rstart || row >= AA->rmap.rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");

1431:   rs  = A->rows[row - AA->rmap.rstart];
1432:   *nz = rs->length;
1433:   if (v)   *v   = rs->nz;
1434:   if (idx) *idx = rs->col;
1435:   return(0);
1436: }

1440: PetscErrorCode MatRestoreRow_MPIRowbs(Mat A,int row,int *nz,int **idx,PetscScalar **v)
1441: {
1443:   return(0);
1444: }

1446: /* ------------------------------------------------------------------ */

1450: PetscErrorCode MatSetUpPreallocation_MPIRowbs(Mat A)
1451: {

1455:    MatMPIRowbsSetPreallocation(A,