Actual source code: mpiaij.c

  1: #define PETSCMAT_DLL

 3:  #include src/mat/impls/aij/mpi/mpiaij.h
 4:  #include src/inline/spops.h

  6: /* 
  7:   Local utility routine that creates a mapping from the global column 
  8: number to the local number in the off-diagonal part of the local 
  9: storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at 
 10: a slightly higher hash table cost; without it it is not scalable (each processor
 11: has an order N integer array but is fast to acess.
 12: */
 15: PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
 16: {
 17:   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
 19:   PetscInt       n = aij->B->cmap.n,i;

 22: #if defined (PETSC_USE_CTABLE)
 23:   PetscTableCreate(n,&aij->colmap);
 24:   for (i=0; i<n; i++){
 25:     PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);
 26:   }
 27: #else
 28:   PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);
 29:   PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));
 30:   PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));
 31:   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
 32: #endif
 33:   return(0);
 34: }


 37: #define CHUNKSIZE   15
 38: #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
 39: { \
 40:     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
 41:     lastcol1 = col;\
 42:     while (high1-low1 > 5) { \
 43:       t = (low1+high1)/2; \
 44:       if (rp1[t] > col) high1 = t; \
 45:       else             low1  = t; \
 46:     } \
 47:       for (_i=low1; _i<high1; _i++) { \
 48:         if (rp1[_i] > col) break; \
 49:         if (rp1[_i] == col) { \
 50:           if (addv == ADD_VALUES) ap1[_i] += value;   \
 51:           else                    ap1[_i] = value; \
 52:           goto a_noinsert; \
 53:         } \
 54:       }  \
 55:       if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
 56:       if (nonew == 1) goto a_noinsert; \
 57:       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
 58:       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
 59:       N = nrow1++ - 1; a->nz++; high1++; \
 60:       /* shift up all the later entries in this row */ \
 61:       for (ii=N; ii>=_i; ii--) { \
 62:         rp1[ii+1] = rp1[ii]; \
 63:         ap1[ii+1] = ap1[ii]; \
 64:       } \
 65:       rp1[_i] = col;  \
 66:       ap1[_i] = value;  \
 67:       a_noinsert: ; \
 68:       ailen[row] = nrow1; \
 69: } 


 72: #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
 73: { \
 74:     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
 75:     lastcol2 = col;\
 76:     while (high2-low2 > 5) { \
 77:       t = (low2+high2)/2; \
 78:       if (rp2[t] > col) high2 = t; \
 79:       else             low2  = t; \
 80:     } \
 81:        for (_i=low2; _i<high2; _i++) { \
 82:         if (rp2[_i] > col) break; \
 83:         if (rp2[_i] == col) { \
 84:           if (addv == ADD_VALUES) ap2[_i] += value;   \
 85:           else                    ap2[_i] = value; \
 86:           goto b_noinsert; \
 87:         } \
 88:       }  \
 89:       if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
 90:       if (nonew == 1) goto b_noinsert; \
 91:       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
 92:       MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
 93:       N = nrow2++ - 1; b->nz++; high2++;\
 94:       /* shift up all the later entries in this row */ \
 95:       for (ii=N; ii>=_i; ii--) { \
 96:         rp2[ii+1] = rp2[ii]; \
 97:         ap2[ii+1] = ap2[ii]; \
 98:       } \
 99:       rp2[_i] = col;  \
100:       ap2[_i] = value;  \
101:       b_noinsert: ; \
102:       bilen[row] = nrow2; \
103: }

107: PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
108: {
109:   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
110:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
112:   PetscInt       l,*garray = mat->garray,diag;

115:   /* code only works for square matrices A */

117:   /* find size of row to the left of the diagonal part */
118:   MatGetOwnershipRange(A,&diag,0);
119:   row  = row - diag;
120:   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
121:     if (garray[b->j[b->i[row]+l]] > diag) break;
122:   }
123:   PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));

125:   /* diagonal part */
126:   PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));

128:   /* right of diagonal part */
129:   PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));
130:   return(0);
131: }

135: PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
136: {
137:   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
138:   PetscScalar    value;
140:   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
141:   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
142:   PetscTruth     roworiented = aij->roworiented;

144:   /* Some Variables required in the macro */
145:   Mat            A = aij->A;
146:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
147:   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
148:   PetscScalar    *aa = a->a;
149:   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
150:   Mat            B = aij->B;
151:   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
152:   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
153:   PetscScalar    *ba = b->a;

155:   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
156:   PetscInt       nonew = a->nonew;
157:   PetscScalar    *ap1,*ap2;

160:   for (i=0; i<m; i++) {
161:     if (im[i] < 0) continue;
162: #if defined(PETSC_USE_DEBUG)
163:     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
164: #endif
165:     if (im[i] >= rstart && im[i] < rend) {
166:       row      = im[i] - rstart;
167:       lastcol1 = -1;
168:       rp1      = aj + ai[row];
169:       ap1      = aa + ai[row];
170:       rmax1    = aimax[row];
171:       nrow1    = ailen[row];
172:       low1     = 0;
173:       high1    = nrow1;
174:       lastcol2 = -1;
175:       rp2      = bj + bi[row];
176:       ap2      = ba + bi[row];
177:       rmax2    = bimax[row];
178:       nrow2    = bilen[row];
179:       low2     = 0;
180:       high2    = nrow2;

182:       for (j=0; j<n; j++) {
183:         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
184:         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
185:         if (in[j] >= cstart && in[j] < cend){
186:           col = in[j] - cstart;
187:           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
188:         } else if (in[j] < 0) continue;
189: #if defined(PETSC_USE_DEBUG)
190:         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
191: #endif
192:         else {
193:           if (mat->was_assembled) {
194:             if (!aij->colmap) {
195:               CreateColmap_MPIAIJ_Private(mat);
196:             }
197: #if defined (PETSC_USE_CTABLE)
198:             PetscTableFind(aij->colmap,in[j]+1,&col);
199:             col--;
200: #else
201:             col = aij->colmap[in[j]] - 1;
202: #endif
203:             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
204:               DisAssemble_MPIAIJ(mat);
205:               col =  in[j];
206:               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
207:               B = aij->B;
208:               b = (Mat_SeqAIJ*)B->data;
209:               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
210:               rp2      = bj + bi[row];
211:               ap2      = ba + bi[row];
212:               rmax2    = bimax[row];
213:               nrow2    = bilen[row];
214:               low2     = 0;
215:               high2    = nrow2;
216:               bm       = aij->B->rmap.n;
217:               ba = b->a;
218:             }
219:           } else col = in[j];
220:           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
221:         }
222:       }
223:     } else {
224:       if (!aij->donotstash) {
225:         if (roworiented) {
226:           if (ignorezeroentries && v[i*n] == 0.0) continue;
227:           MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);
228:         } else {
229:           if (ignorezeroentries && v[i] == 0.0) continue;
230:           MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);
231:         }
232:       }
233:     }
234:   }
235:   return(0);
236: }

240: PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
241: {
242:   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
244:   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
245:   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;

248:   for (i=0; i<m; i++) {
249:     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
250:     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
251:     if (idxm[i] >= rstart && idxm[i] < rend) {
252:       row = idxm[i] - rstart;
253:       for (j=0; j<n; j++) {
254:         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
255:         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
256:         if (idxn[j] >= cstart && idxn[j] < cend){
257:           col = idxn[j] - cstart;
258:           MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);
259:         } else {
260:           if (!aij->colmap) {
261:             CreateColmap_MPIAIJ_Private(mat);
262:           }
263: #if defined (PETSC_USE_CTABLE)
264:           PetscTableFind(aij->colmap,idxn[j]+1,&col);
265:           col --;
266: #else
267:           col = aij->colmap[idxn[j]] - 1;
268: #endif
269:           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
270:           else {
271:             MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);
272:           }
273:         }
274:       }
275:     } else {
276:       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
277:     }
278:   }
279:   return(0);
280: }

284: PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
285: {
286:   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
288:   PetscInt       nstash,reallocs;
289:   InsertMode     addv;

292:   if (aij->donotstash) {
293:     return(0);
294:   }

296:   /* make sure all processors are either in INSERTMODE or ADDMODE */
297:   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);
298:   if (addv == (ADD_VALUES|INSERT_VALUES)) {
299:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
300:   }
301:   mat->insertmode = addv; /* in case this processor had no cache */

303:   MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);
304:   MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
305:   PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
306:   return(0);
307: }

311: PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
312: {
313:   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
314:   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
316:   PetscMPIInt    n;
317:   PetscInt       i,j,rstart,ncols,flg;
318:   PetscInt       *row,*col,other_disassembled;
319:   PetscScalar    *val;
320:   InsertMode     addv = mat->insertmode;

322:   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
324:   if (!aij->donotstash) {
325:     while (1) {
326:       MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
327:       if (!flg) break;

329:       for (i=0; i<n;) {
330:         /* Now identify the consecutive vals belonging to the same row */
331:         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
332:         if (j < n) ncols = j-i;
333:         else       ncols = n-i;
334:         /* Now assemble all these values with a single function call */
335:         MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);
336:         i = j;
337:       }
338:     }
339:     MatStashScatterEnd_Private(&mat->stash);
340:   }
341:   a->compressedrow.use     = PETSC_FALSE;
342:   MatAssemblyBegin(aij->A,mode);
343:   MatAssemblyEnd(aij->A,mode);

345:   /* determine if any processor has disassembled, if so we must 
346:      also disassemble ourselfs, in order that we may reassemble. */
347:   /*
348:      if nonzero structure of submatrix B cannot change then we know that
349:      no processor disassembled thus we can skip this stuff
350:   */
351:   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
352:     MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
353:     if (mat->was_assembled && !other_disassembled) {
354:       DisAssemble_MPIAIJ(mat);
355:     }
356:   }
357:   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
358:     MatSetUpMultiply_MPIAIJ(mat);
359:   }
360:   MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);
361:   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
362:   MatAssemblyBegin(aij->B,mode);
363:   MatAssemblyEnd(aij->B,mode);

365:   PetscFree(aij->rowvalues);
366:   aij->rowvalues = 0;

368:   /* used by MatAXPY() */
369:   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
370:   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

372:   return(0);
373: }

377: PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
378: {
379:   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;

383:   MatZeroEntries(l->A);
384:   MatZeroEntries(l->B);
385:   return(0);
386: }

390: PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
391: {
392:   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
394:   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
395:   PetscInt       i,*owners = A->rmap.range;
396:   PetscInt       *nprocs,j,idx,nsends,row;
397:   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
398:   PetscInt       *rvalues,count,base,slen,*source;
399:   PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
400:   MPI_Comm       comm = A->comm;
401:   MPI_Request    *send_waits,*recv_waits;
402:   MPI_Status     recv_status,*send_status;
403: #if defined(PETSC_DEBUG)
404:   PetscTruth     found = PETSC_FALSE;
405: #endif

408:   /*  first count number of contributors to each processor */
409:   PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
410:   PetscMemzero(nprocs,2*size*sizeof(PetscInt));
411:   PetscMalloc((N+1)*sizeof(PetscInt),&owner); /* see note*/
412:   j = 0;
413:   for (i=0; i<N; i++) {
414:     if (lastidx > (idx = rows[i])) j = 0;
415:     lastidx = idx;
416:     for (; j<size; j++) {
417:       if (idx >= owners[j] && idx < owners[j+1]) {
418:         nprocs[2*j]++;
419:         nprocs[2*j+1] = 1;
420:         owner[i] = j;
421: #if defined(PETSC_DEBUG)
422:         found = PETSC_TRUE;
423: #endif
424:         break;
425:       }
426:     }
427: #if defined(PETSC_DEBUG)
428:     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
429:     found = PETSC_FALSE;
430: #endif
431:   }
432:   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

434:   /* inform other processors of number of messages and max length*/
435:   PetscMaxSum(comm,nprocs,&nmax,&nrecvs);

437:   /* post receives:   */
438:   PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);
439:   PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
440:   for (i=0; i<nrecvs; i++) {
441:     MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
442:   }

444:   /* do sends:
445:       1) starts[i] gives the starting index in svalues for stuff going to 
446:          the ith processor
447:   */
448:   PetscMalloc((N+1)*sizeof(PetscInt),&svalues);
449:   PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
450:   PetscMalloc((size+1)*sizeof(PetscInt),&starts);
451:   starts[0] = 0;
452:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
453:   for (i=0; i<N; i++) {
454:     svalues[starts[owner[i]]++] = rows[i];
455:   }

457:   starts[0] = 0;
458:   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
459:   count = 0;
460:   for (i=0; i<size; i++) {
461:     if (nprocs[2*i+1]) {
462:       MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
463:     }
464:   }
465:   PetscFree(starts);

467:   base = owners[rank];

469:   /*  wait on receives */
470:   PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);
471:   source = lens + nrecvs;
472:   count  = nrecvs; slen = 0;
473:   while (count) {
474:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
475:     /* unpack receives into our local space */
476:     MPI_Get_count(&recv_status,MPIU_INT,&n);
477:     source[imdex]  = recv_status.MPI_SOURCE;
478:     lens[imdex]    = n;
479:     slen          += n;
480:     count--;
481:   }
482:   PetscFree(recv_waits);
483: 
484:   /* move the data into the send scatter */
485:   PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);
486:   count = 0;
487:   for (i=0; i<nrecvs; i++) {
488:     values = rvalues + i*nmax;
489:     for (j=0; j<lens[i]; j++) {
490:       lrows[count++] = values[j] - base;
491:     }
492:   }
493:   PetscFree(rvalues);
494:   PetscFree(lens);
495:   PetscFree(owner);
496:   PetscFree(nprocs);
497: 
498:   /* actually zap the local rows */
499:   /*
500:         Zero the required rows. If the "diagonal block" of the matrix
501:      is square and the user wishes to set the diagonal we use separate
502:      code so that MatSetValues() is not called for each diagonal allocating
503:      new memory, thus calling lots of mallocs and slowing things down.

505:        Contributed by: Matthew Knepley
506:   */
507:   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
508:   MatZeroRows(l->B,slen,lrows,0.0);
509:   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
510:     MatZeroRows(l->A,slen,lrows,diag);
511:   } else if (diag != 0.0) {
512:     MatZeroRows(l->A,slen,lrows,0.0);
513:     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
514:       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
515: MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
516:     }
517:     for (i = 0; i < slen; i++) {
518:       row  = lrows[i] + rstart;
519:       MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);
520:     }
521:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
522:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
523:   } else {
524:     MatZeroRows(l->A,slen,lrows,0.0);
525:   }
526:   PetscFree(lrows);

528:   /* wait on sends */
529:   if (nsends) {
530:     PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
531:     MPI_Waitall(nsends,send_waits,send_status);
532:     PetscFree(send_status);
533:   }
534:   PetscFree(send_waits);
535:   PetscFree(svalues);

537:   return(0);
538: }

542: PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
543: {
544:   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
546:   PetscInt       nt;

549:   VecGetLocalSize(xx,&nt);
550:   if (nt != A->cmap.n) {
551:     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
552:   }
553:   VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
554:   (*a->A->ops->mult)(a->A,xx,yy);
555:   VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
556:   (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
557:   return(0);
558: }

562: PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
563: {
564:   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

568:   VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
569:   (*a->A->ops->multadd)(a->A,xx,yy,zz);
570:   VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
571:   (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
572:   return(0);
573: }

577: PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
578: {
579:   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
581:   PetscTruth     merged;

584:   VecScatterGetMerged(a->Mvctx,&merged);
585:   /* do nondiagonal part */
586:   (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
587:   if (!merged) {
588:     /* send it on its way */
589:     VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
590:     /* do local part */
591:     (*a->A->ops->multtranspose)(a->A,xx,yy);
592:     /* receive remote parts: note this assumes the values are not actually */
593:     /* added in yy until the next line, */
594:     VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
595:   } else {
596:     /* do local part */
597:     (*a->A->ops->multtranspose)(a->A,xx,yy);
598:     /* send it on its way */
599:     VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
600:     /* values actually were received in the Begin() but we need to call this nop */
601:     VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
602:   }
603:   return(0);
604: }

609: PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
610: {
611:   MPI_Comm       comm;
612:   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
613:   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
614:   IS             Me,Notme;
616:   PetscInt       M,N,first,last,*notme,i;
617:   PetscMPIInt    size;


621:   /* Easy test: symmetric diagonal block */
622:   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
623:   MatIsTranspose(Adia,Bdia,tol,f);
624:   if (!*f) return(0);
625:   PetscObjectGetComm((PetscObject)Amat,&comm);
626:   MPI_Comm_size(comm,&size);
627:   if (size == 1) return(0);

629:   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
630:   MatGetSize(Amat,&M,&N);
631:   MatGetOwnershipRange(Amat,&first,&last);
632:   PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);
633:   for (i=0; i<first; i++) notme[i] = i;
634:   for (i=last; i<M; i++) notme[i-last+first] = i;
635:   ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);
636:   ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);
637:   MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);
638:   Aoff = Aoffs[0];
639:   MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);
640:   Boff = Boffs[0];
641:   MatIsTranspose(Aoff,Boff,tol,f);
642:   MatDestroyMatrices(1,&Aoffs);
643:   MatDestroyMatrices(1,&Boffs);
644:   ISDestroy(Me);
645:   ISDestroy(Notme);

647:   return(0);
648: }

653: PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
654: {
655:   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

659:   /* do nondiagonal part */
660:   (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
661:   /* send it on its way */
662:   VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
663:   /* do local part */
664:   (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);
665:   /* receive remote parts */
666:   VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
667:   return(0);
668: }

670: /*
671:   This only works correctly for square matrices where the subblock A->A is the 
672:    diagonal block
673: */
676: PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
677: {
679:   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

682:   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
683:   if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
684:     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
685:   }
686:   MatGetDiagonal(a->A,v);
687:   return(0);
688: }

692: PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
693: {
694:   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

698:   MatScale(a->A,aa);
699:   MatScale(a->B,aa);
700:   return(0);
701: }

705: PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
706: {
707:   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

711: #if defined(PETSC_USE_LOG)
712:   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
713: #endif
714:   MatStashDestroy_Private(&mat->stash);
715:   MatDestroy(aij->A);
716:   MatDestroy(aij->B);
717: #if defined (PETSC_USE_CTABLE)
718:   if (aij->colmap) {PetscTableDestroy(aij->colmap);}
719: #else
720:   PetscFree(aij->colmap);
721: #endif
722:   PetscFree(aij->garray);
723:   if (aij->lvec)   {VecDestroy(aij->lvec);}
724:   if (aij->Mvctx)  {VecScatterDestroy(aij->Mvctx);}
725:   PetscFree(aij->rowvalues);
726:   PetscFree(aij->ld);
727:   PetscFree(aij);

729:   PetscObjectChangeTypeName((PetscObject)mat,0);
730:   PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);
731:   PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);
732:   PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);
733:   PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);
734:   PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);
735:   PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);
736:   PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);
737:   return(0);
738: }

742: PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
743: {
744:   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
745:   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
746:   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
747:   PetscErrorCode    ierr;
748:   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
749:   int               fd;
750:   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
751:   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
752:   PetscScalar       *column_values;

755:   MPI_Comm_rank(mat->comm,&rank);
756:   MPI_Comm_size(mat->comm,&size);
757:   nz   = A->nz + B->nz;
758:   if (!rank) {
759:     header[0] = MAT_FILE_COOKIE;
760:     header[1] = mat->rmap.N;
761:     header[2] = mat->cmap.N;
762:     MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);
763:     PetscViewerBinaryGetDescriptor(viewer,&fd);
764:     PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);
765:     /* get largest number of rows any processor has */
766:     rlen = mat->rmap.n;
767:     range = mat->rmap.range;
768:     for (i=1; i<size; i++) {
769:       rlen = PetscMax(rlen,range[i+1] - range[i]);
770:     }
771:   } else {
772:     MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);
773:     rlen = mat->rmap.n;
774:   }

776:   /* load up the local row counts */
777:   PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);
778:   for (i=0; i<mat->rmap.n; i++) {
779:     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
780:   }

782:   /* store the row lengths to the file */
783:   if (!rank) {
784:     MPI_Status status;
785:     PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);
786:     for (i=1; i<size; i++) {
787:       rlen = range[i+1] - range[i];
788:       MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);
789:       PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);
790:     }
791:   } else {
792:     MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);
793:   }
794:   PetscFree(row_lengths);

796:   /* load up the local column indices */
797:   nzmax = nz; /* )th processor needs space a largest processor needs */
798:   MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);
799:   PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);
800:   cnt  = 0;
801:   for (i=0; i<mat->rmap.n; i++) {
802:     for (j=B->i[i]; j<B->i[i+1]; j++) {
803:       if ( (col = garray[B->j[j]]) > cstart) break;
804:       column_indices[cnt++] = col;
805:     }
806:     for (k=A->i[i]; k<A->i[i+1]; k++) {
807:       column_indices[cnt++] = A->j[k] + cstart;
808:     }
809:     for (; j<B->i[i+1]; j++) {
810:       column_indices[cnt++] = garray[B->j[j]];
811:     }
812:   }
813:   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

815:   /* store the column indices to the file */
816:   if (!rank) {
817:     MPI_Status status;
818:     PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);
819:     for (i=1; i<size; i++) {
820:       MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);
821:       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
822:       MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);
823:       PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);
824:     }
825:   } else {
826:     MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);
827:     MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);
828:   }
829:   PetscFree(column_indices);

831:   /* load up the local column values */
832:   PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);
833:   cnt  = 0;
834:   for (i=0; i<mat->rmap.n; i++) {
835:     for (j=B->i[i]; j<B->i[i+1]; j++) {
836:       if ( garray[B->j[j]] > cstart) break;
837:       column_values[cnt++] = B->a[j];
838:     }
839:     for (k=A->i[i]; k<A->i[i+1]; k++) {
840:       column_values[cnt++] = A->a[k];
841:     }
842:     for (; j<B->i[i+1]; j++) {
843:       column_values[cnt++] = B->a[j];
844:     }
845:   }
846:   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

848:   /* store the column values to the file */
849:   if (!rank) {
850:     MPI_Status status;
851:     PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);
852:     for (i=1; i<size; i++) {
853:       MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);
854:       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
855:       MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);
856:       PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);
857:     }
858:   } else {
859:     MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);
860:     MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);
861:   }
862:   PetscFree(column_values);
863:   return(0);
864: }

868: PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
869: {
870:   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
871:   PetscErrorCode    ierr;
872:   PetscMPIInt       rank = aij->rank,size = aij->size;
873:   PetscTruth        isdraw,iascii,isbinary;
874:   PetscViewer       sviewer;
875:   PetscViewerFormat format;

878:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
879:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
880:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
881:   if (iascii) {
882:     PetscViewerGetFormat(viewer,&format);
883:     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
884:       MatInfo    info;
885:       PetscTruth inodes;

887:       MPI_Comm_rank(mat->comm,&rank);
888:       MatGetInfo(mat,MAT_LOCAL,&info);
889:       MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);
890:       if (!inodes) {
891:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
892:                                               rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
893:       } else {
894:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
895:                     rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
896:       }
897:       MatGetInfo(aij->A,MAT_LOCAL,&info);
898:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
899:       MatGetInfo(aij->B,MAT_LOCAL,&info);
900:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
901:       PetscViewerFlush(viewer);
902:       PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");
903:       VecScatterView(aij->Mvctx,viewer);
904:       return(0);
905:     } else if (format == PETSC_VIEWER_ASCII_INFO) {
906:       PetscInt   inodecount,inodelimit,*inodes;
907:       MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);
908:       if (inodes) {
909:         PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);
910:       } else {
911:         PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");
912:       }
913:       return(0);
914:     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
915:       return(0);
916:     }
917:   } else if (isbinary) {
918:     if (size == 1) {
919:       PetscObjectSetName((PetscObject)aij->A,mat->name);
920:       MatView(aij->A,viewer);
921:     } else {
922:       MatView_MPIAIJ_Binary(mat,viewer);
923:     }
924:     return(0);
925:   } else if (isdraw) {
926:     PetscDraw  draw;
927:     PetscTruth isnull;
928:     PetscViewerDrawGetDraw(viewer,0,&draw);
929:     PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
930:   }

932:   if (size == 1) {
933:     PetscObjectSetName((PetscObject)aij->A,mat->name);
934:     MatView(aij->A,viewer);
935:   } else {
936:     /* assemble the entire matrix onto first processor. */
937:     Mat         A;
938:     Mat_SeqAIJ  *Aloc;
939:     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
940:     PetscScalar *a;

942:     MatCreate(mat->comm,&A);
943:     if (!rank) {
944:       MatSetSizes(A,M,N,M,N);
945:     } else {
946:       MatSetSizes(A,0,0,M,N);
947:     }
948:     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
949:     MatSetType(A,MATMPIAIJ);
950:     MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);
951:     PetscLogObjectParent(mat,A);

953:     /* copy over the A part */
954:     Aloc = (Mat_SeqAIJ*)aij->A->data;
955:     m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
956:     row = mat->rmap.rstart;
957:     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;}
958:     for (i=0; i<m; i++) {
959:       MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);
960:       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
961:     }
962:     aj = Aloc->j;
963:     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}

965:     /* copy over the B part */
966:     Aloc = (Mat_SeqAIJ*)aij->B->data;
967:     m    = aij->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
968:     row  = mat->rmap.rstart;
969:     PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);
970:     ct   = cols;
971:     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
972:     for (i=0; i<m; i++) {
973:       MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);
974:       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
975:     }
976:     PetscFree(ct);
977:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
978:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
979:     /* 
980:        Everyone has to call to draw the matrix since the graphics waits are
981:        synchronized across all processors that share the PetscDraw object
982:     */
983:     PetscViewerGetSingleton(viewer,&sviewer);
984:     if (!rank) {
985:       PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);
986:       MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);
987:     }
988:     PetscViewerRestoreSingleton(viewer,&sviewer);
989:     MatDestroy(A);
990:   }
991:   return(0);
992: }

996: PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
997: {
999:   PetscTruth     iascii,isdraw,issocket,isbinary;
1000: 
1002:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
1003:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
1004:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
1005:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
1006:   if (iascii || isdraw || isbinary || issocket) {
1007:     MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);
1008:   } else {
1009:     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1010:   }
1011:   return(0);
1012: }

1016: PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1017: {
1018:   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1020:   Vec            bb1;

1023:   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);

1025:   VecDuplicate(bb,&bb1);

1027:   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1028:     if (flag & SOR_ZERO_INITIAL_GUESS) {
1029:       (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);
1030:       its--;
1031:     }
1032: 
1033:     while (its--) {
1034:       VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1035:       VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);

1037:       /* update rhs: bb1 = bb - B*x */
1038:       VecScale(mat->lvec,-1.0);
1039:       (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);

1041:       /* local sweep */
1042:       (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1043: 
1044:     }
1045:   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1046:     if (flag & SOR_ZERO_INITIAL_GUESS) {
1047:       (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);
1048:       its--;
1049:     }
1050:     while (its--) {
1051:       VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1052:       VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);

1054:       /* update rhs: bb1 = bb - B*x */
1055:       VecScale(mat->lvec,-1.0);
1056:       (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);

1058:       /* local sweep */
1059:       (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1060: 
1061:     }
1062:   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1063:     if (flag & SOR_ZERO_INITIAL_GUESS) {
1064:       (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);
1065:       its--;
1066:     }
1067:     while (its--) {
1068:       VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1069:       VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);

1071:       /* update rhs: bb1 = bb - B*x */
1072:       VecScale(mat->lvec,-1.0);
1073:       (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);

1075:       /* local sweep */
1076:       (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1077: 
1078:     }
1079:   } else {
1080:     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1081:   }

1083:   VecDestroy(bb1);
1084:   return(0);
1085: }

1089: PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1090: {
1091:   MPI_Comm       comm,pcomm;
1092:   PetscInt       first,local_size,nrows,*rows;
1093:   int            ntids;
1094:   IS             crowp,growp,irowp,lrowp,lcolp,icolp;

1098:   PetscObjectGetComm((PetscObject)A,&comm);
1099:   /* make a collective version of 'rowp' */
1100:   PetscObjectGetComm((PetscObject)rowp,&pcomm);
1101:   if (pcomm==comm) {
1102:     crowp = rowp;
1103:   } else {
1104:     ISGetSize(rowp,&nrows);
1105:     ISGetIndices(rowp,&rows);
1106:     ISCreateGeneral(comm,nrows,rows,&crowp);
1107:     ISRestoreIndices(rowp,&rows);
1108:   }
1109:   /* collect the global row permutation and invert it */
1110:   ISAllGather(crowp,&growp);
1111:   ISSetPermutation(growp);
1112:   if (pcomm!=comm) {
1113:     ISDestroy(crowp);
1114:   }
1115:   ISInvertPermutation(growp,PETSC_DECIDE,&irowp);
1116:   /* get the local target indices */
1117:   MatGetOwnershipRange(A,&first,PETSC_NULL);
1118:   MatGetLocalSize(A,&local_size,PETSC_NULL);
1119:   ISGetIndices(irowp,&rows);
1120:   ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);
1121:   ISRestoreIndices(irowp,&rows);
1122:   ISDestroy(irowp);
1123:   /* the column permutation is so much easier;
1124:      make a local version of 'colp' and invert it */
1125:   PetscObjectGetComm((PetscObject)colp,&pcomm);
1126:   MPI_Comm_size(pcomm,&ntids);
1127:   if (ntids==1) {
1128:     lcolp = colp;
1129:   } else {
1130:     ISGetSize(colp,&nrows);
1131:     ISGetIndices(colp,&rows);
1132:     ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);
1133:   }
1134:   ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);
1135:   ISSetPermutation(lcolp);
1136:   if (ntids>1) {
1137:     ISRestoreIndices(colp,&rows);
1138:     ISDestroy(lcolp);
1139:   }
1140:   /* now we just get the submatrix */
1141:   MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);
1142:   /* clean up */
1143:   ISDestroy(lrowp);
1144:   ISDestroy(icolp);
1145:   return(0);
1146: }

1150: PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1151: {
1152:   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1153:   Mat            A = mat->A,B = mat->B;
1155:   PetscReal      isend[5],irecv[5];

1158:   info->block_size     = 1.0;
1159:   MatGetInfo(A,MAT_LOCAL,info);
1160:   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1161:   isend[3] = info->memory;  isend[4] = info->mallocs;
1162:   MatGetInfo(B,MAT_LOCAL,info);
1163:   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1164:   isend[3] += info->memory;  isend[4] += info->mallocs;
1165:   if (flag == MAT_LOCAL) {
1166:     info->nz_used      = isend[0];
1167:     info->nz_allocated = isend[1];
1168:     info->nz_unneeded  = isend[2];
1169:     info->memory       = isend[3];
1170:     info->mallocs      = isend[4];
1171:   } else if (flag == MAT_GLOBAL_MAX) {
1172:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);
1173:     info->nz_used      = irecv[0];
1174:     info->nz_allocated = irecv[1];
1175:     info->nz_unneeded  = irecv[2];
1176:     info->memory       = irecv[3];
1177:     info->mallocs      = irecv[4];
1178:   } else if (flag == MAT_GLOBAL_SUM) {
1179:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);