Actual source code: maij.c

  1: #define PETSCMAT_DLL

  3: /*
  4:     Defines the basic matrix operations for the MAIJ  matrix storage format.
  5:   This format is used for restriction and interpolation operations for 
  6:   multicomponent problems. It interpolates each component the same way
  7:   independently.

  9:      We provide:
 10:          MatMult()
 11:          MatMultTranspose()
 12:          MatMultTransposeAdd()
 13:          MatMultAdd()
 14:           and
 15:          MatCreateMAIJ(Mat,dof,Mat*)

 17:      This single directory handles both the sequential and parallel codes
 18: */

 20:  #include src/mat/impls/maij/maij.h
 21:  #include src/mat/utils/freespace.h
 22:  #include private/vecimpl.h

 26: PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
 27: {
 29:   PetscTruth     ismpimaij,isseqmaij;

 32:   PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
 33:   PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
 34:   if (ismpimaij) {
 35:     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;

 37:     *B = b->A;
 38:   } else if (isseqmaij) {
 39:     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;

 41:     *B = b->AIJ;
 42:   } else {
 43:     *B = A;
 44:   }
 45:   return(0);
 46: }

 50: PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
 51: {
 53:   Mat            Aij;

 56:   MatMAIJGetAIJ(A,&Aij);
 57:   MatCreateMAIJ(Aij,dof,B);
 58:   return(0);
 59: }

 63: PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
 64: {
 66:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;

 69:   if (b->AIJ) {
 70:     MatDestroy(b->AIJ);
 71:   }
 72:   PetscFree(b);
 73:   return(0);
 74: }

 78: PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
 79: {
 81:   Mat            B;

 84:   MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
 85:   MatView(B,viewer);
 86:   MatDestroy(B);
 87:   return(0);
 88: }

 92: PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
 93: {
 95:   Mat            B;

 98:   MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
 99:   MatView(B,viewer);
100:   MatDestroy(B);
101:   return(0);
102: }

106: PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
107: {
109:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

112:   if (b->AIJ) {
113:     MatDestroy(b->AIJ);
114:   }
115:   if (b->OAIJ) {
116:     MatDestroy(b->OAIJ);
117:   }
118:   if (b->A) {
119:     MatDestroy(b->A);
120:   }
121:   if (b->ctx) {
122:     VecScatterDestroy(b->ctx);
123:   }
124:   if (b->w) {
125:     VecDestroy(b->w);
126:   }
127:   PetscFree(b);
128:   PetscObjectChangeTypeName((PetscObject)A,0);
129:   return(0);
130: }

132: /*MC
133:   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 
134:   multicomponent problems, interpolating or restricting each component the same way independently.
135:   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.

137:   Operations provided:
138: . MatMult
139: . MatMultTranspose
140: . MatMultAdd
141: . MatMultTransposeAdd

143:   Level: advanced

145: .seealso: MatCreateSeqDense
146: M*/

151: PetscErrorCode  MatCreate_MAIJ(Mat A)
152: {
154:   Mat_MPIMAIJ    *b;
155:   PetscMPIInt    size;

158:   PetscNew(Mat_MPIMAIJ,&b);
159:   A->data  = (void*)b;
160:   PetscMemzero(A->ops,sizeof(struct _MatOps));
161:   A->factor           = 0;
162:   A->mapping          = 0;

164:   b->AIJ  = 0;
165:   b->dof  = 0;
166:   b->OAIJ = 0;
167:   b->ctx  = 0;
168:   b->w    = 0;
169:   MPI_Comm_size(A->comm,&size);
170:   if (size == 1){
171:     PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);
172:   } else {
173:     PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);
174:   }
175:   return(0);
176: }

179: /* --------------------------------------------------------------------------------------*/
182: PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
183: {
184:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
185:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
186:   PetscScalar    *x,*y,*v,sum1, sum2;
188:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
189:   PetscInt       n,i,jrow,j;

192:   VecGetArray(xx,&x);
193:   VecGetArray(yy,&y);
194:   idx  = a->j;
195:   v    = a->a;
196:   ii   = a->i;

198:   for (i=0; i<m; i++) {
199:     jrow = ii[i];
200:     n    = ii[i+1] - jrow;
201:     sum1  = 0.0;
202:     sum2  = 0.0;
203:     for (j=0; j<n; j++) {
204:       sum1 += v[jrow]*x[2*idx[jrow]];
205:       sum2 += v[jrow]*x[2*idx[jrow]+1];
206:       jrow++;
207:      }
208:     y[2*i]   = sum1;
209:     y[2*i+1] = sum2;
210:   }

212:   PetscLogFlops(4*a->nz - 2*m);
213:   VecRestoreArray(xx,&x);
214:   VecRestoreArray(yy,&y);
215:   return(0);
216: }

220: PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
221: {
222:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
223:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
224:   PetscScalar    *x,*y,*v,alpha1,alpha2,zero = 0.0;
226:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

229:   VecSet(yy,zero);
230:   VecGetArray(xx,&x);
231:   VecGetArray(yy,&y);
232: 
233:   for (i=0; i<m; i++) {
234:     idx    = a->j + a->i[i] ;
235:     v      = a->a + a->i[i] ;
236:     n      = a->i[i+1] - a->i[i];
237:     alpha1 = x[2*i];
238:     alpha2 = x[2*i+1];
239:     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
240:   }
241:   PetscLogFlops(4*a->nz - 2*b->AIJ->cmap.n);
242:   VecRestoreArray(xx,&x);
243:   VecRestoreArray(yy,&y);
244:   return(0);
245: }

249: PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
250: {
251:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
252:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
253:   PetscScalar    *x,*y,*v,sum1, sum2;
255:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
256:   PetscInt       n,i,jrow,j;

259:   if (yy != zz) {VecCopy(yy,zz);}
260:   VecGetArray(xx,&x);
261:   VecGetArray(zz,&y);
262:   idx  = a->j;
263:   v    = a->a;
264:   ii   = a->i;

266:   for (i=0; i<m; i++) {
267:     jrow = ii[i];
268:     n    = ii[i+1] - jrow;
269:     sum1  = 0.0;
270:     sum2  = 0.0;
271:     for (j=0; j<n; j++) {
272:       sum1 += v[jrow]*x[2*idx[jrow]];
273:       sum2 += v[jrow]*x[2*idx[jrow]+1];
274:       jrow++;
275:      }
276:     y[2*i]   += sum1;
277:     y[2*i+1] += sum2;
278:   }

280:   PetscLogFlops(4*a->nz - 2*m);
281:   VecRestoreArray(xx,&x);
282:   VecRestoreArray(zz,&y);
283:   return(0);
284: }
287: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
288: {
289:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
290:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
291:   PetscScalar    *x,*y,*v,alpha1,alpha2;
293:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

296:   if (yy != zz) {VecCopy(yy,zz);}
297:   VecGetArray(xx,&x);
298:   VecGetArray(zz,&y);
299: 
300:   for (i=0; i<m; i++) {
301:     idx   = a->j + a->i[i] ;
302:     v     = a->a + a->i[i] ;
303:     n     = a->i[i+1] - a->i[i];
304:     alpha1 = x[2*i];
305:     alpha2 = x[2*i+1];
306:     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
307:   }
308:   PetscLogFlops(4*a->nz - 2*b->AIJ->cmap.n);
309:   VecRestoreArray(xx,&x);
310:   VecRestoreArray(zz,&y);
311:   return(0);
312: }
313: /* --------------------------------------------------------------------------------------*/
316: PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
317: {
318:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
319:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
320:   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
322:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
323:   PetscInt       n,i,jrow,j;

326:   VecGetArray(xx,&x);
327:   VecGetArray(yy,&y);
328:   idx  = a->j;
329:   v    = a->a;
330:   ii   = a->i;

332:   for (i=0; i<m; i++) {
333:     jrow = ii[i];
334:     n    = ii[i+1] - jrow;
335:     sum1  = 0.0;
336:     sum2  = 0.0;
337:     sum3  = 0.0;
338:     for (j=0; j<n; j++) {
339:       sum1 += v[jrow]*x[3*idx[jrow]];
340:       sum2 += v[jrow]*x[3*idx[jrow]+1];
341:       sum3 += v[jrow]*x[3*idx[jrow]+2];
342:       jrow++;
343:      }
344:     y[3*i]   = sum1;
345:     y[3*i+1] = sum2;
346:     y[3*i+2] = sum3;
347:   }

349:   PetscLogFlops(6*a->nz - 3*m);
350:   VecRestoreArray(xx,&x);
351:   VecRestoreArray(yy,&y);
352:   return(0);
353: }

357: PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
358: {
359:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
360:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
361:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
363:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

366:   VecSet(yy,zero);
367:   VecGetArray(xx,&x);
368:   VecGetArray(yy,&y);
369: 
370:   for (i=0; i<m; i++) {
371:     idx    = a->j + a->i[i];
372:     v      = a->a + a->i[i];
373:     n      = a->i[i+1] - a->i[i];
374:     alpha1 = x[3*i];
375:     alpha2 = x[3*i+1];
376:     alpha3 = x[3*i+2];
377:     while (n-->0) {
378:       y[3*(*idx)]   += alpha1*(*v);
379:       y[3*(*idx)+1] += alpha2*(*v);
380:       y[3*(*idx)+2] += alpha3*(*v);
381:       idx++; v++;
382:     }
383:   }
384:   PetscLogFlops(6*a->nz - 3*b->AIJ->cmap.n);
385:   VecRestoreArray(xx,&x);
386:   VecRestoreArray(yy,&y);
387:   return(0);
388: }

392: PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
393: {
394:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
395:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
396:   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
398:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
399:   PetscInt       n,i,jrow,j;

402:   if (yy != zz) {VecCopy(yy,zz);}
403:   VecGetArray(xx,&x);
404:   VecGetArray(zz,&y);
405:   idx  = a->j;
406:   v    = a->a;
407:   ii   = a->i;

409:   for (i=0; i<m; i++) {
410:     jrow = ii[i];
411:     n    = ii[i+1] - jrow;
412:     sum1  = 0.0;
413:     sum2  = 0.0;
414:     sum3  = 0.0;
415:     for (j=0; j<n; j++) {
416:       sum1 += v[jrow]*x[3*idx[jrow]];
417:       sum2 += v[jrow]*x[3*idx[jrow]+1];
418:       sum3 += v[jrow]*x[3*idx[jrow]+2];
419:       jrow++;
420:      }
421:     y[3*i]   += sum1;
422:     y[3*i+1] += sum2;
423:     y[3*i+2] += sum3;
424:   }

426:   PetscLogFlops(6*a->nz);
427:   VecRestoreArray(xx,&x);
428:   VecRestoreArray(zz,&y);
429:   return(0);
430: }
433: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
434: {
435:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
436:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
437:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3;
439:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

442:   if (yy != zz) {VecCopy(yy,zz);}
443:   VecGetArray(xx,&x);
444:   VecGetArray(zz,&y);
445:   for (i=0; i<m; i++) {
446:     idx    = a->j + a->i[i] ;
447:     v      = a->a + a->i[i] ;
448:     n      = a->i[i+1] - a->i[i];
449:     alpha1 = x[3*i];
450:     alpha2 = x[3*i+1];
451:     alpha3 = x[3*i+2];
452:     while (n-->0) {
453:       y[3*(*idx)]   += alpha1*(*v);
454:       y[3*(*idx)+1] += alpha2*(*v);
455:       y[3*(*idx)+2] += alpha3*(*v);
456:       idx++; v++;
457:     }
458:   }
459:   PetscLogFlops(6*a->nz);
460:   VecRestoreArray(xx,&x);
461:   VecRestoreArray(zz,&y);
462:   return(0);
463: }

465: /* ------------------------------------------------------------------------------*/
468: PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
469: {
470:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
471:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
472:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
474:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
475:   PetscInt       n,i,jrow,j;

478:   VecGetArray(xx,&x);
479:   VecGetArray(yy,&y);
480:   idx  = a->j;
481:   v    = a->a;
482:   ii   = a->i;

484:   for (i=0; i<m; i++) {
485:     jrow = ii[i];
486:     n    = ii[i+1] - jrow;
487:     sum1  = 0.0;
488:     sum2  = 0.0;
489:     sum3  = 0.0;
490:     sum4  = 0.0;
491:     for (j=0; j<n; j++) {
492:       sum1 += v[jrow]*x[4*idx[jrow]];
493:       sum2 += v[jrow]*x[4*idx[jrow]+1];
494:       sum3 += v[jrow]*x[4*idx[jrow]+2];
495:       sum4 += v[jrow]*x[4*idx[jrow]+3];
496:       jrow++;
497:      }
498:     y[4*i]   = sum1;
499:     y[4*i+1] = sum2;
500:     y[4*i+2] = sum3;
501:     y[4*i+3] = sum4;
502:   }

504:   PetscLogFlops(8*a->nz - 4*m);
505:   VecRestoreArray(xx,&x);
506:   VecRestoreArray(yy,&y);
507:   return(0);
508: }

512: PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
513: {
514:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
515:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
516:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
518:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

521:   VecSet(yy,zero);
522:   VecGetArray(xx,&x);
523:   VecGetArray(yy,&y);
524:   for (i=0; i<m; i++) {
525:     idx    = a->j + a->i[i] ;
526:     v      = a->a + a->i[i] ;
527:     n      = a->i[i+1] - a->i[i];
528:     alpha1 = x[4*i];
529:     alpha2 = x[4*i+1];
530:     alpha3 = x[4*i+2];
531:     alpha4 = x[4*i+3];
532:     while (n-->0) {
533:       y[4*(*idx)]   += alpha1*(*v);
534:       y[4*(*idx)+1] += alpha2*(*v);
535:       y[4*(*idx)+2] += alpha3*(*v);
536:       y[4*(*idx)+3] += alpha4*(*v);
537:       idx++; v++;
538:     }
539:   }
540:   PetscLogFlops(8*a->nz - 4*b->AIJ->cmap.n);
541:   VecRestoreArray(xx,&x);
542:   VecRestoreArray(yy,&y);
543:   return(0);
544: }

548: PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
549: {
550:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
551:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
552:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
554:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
555:   PetscInt       n,i,jrow,j;

558:   if (yy != zz) {VecCopy(yy,zz);}
559:   VecGetArray(xx,&x);
560:   VecGetArray(zz,&y);
561:   idx  = a->j;
562:   v    = a->a;
563:   ii   = a->i;

565:   for (i=0; i<m; i++) {
566:     jrow = ii[i];
567:     n    = ii[i+1] - jrow;
568:     sum1  = 0.0;
569:     sum2  = 0.0;
570:     sum3  = 0.0;
571:     sum4  = 0.0;
572:     for (j=0; j<n; j++) {
573:       sum1 += v[jrow]*x[4*idx[jrow]];
574:       sum2 += v[jrow]*x[4*idx[jrow]+1];
575:       sum3 += v[jrow]*x[4*idx[jrow]+2];
576:       sum4 += v[jrow]*x[4*idx[jrow]+3];
577:       jrow++;
578:      }
579:     y[4*i]   += sum1;
580:     y[4*i+1] += sum2;
581:     y[4*i+2] += sum3;
582:     y[4*i+3] += sum4;
583:   }

585:   PetscLogFlops(8*a->nz - 4*m);
586:   VecRestoreArray(xx,&x);
587:   VecRestoreArray(zz,&y);
588:   return(0);
589: }
592: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
593: {
594:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
595:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
596:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
598:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

601:   if (yy != zz) {VecCopy(yy,zz);}
602:   VecGetArray(xx,&x);
603:   VecGetArray(zz,&y);
604: 
605:   for (i=0; i<m; i++) {
606:     idx    = a->j + a->i[i] ;
607:     v      = a->a + a->i[i] ;
608:     n      = a->i[i+1] - a->i[i];
609:     alpha1 = x[4*i];
610:     alpha2 = x[4*i+1];
611:     alpha3 = x[4*i+2];
612:     alpha4 = x[4*i+3];
613:     while (n-->0) {
614:       y[4*(*idx)]   += alpha1*(*v);
615:       y[4*(*idx)+1] += alpha2*(*v);
616:       y[4*(*idx)+2] += alpha3*(*v);
617:       y[4*(*idx)+3] += alpha4*(*v);
618:       idx++; v++;
619:     }
620:   }
621:   PetscLogFlops(8*a->nz - 4*b->AIJ->cmap.n);
622:   VecRestoreArray(xx,&x);
623:   VecRestoreArray(zz,&y);
624:   return(0);
625: }
626: /* ------------------------------------------------------------------------------*/

630: PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
631: {
632:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
633:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
634:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
636:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
637:   PetscInt       n,i,jrow,j;

640:   VecGetArray(xx,&x);
641:   VecGetArray(yy,&y);
642:   idx  = a->j;
643:   v    = a->a;
644:   ii   = a->i;

646:   for (i=0; i<m; i++) {
647:     jrow = ii[i];
648:     n    = ii[i+1] - jrow;
649:     sum1  = 0.0;
650:     sum2  = 0.0;
651:     sum3  = 0.0;
652:     sum4  = 0.0;
653:     sum5  = 0.0;
654:     for (j=0; j<n; j++) {
655:       sum1 += v[jrow]*x[5*idx[jrow]];
656:       sum2 += v[jrow]*x[5*idx[jrow]+1];
657:       sum3 += v[jrow]*x[5*idx[jrow]+2];
658:       sum4 += v[jrow]*x[5*idx[jrow]+3];
659:       sum5 += v[jrow]*x[5*idx[jrow]+4];
660:       jrow++;
661:      }
662:     y[5*i]   = sum1;
663:     y[5*i+1] = sum2;
664:     y[5*i+2] = sum3;
665:     y[5*i+3] = sum4;
666:     y[5*i+4] = sum5;
667:   }

669:   PetscLogFlops(10*a->nz - 5*m);
670:   VecRestoreArray(xx,&x);
671:   VecRestoreArray(yy,&y);
672:   return(0);
673: }

677: PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
678: {
679:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
680:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
681:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
683:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

686:   VecSet(yy,zero);
687:   VecGetArray(xx,&x);
688:   VecGetArray(yy,&y);
689: 
690:   for (i=0; i<m; i++) {
691:     idx    = a->j + a->i[i] ;
692:     v      = a->a + a->i[i] ;
693:     n      = a->i[i+1] - a->i[i];
694:     alpha1 = x[5*i];
695:     alpha2 = x[5*i+1];
696:     alpha3 = x[5*i+2];
697:     alpha4 = x[5*i+3];
698:     alpha5 = x[5*i+4];
699:     while (n-->0) {
700:       y[5*(*idx)]   += alpha1*(*v);
701:       y[5*(*idx)+1] += alpha2*(*v);
702:       y[5*(*idx)+2] += alpha3*(*v);
703:       y[5*(*idx)+3] += alpha4*(*v);
704:       y[5*(*idx)+4] += alpha5*(*v);
705:       idx++; v++;
706:     }
707:   }
708:   PetscLogFlops(10*a->nz - 5*b->AIJ->cmap.n);
709:   VecRestoreArray(xx,&x);
710:   VecRestoreArray(yy,&y);
711:   return(0);
712: }

716: PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
717: {
718:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
719:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
720:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
722:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
723:   PetscInt       n,i,jrow,j;

726:   if (yy != zz) {VecCopy(yy,zz);}
727:   VecGetArray(xx,&x);
728:   VecGetArray(zz,&y);
729:   idx  = a->j;
730:   v    = a->a;
731:   ii   = a->i;

733:   for (i=0; i<m; i++) {
734:     jrow = ii[i];
735:     n    = ii[i+1] - jrow;
736:     sum1  = 0.0;
737:     sum2  = 0.0;
738:     sum3  = 0.0;
739:     sum4  = 0.0;
740:     sum5  = 0.0;
741:     for (j=0; j<n; j++) {
742:       sum1 += v[jrow]*x[5*idx[jrow]];
743:       sum2 += v[jrow]*x[5*idx[jrow]+1];
744:       sum3 += v[jrow]*x[5*idx[jrow]+2];
745:       sum4 += v[jrow]*x[5*idx[jrow]+3];
746:       sum5 += v[jrow]*x[5*idx[jrow]+4];
747:       jrow++;
748:      }
749:     y[5*i]   += sum1;
750:     y[5*i+1] += sum2;
751:     y[5*i+2] += sum3;
752:     y[5*i+3] += sum4;
753:     y[5*i+4] += sum5;
754:   }

756:   PetscLogFlops(10*a->nz);
757:   VecRestoreArray(xx,&x);
758:   VecRestoreArray(zz,&y);
759:   return(0);
760: }

764: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
765: {
766:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
767:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
768:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
770:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

773:   if (yy != zz) {VecCopy(yy,zz);}
774:   VecGetArray(xx,&x);
775:   VecGetArray(zz,&y);
776: 
777:   for (i=0; i<m; i++) {
778:     idx    = a->j + a->i[i] ;
779:     v      = a->a + a->i[i] ;
780:     n      = a->i[i+1] - a->i[i];
781:     alpha1 = x[5*i];
782:     alpha2 = x[5*i+1];
783:     alpha3 = x[5*i+2];
784:     alpha4 = x[5*i+3];
785:     alpha5 = x[5*i+4];
786:     while (n-->0) {
787:       y[5*(*idx)]   += alpha1*(*v);
788:       y[5*(*idx)+1] += alpha2*(*v);
789:       y[5*(*idx)+2] += alpha3*(*v);
790:       y[5*(*idx)+3] += alpha4*(*v);
791:       y[5*(*idx)+4] += alpha5*(*v);
792:       idx++; v++;
793:     }
794:   }
795:   PetscLogFlops(10*a->nz);
796:   VecRestoreArray(xx,&x);
797:   VecRestoreArray(zz,&y);
798:   return(0);
799: }

801: /* ------------------------------------------------------------------------------*/
804: PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
805: {
806:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
807:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
808:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
810:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
811:   PetscInt       n,i,jrow,j;

814:   VecGetArray(xx,&x);
815:   VecGetArray(yy,&y);
816:   idx  = a->j;
817:   v    = a->a;
818:   ii   = a->i;

820:   for (i=0; i<m; i++) {
821:     jrow = ii[i];
822:     n    = ii[i+1] - jrow;
823:     sum1  = 0.0;
824:     sum2  = 0.0;
825:     sum3  = 0.0;
826:     sum4  = 0.0;
827:     sum5  = 0.0;
828:     sum6  = 0.0;
829:     for (j=0; j<n; j++) {
830:       sum1 += v[jrow]*x[6*idx[jrow]];
831:       sum2 += v[jrow]*x[6*idx[jrow]+1];
832:       sum3 += v[jrow]*x[6*idx[jrow]+2];
833:       sum4 += v[jrow]*x[6*idx[jrow]+3];
834:       sum5 += v[jrow]*x[6*idx[jrow]+4];
835:       sum6 += v[jrow]*x[6*idx[jrow]+5];
836:       jrow++;
837:      }
838:     y[6*i]   = sum1;
839:     y[6*i+1] = sum2;
840:     y[6*i+2] = sum3;
841:     y[6*i+3] = sum4;
842:     y[6*i+4] = sum5;
843:     y[6*i+5] = sum6;
844:   }

846:   PetscLogFlops(12*a->nz - 6*m);
847:   VecRestoreArray(xx,&x);
848:   VecRestoreArray(yy,&y);
849:   return(0);
850: }

854: PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
855: {
856:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
857:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
858:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
860:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

863:   VecSet(yy,zero);
864:   VecGetArray(xx,&x);
865:   VecGetArray(yy,&y);

867:   for (i=0; i<m; i++) {
868:     idx    = a->j + a->i[i] ;
869:     v      = a->a + a->i[i] ;
870:     n      = a->i[i+1] - a->i[i];
871:     alpha1 = x[6*i];
872:     alpha2 = x[6*i+1];
873:     alpha3 = x[6*i+2];
874:     alpha4 = x[6*i+3];
875:     alpha5 = x[6*i+4];
876:     alpha6 = x[6*i+5];
877:     while (n-->0) {
878:       y[6*(*idx)]   += alpha1*(*v);
879:       y[6*(*idx)+1] += alpha2*(*v);
880:       y[6*(*idx)+2] += alpha3*(*v);
881:       y[6*(*idx)+3] += alpha4*(*v);
882:       y[6*(*idx)+4] += alpha5*(*v);
883:       y[6*(*idx)+5] += alpha6*(*v);
884:       idx++; v++;
885:     }
886:   }
887:   PetscLogFlops(12*a->nz - 6*b->AIJ->cmap.n);
888:   VecRestoreArray(xx,&x);
889:   VecRestoreArray(yy,&y);
890:   return(0);
891: }

895: PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
896: {
897:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
898:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
899:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
901:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
902:   PetscInt       n,i,jrow,j;

905:   if (yy != zz) {VecCopy(yy,zz);}
906:   VecGetArray(xx,&x);
907:   VecGetArray(zz,&y);
908:   idx  = a->j;
909:   v    = a->a;
910:   ii   = a->i;

912:   for (i=0; i<m; i++) {
913:     jrow = ii[i];
914:     n    = ii[i+1] - jrow;
915:     sum1  = 0.0;
916:     sum2  = 0.0;
917:     sum3  = 0.0;
918:     sum4  = 0.0;
919:     sum5  = 0.0;
920:     sum6  = 0.0;
921:     for (j=0; j<n; j++) {
922:       sum1 += v[jrow]*x[6*idx[jrow]];
923:       sum2 += v[jrow]*x[6*idx[jrow]+1];
924:       sum3 += v[jrow]*x[6*idx[jrow]+2];
925:       sum4 += v[jrow]*x[6*idx[jrow]+3];
926:       sum5 += v[jrow]*x[6*idx[jrow]+4];
927:       sum6 += v[jrow]*x[6*idx[jrow]+5];
928:       jrow++;
929:      }
930:     y[6*i]   += sum1;
931:     y[6*i+1] += sum2;
932:     y[6*i+2] += sum3;
933:     y[6*i+3] += sum4;
934:     y[6*i+4] += sum5;
935:     y[6*i+5] += sum6;
936:   }

938:   PetscLogFlops(12*a->nz);
939:   VecRestoreArray(xx,&x);
940:   VecRestoreArray(zz,&y);
941:   return(0);
942: }

946: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
947: {
948:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
949:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
950:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
952:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

955:   if (yy != zz) {VecCopy(yy,zz);}
956:   VecGetArray(xx,&x);
957:   VecGetArray(zz,&y);
958: 
959:   for (i=0; i<m; i++) {
960:     idx    = a->j + a->i[i] ;
961:     v      = a->a + a->i[i] ;
962:     n      = a->i[i+1] - a->i[i];
963:     alpha1 = x[6*i];
964:     alpha2 = x[6*i+1];
965:     alpha3 = x[6*i+2];
966:     alpha4 = x[6*i+3];
967:     alpha5 = x[6*i+4];
968:     alpha6 = x[6*i+5];
969:     while (n-->0) {
970:       y[6*(*idx)]   += alpha1*(*v);
971:       y[6*(*idx)+1] += alpha2*(*v);
972:       y[6*(*idx)+2] += alpha3*(*v);
973:       y[6*(*idx)+3] += alpha4*(*v);
974:       y[6*(*idx)+4] += alpha5*(*v);
975:       y[6*(*idx)+5] += alpha6*(*v);
976:       idx++; v++;
977:     }
978:   }
979:   PetscLogFlops(12*a->nz);
980:   VecRestoreArray(xx,&x);
981:   VecRestoreArray(zz,&y);
982:   return(0);
983: }

985: /* ------------------------------------------------------------------------------*/
988: PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
989: {
990:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
991:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
992:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
994:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
995:   PetscInt       n,i,jrow,j;

998:   VecGetArray(xx,&x);
999:   VecGetArray(yy,&y);
1000:   idx  = a->j;
1001:   v    = a->a;
1002:   ii   = a->i;

1004:   for (i=0; i<m; i++) {
1005:     jrow = ii[i];
1006:     n    = ii[i+1] - jrow;
1007:     sum1  = 0.0;
1008:     sum2  = 0.0;
1009:     sum3  = 0.0;
1010:     sum4  = 0.0;
1011:     sum5  = 0.0;
1012:     sum6  = 0.0;
1013:     sum7  = 0.0;
1014:     for (j=0; j<n; j++) {
1015:       sum1 += v[jrow]*x[7*idx[jrow]];
1016:       sum2 += v[jrow]*x[7*idx[jrow]+1];
1017:       sum3 += v[jrow]*x[7*idx[jrow]+2];
1018:       sum4 += v[jrow]*x[7*idx[jrow]+3];
1019:       sum5 += v[jrow]*x[7*idx[jrow]+4];
1020:       sum6 += v[jrow]*x[7*idx[jrow]+5];
1021:       sum7 += v[jrow]*x[7*idx[jrow]+6];
1022:       jrow++;
1023:      }
1024:     y[7*i]   = sum1;
1025:     y[7*i+1] = sum2;
1026:     y[7*i+2] = sum3;
1027:     y[7*i+3] = sum4;
1028:     y[7*i+4] = sum5;
1029:     y[7*i+5] = sum6;
1030:     y[7*i+6] = sum7;
1031:   }

1033:   PetscLogFlops(14*a->nz - 7*m);
1034:   VecRestoreArray(xx,&x);
1035:   VecRestoreArray(yy,&y);
1036:   return(0);
1037: }

1041: PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1042: {
1043:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1044:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1045:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,zero = 0.0;
1047:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1050:   VecSet(yy,zero);
1051:   VecGetArray(xx,&x);
1052:   VecGetArray(yy,&y);

1054:   for (i=0; i<m; i++) {
1055:     idx    = a->j + a->i[i] ;
1056:     v      = a->a + a->i[i] ;
1057:     n      = a->i[i+1] - a->i[i];
1058:     alpha1 = x[7*i];
1059:     alpha2 = x[7*i+1];
1060:     alpha3 = x[7*i+2];
1061:     alpha4 = x[7*i+3];
1062:     alpha5 = x[7*i+4];
1063:     alpha6 = x[7*i+5];
1064:     alpha7 = x[7*i+6];
1065:     while (n-->0) {
1066:       y[7*(*idx)]   += alpha1*(*v);
1067:       y[7*(*idx)+1] += alpha2*(*v);
1068:       y[7*(*idx)+2] += alpha3*(*v);
1069:       y[7*(*idx)+3] += alpha4*(*v);
1070:       y[7*(*idx)+4] += alpha5*(*v);
1071:       y[7*(*idx)+5] += alpha6*(*v);
1072:       y[7*(*idx)+6] += alpha7*(*v);
1073:       idx++; v++;
1074:     }
1075:   }
1076:   PetscLogFlops(14*a->nz - 7*b->AIJ->cmap.n);
1077:   VecRestoreArray(xx,&x);
1078:   VecRestoreArray(yy,&y);
1079:   return(0);
1080: }

1084: PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1085: {
1086:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1087:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1088:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1090:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1091:   PetscInt       n,i,jrow,j;

1094:   if (yy != zz) {VecCopy(yy,zz);}
1095:   VecGetArray(xx,&x);
1096:   VecGetArray(zz,&y);
1097:   idx  = a->j;
1098:   v    = a->a;
1099:   ii   = a->i;

1101:   for (i=0; i<m; i++) {
1102:     jrow = ii[i];
1103:     n    = ii[i+1] - jrow;
1104:     sum1  = 0.0;
1105:     sum2  = 0.0;
1106:     sum3  = 0.0;
1107:     sum4  = 0.0;
1108:     sum5  = 0.0;
1109:     sum6  = 0.0;
1110:     sum7  = 0.0;
1111:     for (j=0; j<n; j++) {
1112:       sum1 += v[jrow]*x[7*idx[jrow]];
1113:       sum2 += v[jrow]*x[7*idx[jrow]+1];
1114:       sum3 += v[jrow]*x[7*idx[jrow]+2];
1115:       sum4 += v[jrow]*x[7*idx[jrow]+3];
1116:       sum5 += v[jrow]*x[7*idx[jrow]+4];
1117:       sum6 += v[jrow]*x[7*idx[jrow]+5];
1118:       sum7 += v[jrow]*x[7*idx[jrow]+6];
1119:       jrow++;
1120:      }
1121:     y[7*i]   += sum1;
1122:     y[7*i+1] += sum2;
1123:     y[7*i+2] += sum3;
1124:     y[7*i+3] += sum4;
1125:     y[7*i+4] += sum5;
1126:     y[7*i+5] += sum6;
1127:     y[7*i+6] += sum7;
1128:   }

1130:   PetscLogFlops(14*a->nz);
1131:   VecRestoreArray(xx,&x);
1132:   VecRestoreArray(zz,&y);
1133:   return(0);
1134: }

1138: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1139: {
1140:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1141:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1142:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1144:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1147:   if (yy != zz) {VecCopy(yy,zz);}
1148:   VecGetArray(xx,&x);
1149:   VecGetArray(zz,&y);
1150:   for (i=0; i<m; i++) {
1151:     idx    = a->j + a->i[i] ;
1152:     v      = a->a + a->i[i] ;
1153:     n      = a->i[i+1] - a->i[i];
1154:     alpha1 = x[7*i];
1155:     alpha2 = x[7*i+1];
1156:     alpha3 = x[7*i+2];
1157:     alpha4 = x[7*i+3];
1158:     alpha5 = x[7*i+4];
1159:     alpha6 = x[7*i+5];
1160:     alpha7 = x[7*i+6];
1161:     while (n-->0) {
1162:       y[7*(*idx)]   += alpha1*(*v);
1163:       y[7*(*idx)+1] += alpha2*(*v);
1164:       y[7*(*idx)+2] += alpha3*(*v);
1165:       y[7*(*idx)+3] += alpha4*(*v);
1166:       y[7*(*idx)+4] += alpha5*(*v);
1167:       y[7*(*idx)+5] += alpha6*(*v);
1168:       y[7*(*idx)+6] += alpha7*(*v);
1169:       idx++; v++;
1170:     }
1171:   }
1172:   PetscLogFlops(14*a->nz);
1173:   VecRestoreArray(xx,&x);
1174:   VecRestoreArray(zz,&y);
1175:   return(0);
1176: }

1180: PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1181: {
1182:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1183:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1184:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1186:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1187:   PetscInt       n,i,jrow,j;

1190:   VecGetArray(xx,&x);
1191:   VecGetArray(yy,&y);
1192:   idx  = a->j;
1193:   v    = a->a;
1194:   ii   = a->i;

1196:   for (i=0; i<m; i++) {
1197:     jrow = ii[i];
1198:     n    = ii[i+1] - jrow;
1199:     sum1  = 0.0;
1200:     sum2  = 0.0;
1201:     sum3  = 0.0;
1202:     sum4  = 0.0;
1203:     sum5  = 0.0;
1204:     sum6  = 0.0;
1205:     sum7  = 0.0;
1206:     sum8  = 0.0;
1207:     for (j=0; j<n; j++) {
1208:       sum1 += v[jrow]*x[8*idx[jrow]];
1209:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1210:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1211:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1212:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1213:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1214:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1215:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1216:       jrow++;
1217:      }
1218:     y[8*i]   = sum1;
1219:     y[8*i+1] = sum2;
1220:     y[8*i+2] = sum3;
1221:     y[8*i+3] = sum4;
1222:     y[8*i+4] = sum5;
1223:     y[8*i+5] = sum6;
1224:     y[8*i+6] = sum7;
1225:     y[8*i+7] = sum8;
1226:   }

1228:   PetscLogFlops(16*a->nz - 8*m);
1229:   VecRestoreArray(xx,&x);
1230:   VecRestoreArray(yy,&y);
1231:   return(0);
1232: }

1236: PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1237: {
1238:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1239:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1240:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1242:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1245:   VecSet(yy,zero);
1246:   VecGetArray(xx,&x);
1247:   VecGetArray(yy,&y);

1249:   for (i=0; i<m; i++) {
1250:     idx    = a->j + a->i[i] ;
1251:     v      = a->a + a->i[i] ;
1252:     n      = a->i[i+1] - a->i[i];
1253:     alpha1 = x[8*i];
1254:     alpha2 = x[8*i+1];
1255:     alpha3 = x[8*i+2];
1256:     alpha4 = x[8*i+3];
1257:     alpha5 = x[8*i+4];
1258:     alpha6 = x[8*i+5];
1259:     alpha7 = x[8*i+6];
1260:     alpha8 = x[8*i+7];
1261:     while (n-->0) {
1262:       y[8*(*idx)]   += alpha1*(*v);
1263:       y[8*(*idx)+1] += alpha2*(*v);
1264:       y[8*(*idx)+2] += alpha3*(*v);
1265:       y[8*(*idx)+3] += alpha4*(*v);
1266:       y[8*(*idx)+4] += alpha5*(*v);
1267:       y[8*(*idx)+5] += alpha6*(*v);
1268:       y[8*(*idx)+6] += alpha7*(*v);
1269:       y[8*(*idx)+7] += alpha8*(*v);
1270:       idx++; v++;
1271:     }
1272:   }
1273:   PetscLogFlops(16*a->nz - 8*b->AIJ->cmap.n);
1274:   VecRestoreArray(xx,&x);
1275:   VecRestoreArray(yy,&y);
1276:   return(0);
1277: }

1281: PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1282: {
1283:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1284:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1285:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1287:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1288:   PetscInt       n,i,jrow,j;

1291:   if (yy != zz) {VecCopy(yy,zz);}
1292:   VecGetArray(xx,&x);
1293:   VecGetArray(zz,&y);
1294:   idx  = a->j;
1295:   v    = a->a;
1296:   ii   = a->i;

1298:   for (i=0; i<m; i++) {
1299:     jrow = ii[i];
1300:     n    = ii[i+1] - jrow;
1301:     sum1  = 0.0;
1302:     sum2  = 0.0;
1303:     sum3  = 0.0;
1304:     sum4  = 0.0;
1305:     sum5  = 0.0;
1306:     sum6  = 0.0;
1307:     sum7  = 0.0;
1308:     sum8  = 0.0;
1309:     for (j=0; j<n; j++) {
1310:       sum1 += v[jrow]*x[8*idx[jrow]];
1311:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1312:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1313:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1314:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1315:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1316:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1317:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1318:       jrow++;
1319:      }
1320:     y[8*i]   += sum1;
1321:     y[8*i+1] += sum2;
1322:     y[8*i+2] += sum3;
1323:     y[8*i+3] += sum4;
1324:     y[8*i+4] += sum5;
1325:     y[8*i+5] += sum6;
1326:     y[8*i+6] += sum7;
1327:     y[8*i+7] += sum8;
1328:   }

1330:   PetscLogFlops(16*a->nz);
1331:   VecRestoreArray(xx,&x);
1332:   VecRestoreArray(zz,&y);
1333:   return(0);
1334: }

1338: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1339: {
1340:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1341:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1342:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1344:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1347:   if (yy != zz) {VecCopy(yy,zz);}
1348:   VecGetArray(xx,&x);
1349:   VecGetArray(zz,&y);
1350:   for (i=0; i<m; i++) {
1351:     idx    = a->j + a->i[i] ;
1352:     v      = a->a + a->i[i] ;
1353:     n      = a->i[i+1] - a->i[i];
1354:     alpha1 = x[8*i];
1355:     alpha2 = x[8*i+1];
1356:     alpha3 = x[8*i+2];
1357:     alpha4 = x[8*i+3];
1358:     alpha5 = x[8*i+4];
1359:     alpha6 = x[8*i+5];
1360:     alpha7 = x[8*i+6];
1361:     alpha8 = x[8*i+7];
1362:     while (n-->0) {
1363:       y[8*(*idx)]   += alpha1*(*v);
1364:       y[8*(*idx)+1] += alpha2*(*v);
1365:       y[8*(*idx)+2] += alpha3*(*v);
1366:       y[8*(*idx)+3] += alpha4*(*v);
1367:       y[8*(*idx)+4] += alpha5*(*v);
1368:       y[8*(*idx)+5] += alpha6*(*v);
1369:       y[8*(*idx)+6] += alpha7*(*v);
1370:       y[8*(*idx)+7] += alpha8*(*v);
1371:       idx++; v++;
1372:     }
1373:   }
1374:   PetscLogFlops(16*a->nz);
1375:   VecRestoreArray(xx,&x);
1376:   VecRestoreArray(zz,&y);
1377:   return(0);
1378: }

1380: /* ------------------------------------------------------------------------------*/
1383: PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1384: {
1385:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1386:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1387:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1389:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1390:   PetscInt       n,i,jrow,j;

1393:   VecGetArray(xx,&x);
1394:   VecGetArray(yy,&y);
1395:   idx  = a->j;
1396:   v    = a->a;
1397:   ii   = a->i;

1399:   for (i=0; i<m; i++) {
1400:     jrow = ii[i];
1401:     n    = ii[i+1] - jrow;
1402:     sum1  = 0.0;
1403:     sum2  = 0.0;
1404:     sum3  = 0.0;
1405:     sum4  = 0.0;
1406:     sum5  = 0.0;
1407:     sum6  = 0.0;
1408:     sum7  = 0.0;
1409:     sum8  = 0.0;
1410:     sum9  = 0.0;
1411:     for (j=0; j<n; j++) {
1412:       sum1 += v[jrow]*x[9*idx[jrow]];
1413:       sum2 += v[jrow]*x[9*idx[jrow]+1];
1414:       sum3 += v[jrow]*x[9*idx[jrow]+2];
1415:       sum4 += v[jrow]*x[9*idx[jrow]+3];
1416:       sum5 += v[jrow]*x[9*idx[jrow]+4];
1417:       sum6 += v[jrow]*x[9*idx[jrow]+5];
1418:       sum7 += v[jrow]*x[9*idx[jrow]+6];
1419:       sum8 += v[jrow]*x[9*idx[jrow]+7];
1420:       sum9 += v[jrow]*x[9*idx[jrow]+8];
1421:       jrow++;
1422:      }
1423:     y[9*i]   = sum1;
1424:     y[9*i+1] = sum2;
1425:     y[9*i+2] = sum3;
1426:     y[9*i+3] = sum4;
1427:     y[9*i+4] = sum5;
1428:     y[9*i+5] = sum6;
1429:     y[9*i+6] = sum7;
1430:     y[9*i+7] = sum8;
1431:     y[9*i+8] = sum9;
1432:   }

1434:   PetscLogFlops(18*a->nz - 9*m);
1435:   VecRestoreArray(xx,&x);
1436:   VecRestoreArray(yy,&y);
1437:   return(0);
1438: }

1440: /* ------------------------------------------------------------------------------*/

1444: PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1445: {
1446:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1447:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1448:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0;
1450:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1453:   VecSet(yy,zero);
1454:   VecGetArray(xx,&x);
1455:   VecGetArray(yy,&y);

1457:   for (i=0; i<m; i++) {
1458:     idx    = a->j + a->i[i] ;
1459:     v      = a->a + a->i[i] ;
1460:     n      = a->i[i+1] - a->i[i];
1461:     alpha1 = x[9*i];
1462:     alpha2 = x[9*i+1];
1463:     alpha3 = x[9*i+2];
1464:     alpha4 = x[9*i+3];
1465:     alpha5 = x[9*i+4];
1466:     alpha6 = x[9*i+5];
1467:     alpha7 = x[9*i+6];
1468:     alpha8 = x[9*i+7];
1469:     alpha9 = x[9*i+8];
1470:     while (n-->0) {
1471:       y[9*(*idx)]   += alpha1*(*v);
1472:       y[9*(*idx)+1] += alpha2*(*v);
1473:       y[9*(*idx)+2] += alpha3*(*v);
1474:       y[9*(*idx)+3] += alpha4*(*v);
1475:       y[9*(*idx)+4] += alpha5*(*v);
1476:       y[9*(*idx)+5] += alpha6*(*v);
1477:       y[9*(*idx)+6] += alpha7*(*v);
1478:       y[9*(*idx)+7] += alpha8*(*v);
1479:       y[9*(*idx)+8] += alpha9*(*v);
1480:       idx++; v++;
1481:     }
1482:   }
1483:   PetscLogFlops(18*a->nz - 9*b->AIJ->cmap.n);
1484:   VecRestoreArray(xx,&x);
1485:   VecRestoreArray(yy,&y);
1486:   return(0);
1487: }

1491: PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1492: {
1493:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1494:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1495:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1497:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1498:   PetscInt       n,i,jrow,j;

1501:   if (yy != zz) {VecCopy(yy,zz);}
1502:   VecGetArray(xx,&x);
1503:   VecGetArray(zz,&y);
1504:   idx  = a->j;
1505:   v    = a->a;
1506:   ii   = a->i;

1508:   for (i=0; i<m; i++) {
1509:     jrow = ii[i];
1510:     n    = ii[i+1] - jrow;
1511:     sum1  = 0.0;
1512:     sum2  = 0.0;
1513:     sum3  = 0.0;
1514:     sum4  = 0.0;
1515:     sum5  = 0.0;
1516:     sum6  = 0.0;
1517:     sum7  = 0.0;
1518:     sum8  = 0.0;
1519:     sum9  = 0.0;
1520:     for (j=0; j<n; j++) {
1521:       sum1 += v[jrow]*x[9*idx[jrow]];
1522:       sum2 += v[jrow]*x[9*idx[jrow]+1];
1523:       sum3 += v[jrow]*x[9*idx[jrow]+2];
1524:       sum4 += v[jrow]*x[9*idx[jrow]+3];
1525:       sum5 += v[jrow]*x[9*idx[jrow]+4];
1526:       sum6 += v[jrow]*x[9*idx[jrow]+5];
1527:       sum7 += v[jrow]*x[9*idx[jrow]+6];
1528:       sum8 += v[jrow]*x[9*idx[jrow]+7];
1529:       sum9 += v[jrow]*x[9*idx[jrow]+8];
1530:       jrow++;
1531:      }
1532:     y[9*i]   += sum1;
1533:     y[9*i+1] += sum2;
1534:     y[9*i+2] += sum3;
1535:     y[9*i+3] += sum4;
1536:     y[9*i+4] += sum5;
1537:     y[9*i+5] += sum6;
1538:     y[9*i+6] += sum7;
1539:     y[9*i+7] += sum8;
1540:     y[9*i+8] += sum9;
1541:   }

1543:   PetscLogFlops(18*a->nz);
1544:   VecRestoreArray(xx,&x);
1545:   VecRestoreArray(zz,&y);
1546:   return(0);
1547: }

1551: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1552: {
1553:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1554:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1555:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1557:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1560:   if (yy != zz) {VecCopy(yy,zz);}
1561:   VecGetArray(xx,&x);
1562:   VecGetArray(zz,&y);
1563:   for (i=0; i<m; i++) {
1564:     idx    = a->j + a->i[i] ;
1565:     v      = a->a + a->i[i] ;
1566:     n      = a->i[i+1] - a->i[i];
1567:     alpha1 = x[9*i];
1568:     alpha2 = x[9*i+1];
1569:     alpha3 = x[9*i+2];
1570:     alpha4 = x[9*i+3];
1571:     alpha5 = x[9*i+4];
1572:     alpha6 = x[9*i+5];
1573:     alpha7 = x[9*i+6];
1574:     alpha8 = x[9*i+7];
1575:     alpha9 = x[9*i+8];
1576:     while (n-->0) {
1577:       y[9*(*idx)]   += alpha1*(*v);
1578:       y[9*(*idx)+1] += alpha2*(*v);
1579:       y[9*(*idx)+2] += alpha3*(*v);
1580:       y[9*(*idx)+3] += alpha4*(*v);
1581:       y[9*(*idx)+4] += alpha5*(*v);
1582:       y[9*(*idx)+5] += alpha6*(*v);
1583:       y[9*(*idx)+6] += alpha7*(*v);
1584:       y[9*(*idx)+7] += alpha8*(*v);
1585:       y[9*(*idx)+8] += alpha9*(*v);
1586:       idx++; v++;
1587:     }
1588:   }
1589:   PetscLogFlops(18*a->nz);
1590:   VecRestoreArray(xx,&x);
1591:   VecRestoreArray(zz,&y);
1592:   return(0);
1593: }
1594: /*--------------------------------------------------------------------------------------------*/
1597: PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1598: {
1599:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1600:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1601:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1603:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1604:   PetscInt       n,i,jrow,j;

1607:   VecGetArray(xx,&x);
1608:   VecGetArray(yy,&y);
1609:   idx  = a->j;
1610:   v    = a->a;
1611:   ii   = a->i;

1613:   for (i=0; i<m; i++) {
1614:     jrow = ii[i];
1615:     n    = ii[i+1] - jrow;
1616:     sum1  = 0.0;
1617:     sum2  = 0.0;
1618:     sum3  = 0.0;
1619:     sum4  = 0.0;
1620:     sum5  = 0.0;
1621:     sum6  = 0.0;
1622:     sum7  = 0.0;
1623:     sum8  = 0.0;
1624:     sum9  = 0.0;
1625:     sum10 = 0.0;
1626:     for (j=0; j<n; j++) {
1627:       sum1  += v[jrow]*x[10*idx[jrow]];
1628:       sum2  += v[jrow]*x[10*idx[jrow]+1];
1629:       sum3  += v[jrow]*x[10*idx[jrow]+2];
1630:       sum4  += v[jrow]*x[10*idx[jrow]+3];
1631:       sum5  += v[jrow]*x[10*idx[jrow]+4];
1632:       sum6  += v[jrow]*x[10*idx[jrow]+5];
1633:       sum7  += v[jrow]*x[10*idx[jrow]+6];
1634:       sum8  += v[jrow]*x[10*idx[jrow]+7];
1635:       sum9  += v[jrow]*x[10*idx[jrow]+8];
1636:       sum10 += v[jrow]*x[10*idx[jrow]+9];
1637:       jrow++;
1638:      }
1639:     y[10*i]   = sum1;
1640:     y[10*i+1] = sum2;
1641:     y[10*i+2] = sum3;
1642:     y[10*i+3] = sum4;
1643:     y[10*i+4] = sum5;
1644:     y[10*i+5] = sum6;
1645:     y[10*i+6] = sum7;
1646:     y[10*i+7] = sum8;
1647:     y[10*i+8] = sum9;
1648:     y[10*i+9] = sum10;
1649:   }

1651:   PetscLogFlops(20*a->nz - 10*m);
1652:   VecRestoreArray(xx,&x);
1653:   VecRestoreArray(yy,&y);
1654:   return(0);
1655: }

1659: PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1660: {
1661:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1662:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1663:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1665:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1666:   PetscInt       n,i,jrow,j;

1669:   if (yy != zz) {VecCopy(yy,zz);}
1670:   VecGetArray(xx,&x);
1671:   VecGetArray(zz,&y);
1672:   idx  = a->j;
1673:   v    = a->a;
1674:   ii   = a->i;

1676:   for (i=0; i<m; i++) {
1677:     jrow = ii[i];
1678:     n    = ii[i+1] - jrow;
1679:     sum1  = 0.0;
1680:     sum2  = 0.0;
1681:     sum3  = 0.0;
1682:     sum4  = 0.0;
1683:     sum5  = 0.0;
1684:     sum6  = 0.0;
1685:     sum7  = 0.0;
1686:     sum8  = 0.0;
1687:     sum9  = 0.0;
1688:     sum10 = 0.0;
1689:     for (j=0; j<n; j++) {
1690:       sum1  += v[jrow]*x[10*idx[jrow]];
1691:       sum2  += v[jrow]*x[10*idx[jrow]+1];
1692:       sum3  += v[jrow]*x[10*idx[jrow]+2];
1693:       sum4  += v[jrow]*x[10*idx[jrow]+3];
1694:       sum5  += v[jrow]*x[10*idx[jrow]+4];
1695:       sum6  += v[jrow]*x[10*idx[jrow]+5];
1696:       sum7  += v[jrow]*x[10*idx[jrow]+6];
1697:       sum8  += v[jrow]*x[10*idx[jrow]+7];
1698:       sum9  += v[jrow]*x[10*idx[jrow]+8];
1699:       sum10 += v[jrow]*x[10*idx[jrow]+9];
1700:       jrow++;
1701:      }
1702:     y[10*i]   += sum1;
1703:     y[10*i+1] += sum2;
1704:     y[10*i+2] += sum3;
1705:     y[10*i+3] += sum4;
1706:     y[10*i+4] += sum5;
1707:     y[10*i+5] += sum6;
1708:     y[10*i+6] += sum7;
1709:     y[10*i+7] += sum8;
1710:     y[10*i+8] += sum9;
1711:     y[10*i+9] += sum10;
1712:   }

1714:   PetscLogFlops(20*a->nz - 10*m);
1715:   VecRestoreArray(xx,&x);
1716:   VecRestoreArray(yy,&y);
1717:   return(0);
1718: }

1722: PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1723: {
1724:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1725:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1726:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,zero = 0.0;
1728:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1731:   VecSet(yy,zero);
1732:   VecGetArray(xx,&x);
1733:   VecGetArray(yy,&y);

1735:   for (i=0; i<m; i++) {
1736:     idx    = a->j + a->i[i] ;
1737:     v      = a->a + a->i[i] ;
1738:     n      = a->i[i+1] - a->i[i];
1739:     alpha1 = x[10*i];
1740:     alpha2 = x[10*i+1];
1741:     alpha3 = x[10*i+2];
1742:     alpha4 = x[10*i+3];
1743:     alpha5 = x[10*i+4];
1744:     alpha6 = x[10*i+5];
1745:     alpha7 = x[10*i+6];
1746:     alpha8 = x[10*i+7];
1747:     alpha9 = x[10*i+8];
1748:     alpha10 = x[10*i+9];
1749:     while (n-->0) {
1750:       y[10*(*idx)]   += alpha1*(*v);
1751:       y[10*(*idx)+1] += alpha2*(*v);
1752:       y[10*(*idx)+2] += alpha3*(*v);
1753:       y[10*(*idx)+3] += alpha4*(*v);
1754:       y[10*(*idx)+4] += alpha5*(*v);
1755:       y[10*(*idx)+5] += alpha6*(*v);
1756:       y[10*(*idx)+6] += alpha7*(*v);
1757:       y[10*(*idx)+7] += alpha8*(*v);
1758:       y[10*(*idx)+8] += alpha9*(*v);
1759:       y[10*(*idx)+9] += alpha10*(*v);
1760:       idx++; v++;
1761:     }
1762:   }
1763:   PetscLogFlops(20*a->nz - 10*b->AIJ->cmap.n);
1764:   VecRestoreArray(xx,&x);
1765:   VecRestoreArray(yy,&y);
1766:   return(0);
1767: }

1771: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1772: {
1773:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1774:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1775:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
17