Actual source code: maij.c
1: #define PETSCMAT_DLL
3: /*
4: Defines the basic matrix operations for the MAIJ matrix storage format.
5: This format is used for restriction and interpolation operations for
6: multicomponent problems. It interpolates each component the same way
7: independently.
9: We provide:
10: MatMult()
11: MatMultTranspose()
12: MatMultTransposeAdd()
13: MatMultAdd()
14: and
15: MatCreateMAIJ(Mat,dof,Mat*)
17: This single directory handles both the sequential and parallel codes
18: */
20: #include src/mat/impls/maij/maij.h
21: #include src/mat/utils/freespace.h
22: #include private/vecimpl.h
26: PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B)
27: {
29: PetscTruth ismpimaij,isseqmaij;
32: PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
33: PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
34: if (ismpimaij) {
35: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
37: *B = b->A;
38: } else if (isseqmaij) {
39: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
41: *B = b->AIJ;
42: } else {
43: *B = A;
44: }
45: return(0);
46: }
50: PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
51: {
53: Mat Aij;
56: MatMAIJGetAIJ(A,&Aij);
57: MatCreateMAIJ(Aij,dof,B);
58: return(0);
59: }
63: PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
64: {
66: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
69: if (b->AIJ) {
70: MatDestroy(b->AIJ);
71: }
72: PetscFree(b);
73: return(0);
74: }
78: PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
79: {
81: Mat B;
84: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
85: MatView(B,viewer);
86: MatDestroy(B);
87: return(0);
88: }
92: PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
93: {
95: Mat B;
98: MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
99: MatView(B,viewer);
100: MatDestroy(B);
101: return(0);
102: }
106: PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
107: {
109: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
112: if (b->AIJ) {
113: MatDestroy(b->AIJ);
114: }
115: if (b->OAIJ) {
116: MatDestroy(b->OAIJ);
117: }
118: if (b->A) {
119: MatDestroy(b->A);
120: }
121: if (b->ctx) {
122: VecScatterDestroy(b->ctx);
123: }
124: if (b->w) {
125: VecDestroy(b->w);
126: }
127: PetscFree(b);
128: PetscObjectChangeTypeName((PetscObject)A,0);
129: return(0);
130: }
132: /*MC
133: MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
134: multicomponent problems, interpolating or restricting each component the same way independently.
135: The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
137: Operations provided:
138: . MatMult
139: . MatMultTranspose
140: . MatMultAdd
141: . MatMultTransposeAdd
143: Level: advanced
145: .seealso: MatCreateSeqDense
146: M*/
151: PetscErrorCode MatCreate_MAIJ(Mat A)
152: {
154: Mat_MPIMAIJ *b;
155: PetscMPIInt size;
158: PetscNew(Mat_MPIMAIJ,&b);
159: A->data = (void*)b;
160: PetscMemzero(A->ops,sizeof(struct _MatOps));
161: A->factor = 0;
162: A->mapping = 0;
164: b->AIJ = 0;
165: b->dof = 0;
166: b->OAIJ = 0;
167: b->ctx = 0;
168: b->w = 0;
169: MPI_Comm_size(A->comm,&size);
170: if (size == 1){
171: PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);
172: } else {
173: PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);
174: }
175: return(0);
176: }
179: /* --------------------------------------------------------------------------------------*/
182: PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
183: {
184: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
185: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
186: PetscScalar *x,*y,*v,sum1, sum2;
188: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
189: PetscInt n,i,jrow,j;
192: VecGetArray(xx,&x);
193: VecGetArray(yy,&y);
194: idx = a->j;
195: v = a->a;
196: ii = a->i;
198: for (i=0; i<m; i++) {
199: jrow = ii[i];
200: n = ii[i+1] - jrow;
201: sum1 = 0.0;
202: sum2 = 0.0;
203: for (j=0; j<n; j++) {
204: sum1 += v[jrow]*x[2*idx[jrow]];
205: sum2 += v[jrow]*x[2*idx[jrow]+1];
206: jrow++;
207: }
208: y[2*i] = sum1;
209: y[2*i+1] = sum2;
210: }
212: PetscLogFlops(4*a->nz - 2*m);
213: VecRestoreArray(xx,&x);
214: VecRestoreArray(yy,&y);
215: return(0);
216: }
220: PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
221: {
222: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
223: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
224: PetscScalar *x,*y,*v,alpha1,alpha2,zero = 0.0;
226: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
229: VecSet(yy,zero);
230: VecGetArray(xx,&x);
231: VecGetArray(yy,&y);
232:
233: for (i=0; i<m; i++) {
234: idx = a->j + a->i[i] ;
235: v = a->a + a->i[i] ;
236: n = a->i[i+1] - a->i[i];
237: alpha1 = x[2*i];
238: alpha2 = x[2*i+1];
239: while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
240: }
241: PetscLogFlops(4*a->nz - 2*b->AIJ->cmap.n);
242: VecRestoreArray(xx,&x);
243: VecRestoreArray(yy,&y);
244: return(0);
245: }
249: PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
250: {
251: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
252: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
253: PetscScalar *x,*y,*v,sum1, sum2;
255: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
256: PetscInt n,i,jrow,j;
259: if (yy != zz) {VecCopy(yy,zz);}
260: VecGetArray(xx,&x);
261: VecGetArray(zz,&y);
262: idx = a->j;
263: v = a->a;
264: ii = a->i;
266: for (i=0; i<m; i++) {
267: jrow = ii[i];
268: n = ii[i+1] - jrow;
269: sum1 = 0.0;
270: sum2 = 0.0;
271: for (j=0; j<n; j++) {
272: sum1 += v[jrow]*x[2*idx[jrow]];
273: sum2 += v[jrow]*x[2*idx[jrow]+1];
274: jrow++;
275: }
276: y[2*i] += sum1;
277: y[2*i+1] += sum2;
278: }
280: PetscLogFlops(4*a->nz - 2*m);
281: VecRestoreArray(xx,&x);
282: VecRestoreArray(zz,&y);
283: return(0);
284: }
287: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
288: {
289: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
290: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
291: PetscScalar *x,*y,*v,alpha1,alpha2;
293: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
296: if (yy != zz) {VecCopy(yy,zz);}
297: VecGetArray(xx,&x);
298: VecGetArray(zz,&y);
299:
300: for (i=0; i<m; i++) {
301: idx = a->j + a->i[i] ;
302: v = a->a + a->i[i] ;
303: n = a->i[i+1] - a->i[i];
304: alpha1 = x[2*i];
305: alpha2 = x[2*i+1];
306: while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
307: }
308: PetscLogFlops(4*a->nz - 2*b->AIJ->cmap.n);
309: VecRestoreArray(xx,&x);
310: VecRestoreArray(zz,&y);
311: return(0);
312: }
313: /* --------------------------------------------------------------------------------------*/
316: PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
317: {
318: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
319: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
320: PetscScalar *x,*y,*v,sum1, sum2, sum3;
322: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
323: PetscInt n,i,jrow,j;
326: VecGetArray(xx,&x);
327: VecGetArray(yy,&y);
328: idx = a->j;
329: v = a->a;
330: ii = a->i;
332: for (i=0; i<m; i++) {
333: jrow = ii[i];
334: n = ii[i+1] - jrow;
335: sum1 = 0.0;
336: sum2 = 0.0;
337: sum3 = 0.0;
338: for (j=0; j<n; j++) {
339: sum1 += v[jrow]*x[3*idx[jrow]];
340: sum2 += v[jrow]*x[3*idx[jrow]+1];
341: sum3 += v[jrow]*x[3*idx[jrow]+2];
342: jrow++;
343: }
344: y[3*i] = sum1;
345: y[3*i+1] = sum2;
346: y[3*i+2] = sum3;
347: }
349: PetscLogFlops(6*a->nz - 3*m);
350: VecRestoreArray(xx,&x);
351: VecRestoreArray(yy,&y);
352: return(0);
353: }
357: PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
358: {
359: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
360: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
361: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
363: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
366: VecSet(yy,zero);
367: VecGetArray(xx,&x);
368: VecGetArray(yy,&y);
369:
370: for (i=0; i<m; i++) {
371: idx = a->j + a->i[i];
372: v = a->a + a->i[i];
373: n = a->i[i+1] - a->i[i];
374: alpha1 = x[3*i];
375: alpha2 = x[3*i+1];
376: alpha3 = x[3*i+2];
377: while (n-->0) {
378: y[3*(*idx)] += alpha1*(*v);
379: y[3*(*idx)+1] += alpha2*(*v);
380: y[3*(*idx)+2] += alpha3*(*v);
381: idx++; v++;
382: }
383: }
384: PetscLogFlops(6*a->nz - 3*b->AIJ->cmap.n);
385: VecRestoreArray(xx,&x);
386: VecRestoreArray(yy,&y);
387: return(0);
388: }
392: PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
393: {
394: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
395: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
396: PetscScalar *x,*y,*v,sum1, sum2, sum3;
398: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
399: PetscInt n,i,jrow,j;
402: if (yy != zz) {VecCopy(yy,zz);}
403: VecGetArray(xx,&x);
404: VecGetArray(zz,&y);
405: idx = a->j;
406: v = a->a;
407: ii = a->i;
409: for (i=0; i<m; i++) {
410: jrow = ii[i];
411: n = ii[i+1] - jrow;
412: sum1 = 0.0;
413: sum2 = 0.0;
414: sum3 = 0.0;
415: for (j=0; j<n; j++) {
416: sum1 += v[jrow]*x[3*idx[jrow]];
417: sum2 += v[jrow]*x[3*idx[jrow]+1];
418: sum3 += v[jrow]*x[3*idx[jrow]+2];
419: jrow++;
420: }
421: y[3*i] += sum1;
422: y[3*i+1] += sum2;
423: y[3*i+2] += sum3;
424: }
426: PetscLogFlops(6*a->nz);
427: VecRestoreArray(xx,&x);
428: VecRestoreArray(zz,&y);
429: return(0);
430: }
433: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
434: {
435: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
436: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
437: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3;
439: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
442: if (yy != zz) {VecCopy(yy,zz);}
443: VecGetArray(xx,&x);
444: VecGetArray(zz,&y);
445: for (i=0; i<m; i++) {
446: idx = a->j + a->i[i] ;
447: v = a->a + a->i[i] ;
448: n = a->i[i+1] - a->i[i];
449: alpha1 = x[3*i];
450: alpha2 = x[3*i+1];
451: alpha3 = x[3*i+2];
452: while (n-->0) {
453: y[3*(*idx)] += alpha1*(*v);
454: y[3*(*idx)+1] += alpha2*(*v);
455: y[3*(*idx)+2] += alpha3*(*v);
456: idx++; v++;
457: }
458: }
459: PetscLogFlops(6*a->nz);
460: VecRestoreArray(xx,&x);
461: VecRestoreArray(zz,&y);
462: return(0);
463: }
465: /* ------------------------------------------------------------------------------*/
468: PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
469: {
470: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
471: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
472: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4;
474: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
475: PetscInt n,i,jrow,j;
478: VecGetArray(xx,&x);
479: VecGetArray(yy,&y);
480: idx = a->j;
481: v = a->a;
482: ii = a->i;
484: for (i=0; i<m; i++) {
485: jrow = ii[i];
486: n = ii[i+1] - jrow;
487: sum1 = 0.0;
488: sum2 = 0.0;
489: sum3 = 0.0;
490: sum4 = 0.0;
491: for (j=0; j<n; j++) {
492: sum1 += v[jrow]*x[4*idx[jrow]];
493: sum2 += v[jrow]*x[4*idx[jrow]+1];
494: sum3 += v[jrow]*x[4*idx[jrow]+2];
495: sum4 += v[jrow]*x[4*idx[jrow]+3];
496: jrow++;
497: }
498: y[4*i] = sum1;
499: y[4*i+1] = sum2;
500: y[4*i+2] = sum3;
501: y[4*i+3] = sum4;
502: }
504: PetscLogFlops(8*a->nz - 4*m);
505: VecRestoreArray(xx,&x);
506: VecRestoreArray(yy,&y);
507: return(0);
508: }
512: PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
513: {
514: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
515: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
516: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
518: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
521: VecSet(yy,zero);
522: VecGetArray(xx,&x);
523: VecGetArray(yy,&y);
524: for (i=0; i<m; i++) {
525: idx = a->j + a->i[i] ;
526: v = a->a + a->i[i] ;
527: n = a->i[i+1] - a->i[i];
528: alpha1 = x[4*i];
529: alpha2 = x[4*i+1];
530: alpha3 = x[4*i+2];
531: alpha4 = x[4*i+3];
532: while (n-->0) {
533: y[4*(*idx)] += alpha1*(*v);
534: y[4*(*idx)+1] += alpha2*(*v);
535: y[4*(*idx)+2] += alpha3*(*v);
536: y[4*(*idx)+3] += alpha4*(*v);
537: idx++; v++;
538: }
539: }
540: PetscLogFlops(8*a->nz - 4*b->AIJ->cmap.n);
541: VecRestoreArray(xx,&x);
542: VecRestoreArray(yy,&y);
543: return(0);
544: }
548: PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
549: {
550: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
551: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
552: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4;
554: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
555: PetscInt n,i,jrow,j;
558: if (yy != zz) {VecCopy(yy,zz);}
559: VecGetArray(xx,&x);
560: VecGetArray(zz,&y);
561: idx = a->j;
562: v = a->a;
563: ii = a->i;
565: for (i=0; i<m; i++) {
566: jrow = ii[i];
567: n = ii[i+1] - jrow;
568: sum1 = 0.0;
569: sum2 = 0.0;
570: sum3 = 0.0;
571: sum4 = 0.0;
572: for (j=0; j<n; j++) {
573: sum1 += v[jrow]*x[4*idx[jrow]];
574: sum2 += v[jrow]*x[4*idx[jrow]+1];
575: sum3 += v[jrow]*x[4*idx[jrow]+2];
576: sum4 += v[jrow]*x[4*idx[jrow]+3];
577: jrow++;
578: }
579: y[4*i] += sum1;
580: y[4*i+1] += sum2;
581: y[4*i+2] += sum3;
582: y[4*i+3] += sum4;
583: }
585: PetscLogFlops(8*a->nz - 4*m);
586: VecRestoreArray(xx,&x);
587: VecRestoreArray(zz,&y);
588: return(0);
589: }
592: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
593: {
594: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
595: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
596: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
598: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
601: if (yy != zz) {VecCopy(yy,zz);}
602: VecGetArray(xx,&x);
603: VecGetArray(zz,&y);
604:
605: for (i=0; i<m; i++) {
606: idx = a->j + a->i[i] ;
607: v = a->a + a->i[i] ;
608: n = a->i[i+1] - a->i[i];
609: alpha1 = x[4*i];
610: alpha2 = x[4*i+1];
611: alpha3 = x[4*i+2];
612: alpha4 = x[4*i+3];
613: while (n-->0) {
614: y[4*(*idx)] += alpha1*(*v);
615: y[4*(*idx)+1] += alpha2*(*v);
616: y[4*(*idx)+2] += alpha3*(*v);
617: y[4*(*idx)+3] += alpha4*(*v);
618: idx++; v++;
619: }
620: }
621: PetscLogFlops(8*a->nz - 4*b->AIJ->cmap.n);
622: VecRestoreArray(xx,&x);
623: VecRestoreArray(zz,&y);
624: return(0);
625: }
626: /* ------------------------------------------------------------------------------*/
630: PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
631: {
632: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
633: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
634: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
636: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
637: PetscInt n,i,jrow,j;
640: VecGetArray(xx,&x);
641: VecGetArray(yy,&y);
642: idx = a->j;
643: v = a->a;
644: ii = a->i;
646: for (i=0; i<m; i++) {
647: jrow = ii[i];
648: n = ii[i+1] - jrow;
649: sum1 = 0.0;
650: sum2 = 0.0;
651: sum3 = 0.0;
652: sum4 = 0.0;
653: sum5 = 0.0;
654: for (j=0; j<n; j++) {
655: sum1 += v[jrow]*x[5*idx[jrow]];
656: sum2 += v[jrow]*x[5*idx[jrow]+1];
657: sum3 += v[jrow]*x[5*idx[jrow]+2];
658: sum4 += v[jrow]*x[5*idx[jrow]+3];
659: sum5 += v[jrow]*x[5*idx[jrow]+4];
660: jrow++;
661: }
662: y[5*i] = sum1;
663: y[5*i+1] = sum2;
664: y[5*i+2] = sum3;
665: y[5*i+3] = sum4;
666: y[5*i+4] = sum5;
667: }
669: PetscLogFlops(10*a->nz - 5*m);
670: VecRestoreArray(xx,&x);
671: VecRestoreArray(yy,&y);
672: return(0);
673: }
677: PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
678: {
679: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
680: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
681: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
683: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
686: VecSet(yy,zero);
687: VecGetArray(xx,&x);
688: VecGetArray(yy,&y);
689:
690: for (i=0; i<m; i++) {
691: idx = a->j + a->i[i] ;
692: v = a->a + a->i[i] ;
693: n = a->i[i+1] - a->i[i];
694: alpha1 = x[5*i];
695: alpha2 = x[5*i+1];
696: alpha3 = x[5*i+2];
697: alpha4 = x[5*i+3];
698: alpha5 = x[5*i+4];
699: while (n-->0) {
700: y[5*(*idx)] += alpha1*(*v);
701: y[5*(*idx)+1] += alpha2*(*v);
702: y[5*(*idx)+2] += alpha3*(*v);
703: y[5*(*idx)+3] += alpha4*(*v);
704: y[5*(*idx)+4] += alpha5*(*v);
705: idx++; v++;
706: }
707: }
708: PetscLogFlops(10*a->nz - 5*b->AIJ->cmap.n);
709: VecRestoreArray(xx,&x);
710: VecRestoreArray(yy,&y);
711: return(0);
712: }
716: PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
717: {
718: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
719: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
720: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
722: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
723: PetscInt n,i,jrow,j;
726: if (yy != zz) {VecCopy(yy,zz);}
727: VecGetArray(xx,&x);
728: VecGetArray(zz,&y);
729: idx = a->j;
730: v = a->a;
731: ii = a->i;
733: for (i=0; i<m; i++) {
734: jrow = ii[i];
735: n = ii[i+1] - jrow;
736: sum1 = 0.0;
737: sum2 = 0.0;
738: sum3 = 0.0;
739: sum4 = 0.0;
740: sum5 = 0.0;
741: for (j=0; j<n; j++) {
742: sum1 += v[jrow]*x[5*idx[jrow]];
743: sum2 += v[jrow]*x[5*idx[jrow]+1];
744: sum3 += v[jrow]*x[5*idx[jrow]+2];
745: sum4 += v[jrow]*x[5*idx[jrow]+3];
746: sum5 += v[jrow]*x[5*idx[jrow]+4];
747: jrow++;
748: }
749: y[5*i] += sum1;
750: y[5*i+1] += sum2;
751: y[5*i+2] += sum3;
752: y[5*i+3] += sum4;
753: y[5*i+4] += sum5;
754: }
756: PetscLogFlops(10*a->nz);
757: VecRestoreArray(xx,&x);
758: VecRestoreArray(zz,&y);
759: return(0);
760: }
764: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
765: {
766: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
767: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
768: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
770: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
773: if (yy != zz) {VecCopy(yy,zz);}
774: VecGetArray(xx,&x);
775: VecGetArray(zz,&y);
776:
777: for (i=0; i<m; i++) {
778: idx = a->j + a->i[i] ;
779: v = a->a + a->i[i] ;
780: n = a->i[i+1] - a->i[i];
781: alpha1 = x[5*i];
782: alpha2 = x[5*i+1];
783: alpha3 = x[5*i+2];
784: alpha4 = x[5*i+3];
785: alpha5 = x[5*i+4];
786: while (n-->0) {
787: y[5*(*idx)] += alpha1*(*v);
788: y[5*(*idx)+1] += alpha2*(*v);
789: y[5*(*idx)+2] += alpha3*(*v);
790: y[5*(*idx)+3] += alpha4*(*v);
791: y[5*(*idx)+4] += alpha5*(*v);
792: idx++; v++;
793: }
794: }
795: PetscLogFlops(10*a->nz);
796: VecRestoreArray(xx,&x);
797: VecRestoreArray(zz,&y);
798: return(0);
799: }
801: /* ------------------------------------------------------------------------------*/
804: PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
805: {
806: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
807: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
808: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
810: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
811: PetscInt n,i,jrow,j;
814: VecGetArray(xx,&x);
815: VecGetArray(yy,&y);
816: idx = a->j;
817: v = a->a;
818: ii = a->i;
820: for (i=0; i<m; i++) {
821: jrow = ii[i];
822: n = ii[i+1] - jrow;
823: sum1 = 0.0;
824: sum2 = 0.0;
825: sum3 = 0.0;
826: sum4 = 0.0;
827: sum5 = 0.0;
828: sum6 = 0.0;
829: for (j=0; j<n; j++) {
830: sum1 += v[jrow]*x[6*idx[jrow]];
831: sum2 += v[jrow]*x[6*idx[jrow]+1];
832: sum3 += v[jrow]*x[6*idx[jrow]+2];
833: sum4 += v[jrow]*x[6*idx[jrow]+3];
834: sum5 += v[jrow]*x[6*idx[jrow]+4];
835: sum6 += v[jrow]*x[6*idx[jrow]+5];
836: jrow++;
837: }
838: y[6*i] = sum1;
839: y[6*i+1] = sum2;
840: y[6*i+2] = sum3;
841: y[6*i+3] = sum4;
842: y[6*i+4] = sum5;
843: y[6*i+5] = sum6;
844: }
846: PetscLogFlops(12*a->nz - 6*m);
847: VecRestoreArray(xx,&x);
848: VecRestoreArray(yy,&y);
849: return(0);
850: }
854: PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
855: {
856: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
857: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
858: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
860: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
863: VecSet(yy,zero);
864: VecGetArray(xx,&x);
865: VecGetArray(yy,&y);
867: for (i=0; i<m; i++) {
868: idx = a->j + a->i[i] ;
869: v = a->a + a->i[i] ;
870: n = a->i[i+1] - a->i[i];
871: alpha1 = x[6*i];
872: alpha2 = x[6*i+1];
873: alpha3 = x[6*i+2];
874: alpha4 = x[6*i+3];
875: alpha5 = x[6*i+4];
876: alpha6 = x[6*i+5];
877: while (n-->0) {
878: y[6*(*idx)] += alpha1*(*v);
879: y[6*(*idx)+1] += alpha2*(*v);
880: y[6*(*idx)+2] += alpha3*(*v);
881: y[6*(*idx)+3] += alpha4*(*v);
882: y[6*(*idx)+4] += alpha5*(*v);
883: y[6*(*idx)+5] += alpha6*(*v);
884: idx++; v++;
885: }
886: }
887: PetscLogFlops(12*a->nz - 6*b->AIJ->cmap.n);
888: VecRestoreArray(xx,&x);
889: VecRestoreArray(yy,&y);
890: return(0);
891: }
895: PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
896: {
897: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
898: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
899: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
901: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
902: PetscInt n,i,jrow,j;
905: if (yy != zz) {VecCopy(yy,zz);}
906: VecGetArray(xx,&x);
907: VecGetArray(zz,&y);
908: idx = a->j;
909: v = a->a;
910: ii = a->i;
912: for (i=0; i<m; i++) {
913: jrow = ii[i];
914: n = ii[i+1] - jrow;
915: sum1 = 0.0;
916: sum2 = 0.0;
917: sum3 = 0.0;
918: sum4 = 0.0;
919: sum5 = 0.0;
920: sum6 = 0.0;
921: for (j=0; j<n; j++) {
922: sum1 += v[jrow]*x[6*idx[jrow]];
923: sum2 += v[jrow]*x[6*idx[jrow]+1];
924: sum3 += v[jrow]*x[6*idx[jrow]+2];
925: sum4 += v[jrow]*x[6*idx[jrow]+3];
926: sum5 += v[jrow]*x[6*idx[jrow]+4];
927: sum6 += v[jrow]*x[6*idx[jrow]+5];
928: jrow++;
929: }
930: y[6*i] += sum1;
931: y[6*i+1] += sum2;
932: y[6*i+2] += sum3;
933: y[6*i+3] += sum4;
934: y[6*i+4] += sum5;
935: y[6*i+5] += sum6;
936: }
938: PetscLogFlops(12*a->nz);
939: VecRestoreArray(xx,&x);
940: VecRestoreArray(zz,&y);
941: return(0);
942: }
946: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
947: {
948: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
949: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
950: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
952: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
955: if (yy != zz) {VecCopy(yy,zz);}
956: VecGetArray(xx,&x);
957: VecGetArray(zz,&y);
958:
959: for (i=0; i<m; i++) {
960: idx = a->j + a->i[i] ;
961: v = a->a + a->i[i] ;
962: n = a->i[i+1] - a->i[i];
963: alpha1 = x[6*i];
964: alpha2 = x[6*i+1];
965: alpha3 = x[6*i+2];
966: alpha4 = x[6*i+3];
967: alpha5 = x[6*i+4];
968: alpha6 = x[6*i+5];
969: while (n-->0) {
970: y[6*(*idx)] += alpha1*(*v);
971: y[6*(*idx)+1] += alpha2*(*v);
972: y[6*(*idx)+2] += alpha3*(*v);
973: y[6*(*idx)+3] += alpha4*(*v);
974: y[6*(*idx)+4] += alpha5*(*v);
975: y[6*(*idx)+5] += alpha6*(*v);
976: idx++; v++;
977: }
978: }
979: PetscLogFlops(12*a->nz);
980: VecRestoreArray(xx,&x);
981: VecRestoreArray(zz,&y);
982: return(0);
983: }
985: /* ------------------------------------------------------------------------------*/
988: PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
989: {
990: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
991: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
992: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
994: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
995: PetscInt n,i,jrow,j;
998: VecGetArray(xx,&x);
999: VecGetArray(yy,&y);
1000: idx = a->j;
1001: v = a->a;
1002: ii = a->i;
1004: for (i=0; i<m; i++) {
1005: jrow = ii[i];
1006: n = ii[i+1] - jrow;
1007: sum1 = 0.0;
1008: sum2 = 0.0;
1009: sum3 = 0.0;
1010: sum4 = 0.0;
1011: sum5 = 0.0;
1012: sum6 = 0.0;
1013: sum7 = 0.0;
1014: for (j=0; j<n; j++) {
1015: sum1 += v[jrow]*x[7*idx[jrow]];
1016: sum2 += v[jrow]*x[7*idx[jrow]+1];
1017: sum3 += v[jrow]*x[7*idx[jrow]+2];
1018: sum4 += v[jrow]*x[7*idx[jrow]+3];
1019: sum5 += v[jrow]*x[7*idx[jrow]+4];
1020: sum6 += v[jrow]*x[7*idx[jrow]+5];
1021: sum7 += v[jrow]*x[7*idx[jrow]+6];
1022: jrow++;
1023: }
1024: y[7*i] = sum1;
1025: y[7*i+1] = sum2;
1026: y[7*i+2] = sum3;
1027: y[7*i+3] = sum4;
1028: y[7*i+4] = sum5;
1029: y[7*i+5] = sum6;
1030: y[7*i+6] = sum7;
1031: }
1033: PetscLogFlops(14*a->nz - 7*m);
1034: VecRestoreArray(xx,&x);
1035: VecRestoreArray(yy,&y);
1036: return(0);
1037: }
1041: PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1042: {
1043: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1044: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1045: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,zero = 0.0;
1047: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1050: VecSet(yy,zero);
1051: VecGetArray(xx,&x);
1052: VecGetArray(yy,&y);
1054: for (i=0; i<m; i++) {
1055: idx = a->j + a->i[i] ;
1056: v = a->a + a->i[i] ;
1057: n = a->i[i+1] - a->i[i];
1058: alpha1 = x[7*i];
1059: alpha2 = x[7*i+1];
1060: alpha3 = x[7*i+2];
1061: alpha4 = x[7*i+3];
1062: alpha5 = x[7*i+4];
1063: alpha6 = x[7*i+5];
1064: alpha7 = x[7*i+6];
1065: while (n-->0) {
1066: y[7*(*idx)] += alpha1*(*v);
1067: y[7*(*idx)+1] += alpha2*(*v);
1068: y[7*(*idx)+2] += alpha3*(*v);
1069: y[7*(*idx)+3] += alpha4*(*v);
1070: y[7*(*idx)+4] += alpha5*(*v);
1071: y[7*(*idx)+5] += alpha6*(*v);
1072: y[7*(*idx)+6] += alpha7*(*v);
1073: idx++; v++;
1074: }
1075: }
1076: PetscLogFlops(14*a->nz - 7*b->AIJ->cmap.n);
1077: VecRestoreArray(xx,&x);
1078: VecRestoreArray(yy,&y);
1079: return(0);
1080: }
1084: PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1085: {
1086: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1087: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1088: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1090: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
1091: PetscInt n,i,jrow,j;
1094: if (yy != zz) {VecCopy(yy,zz);}
1095: VecGetArray(xx,&x);
1096: VecGetArray(zz,&y);
1097: idx = a->j;
1098: v = a->a;
1099: ii = a->i;
1101: for (i=0; i<m; i++) {
1102: jrow = ii[i];
1103: n = ii[i+1] - jrow;
1104: sum1 = 0.0;
1105: sum2 = 0.0;
1106: sum3 = 0.0;
1107: sum4 = 0.0;
1108: sum5 = 0.0;
1109: sum6 = 0.0;
1110: sum7 = 0.0;
1111: for (j=0; j<n; j++) {
1112: sum1 += v[jrow]*x[7*idx[jrow]];
1113: sum2 += v[jrow]*x[7*idx[jrow]+1];
1114: sum3 += v[jrow]*x[7*idx[jrow]+2];
1115: sum4 += v[jrow]*x[7*idx[jrow]+3];
1116: sum5 += v[jrow]*x[7*idx[jrow]+4];
1117: sum6 += v[jrow]*x[7*idx[jrow]+5];
1118: sum7 += v[jrow]*x[7*idx[jrow]+6];
1119: jrow++;
1120: }
1121: y[7*i] += sum1;
1122: y[7*i+1] += sum2;
1123: y[7*i+2] += sum3;
1124: y[7*i+3] += sum4;
1125: y[7*i+4] += sum5;
1126: y[7*i+5] += sum6;
1127: y[7*i+6] += sum7;
1128: }
1130: PetscLogFlops(14*a->nz);
1131: VecRestoreArray(xx,&x);
1132: VecRestoreArray(zz,&y);
1133: return(0);
1134: }
1138: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1139: {
1140: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1141: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1142: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1144: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1147: if (yy != zz) {VecCopy(yy,zz);}
1148: VecGetArray(xx,&x);
1149: VecGetArray(zz,&y);
1150: for (i=0; i<m; i++) {
1151: idx = a->j + a->i[i] ;
1152: v = a->a + a->i[i] ;
1153: n = a->i[i+1] - a->i[i];
1154: alpha1 = x[7*i];
1155: alpha2 = x[7*i+1];
1156: alpha3 = x[7*i+2];
1157: alpha4 = x[7*i+3];
1158: alpha5 = x[7*i+4];
1159: alpha6 = x[7*i+5];
1160: alpha7 = x[7*i+6];
1161: while (n-->0) {
1162: y[7*(*idx)] += alpha1*(*v);
1163: y[7*(*idx)+1] += alpha2*(*v);
1164: y[7*(*idx)+2] += alpha3*(*v);
1165: y[7*(*idx)+3] += alpha4*(*v);
1166: y[7*(*idx)+4] += alpha5*(*v);
1167: y[7*(*idx)+5] += alpha6*(*v);
1168: y[7*(*idx)+6] += alpha7*(*v);
1169: idx++; v++;
1170: }
1171: }
1172: PetscLogFlops(14*a->nz);
1173: VecRestoreArray(xx,&x);
1174: VecRestoreArray(zz,&y);
1175: return(0);
1176: }
1180: PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1181: {
1182: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1183: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1184: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1186: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
1187: PetscInt n,i,jrow,j;
1190: VecGetArray(xx,&x);
1191: VecGetArray(yy,&y);
1192: idx = a->j;
1193: v = a->a;
1194: ii = a->i;
1196: for (i=0; i<m; i++) {
1197: jrow = ii[i];
1198: n = ii[i+1] - jrow;
1199: sum1 = 0.0;
1200: sum2 = 0.0;
1201: sum3 = 0.0;
1202: sum4 = 0.0;
1203: sum5 = 0.0;
1204: sum6 = 0.0;
1205: sum7 = 0.0;
1206: sum8 = 0.0;
1207: for (j=0; j<n; j++) {
1208: sum1 += v[jrow]*x[8*idx[jrow]];
1209: sum2 += v[jrow]*x[8*idx[jrow]+1];
1210: sum3 += v[jrow]*x[8*idx[jrow]+2];
1211: sum4 += v[jrow]*x[8*idx[jrow]+3];
1212: sum5 += v[jrow]*x[8*idx[jrow]+4];
1213: sum6 += v[jrow]*x[8*idx[jrow]+5];
1214: sum7 += v[jrow]*x[8*idx[jrow]+6];
1215: sum8 += v[jrow]*x[8*idx[jrow]+7];
1216: jrow++;
1217: }
1218: y[8*i] = sum1;
1219: y[8*i+1] = sum2;
1220: y[8*i+2] = sum3;
1221: y[8*i+3] = sum4;
1222: y[8*i+4] = sum5;
1223: y[8*i+5] = sum6;
1224: y[8*i+6] = sum7;
1225: y[8*i+7] = sum8;
1226: }
1228: PetscLogFlops(16*a->nz - 8*m);
1229: VecRestoreArray(xx,&x);
1230: VecRestoreArray(yy,&y);
1231: return(0);
1232: }
1236: PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1237: {
1238: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1239: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1240: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1242: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1245: VecSet(yy,zero);
1246: VecGetArray(xx,&x);
1247: VecGetArray(yy,&y);
1249: for (i=0; i<m; i++) {
1250: idx = a->j + a->i[i] ;
1251: v = a->a + a->i[i] ;
1252: n = a->i[i+1] - a->i[i];
1253: alpha1 = x[8*i];
1254: alpha2 = x[8*i+1];
1255: alpha3 = x[8*i+2];
1256: alpha4 = x[8*i+3];
1257: alpha5 = x[8*i+4];
1258: alpha6 = x[8*i+5];
1259: alpha7 = x[8*i+6];
1260: alpha8 = x[8*i+7];
1261: while (n-->0) {
1262: y[8*(*idx)] += alpha1*(*v);
1263: y[8*(*idx)+1] += alpha2*(*v);
1264: y[8*(*idx)+2] += alpha3*(*v);
1265: y[8*(*idx)+3] += alpha4*(*v);
1266: y[8*(*idx)+4] += alpha5*(*v);
1267: y[8*(*idx)+5] += alpha6*(*v);
1268: y[8*(*idx)+6] += alpha7*(*v);
1269: y[8*(*idx)+7] += alpha8*(*v);
1270: idx++; v++;
1271: }
1272: }
1273: PetscLogFlops(16*a->nz - 8*b->AIJ->cmap.n);
1274: VecRestoreArray(xx,&x);
1275: VecRestoreArray(yy,&y);
1276: return(0);
1277: }
1281: PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1282: {
1283: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1284: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1285: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1287: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
1288: PetscInt n,i,jrow,j;
1291: if (yy != zz) {VecCopy(yy,zz);}
1292: VecGetArray(xx,&x);
1293: VecGetArray(zz,&y);
1294: idx = a->j;
1295: v = a->a;
1296: ii = a->i;
1298: for (i=0; i<m; i++) {
1299: jrow = ii[i];
1300: n = ii[i+1] - jrow;
1301: sum1 = 0.0;
1302: sum2 = 0.0;
1303: sum3 = 0.0;
1304: sum4 = 0.0;
1305: sum5 = 0.0;
1306: sum6 = 0.0;
1307: sum7 = 0.0;
1308: sum8 = 0.0;
1309: for (j=0; j<n; j++) {
1310: sum1 += v[jrow]*x[8*idx[jrow]];
1311: sum2 += v[jrow]*x[8*idx[jrow]+1];
1312: sum3 += v[jrow]*x[8*idx[jrow]+2];
1313: sum4 += v[jrow]*x[8*idx[jrow]+3];
1314: sum5 += v[jrow]*x[8*idx[jrow]+4];
1315: sum6 += v[jrow]*x[8*idx[jrow]+5];
1316: sum7 += v[jrow]*x[8*idx[jrow]+6];
1317: sum8 += v[jrow]*x[8*idx[jrow]+7];
1318: jrow++;
1319: }
1320: y[8*i] += sum1;
1321: y[8*i+1] += sum2;
1322: y[8*i+2] += sum3;
1323: y[8*i+3] += sum4;
1324: y[8*i+4] += sum5;
1325: y[8*i+5] += sum6;
1326: y[8*i+6] += sum7;
1327: y[8*i+7] += sum8;
1328: }
1330: PetscLogFlops(16*a->nz);
1331: VecRestoreArray(xx,&x);
1332: VecRestoreArray(zz,&y);
1333: return(0);
1334: }
1338: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1339: {
1340: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1341: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1342: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1344: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1347: if (yy != zz) {VecCopy(yy,zz);}
1348: VecGetArray(xx,&x);
1349: VecGetArray(zz,&y);
1350: for (i=0; i<m; i++) {
1351: idx = a->j + a->i[i] ;
1352: v = a->a + a->i[i] ;
1353: n = a->i[i+1] - a->i[i];
1354: alpha1 = x[8*i];
1355: alpha2 = x[8*i+1];
1356: alpha3 = x[8*i+2];
1357: alpha4 = x[8*i+3];
1358: alpha5 = x[8*i+4];
1359: alpha6 = x[8*i+5];
1360: alpha7 = x[8*i+6];
1361: alpha8 = x[8*i+7];
1362: while (n-->0) {
1363: y[8*(*idx)] += alpha1*(*v);
1364: y[8*(*idx)+1] += alpha2*(*v);
1365: y[8*(*idx)+2] += alpha3*(*v);
1366: y[8*(*idx)+3] += alpha4*(*v);
1367: y[8*(*idx)+4] += alpha5*(*v);
1368: y[8*(*idx)+5] += alpha6*(*v);
1369: y[8*(*idx)+6] += alpha7*(*v);
1370: y[8*(*idx)+7] += alpha8*(*v);
1371: idx++; v++;
1372: }
1373: }
1374: PetscLogFlops(16*a->nz);
1375: VecRestoreArray(xx,&x);
1376: VecRestoreArray(zz,&y);
1377: return(0);
1378: }
1380: /* ------------------------------------------------------------------------------*/
1383: PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1384: {
1385: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1386: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1387: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1389: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
1390: PetscInt n,i,jrow,j;
1393: VecGetArray(xx,&x);
1394: VecGetArray(yy,&y);
1395: idx = a->j;
1396: v = a->a;
1397: ii = a->i;
1399: for (i=0; i<m; i++) {
1400: jrow = ii[i];
1401: n = ii[i+1] - jrow;
1402: sum1 = 0.0;
1403: sum2 = 0.0;
1404: sum3 = 0.0;
1405: sum4 = 0.0;
1406: sum5 = 0.0;
1407: sum6 = 0.0;
1408: sum7 = 0.0;
1409: sum8 = 0.0;
1410: sum9 = 0.0;
1411: for (j=0; j<n; j++) {
1412: sum1 += v[jrow]*x[9*idx[jrow]];
1413: sum2 += v[jrow]*x[9*idx[jrow]+1];
1414: sum3 += v[jrow]*x[9*idx[jrow]+2];
1415: sum4 += v[jrow]*x[9*idx[jrow]+3];
1416: sum5 += v[jrow]*x[9*idx[jrow]+4];
1417: sum6 += v[jrow]*x[9*idx[jrow]+5];
1418: sum7 += v[jrow]*x[9*idx[jrow]+6];
1419: sum8 += v[jrow]*x[9*idx[jrow]+7];
1420: sum9 += v[jrow]*x[9*idx[jrow]+8];
1421: jrow++;
1422: }
1423: y[9*i] = sum1;
1424: y[9*i+1] = sum2;
1425: y[9*i+2] = sum3;
1426: y[9*i+3] = sum4;
1427: y[9*i+4] = sum5;
1428: y[9*i+5] = sum6;
1429: y[9*i+6] = sum7;
1430: y[9*i+7] = sum8;
1431: y[9*i+8] = sum9;
1432: }
1434: PetscLogFlops(18*a->nz - 9*m);
1435: VecRestoreArray(xx,&x);
1436: VecRestoreArray(yy,&y);
1437: return(0);
1438: }
1440: /* ------------------------------------------------------------------------------*/
1444: PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1445: {
1446: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1447: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1448: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0;
1450: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1453: VecSet(yy,zero);
1454: VecGetArray(xx,&x);
1455: VecGetArray(yy,&y);
1457: for (i=0; i<m; i++) {
1458: idx = a->j + a->i[i] ;
1459: v = a->a + a->i[i] ;
1460: n = a->i[i+1] - a->i[i];
1461: alpha1 = x[9*i];
1462: alpha2 = x[9*i+1];
1463: alpha3 = x[9*i+2];
1464: alpha4 = x[9*i+3];
1465: alpha5 = x[9*i+4];
1466: alpha6 = x[9*i+5];
1467: alpha7 = x[9*i+6];
1468: alpha8 = x[9*i+7];
1469: alpha9 = x[9*i+8];
1470: while (n-->0) {
1471: y[9*(*idx)] += alpha1*(*v);
1472: y[9*(*idx)+1] += alpha2*(*v);
1473: y[9*(*idx)+2] += alpha3*(*v);
1474: y[9*(*idx)+3] += alpha4*(*v);
1475: y[9*(*idx)+4] += alpha5*(*v);
1476: y[9*(*idx)+5] += alpha6*(*v);
1477: y[9*(*idx)+6] += alpha7*(*v);
1478: y[9*(*idx)+7] += alpha8*(*v);
1479: y[9*(*idx)+8] += alpha9*(*v);
1480: idx++; v++;
1481: }
1482: }
1483: PetscLogFlops(18*a->nz - 9*b->AIJ->cmap.n);
1484: VecRestoreArray(xx,&x);
1485: VecRestoreArray(yy,&y);
1486: return(0);
1487: }
1491: PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1492: {
1493: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1494: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1495: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1497: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
1498: PetscInt n,i,jrow,j;
1501: if (yy != zz) {VecCopy(yy,zz);}
1502: VecGetArray(xx,&x);
1503: VecGetArray(zz,&y);
1504: idx = a->j;
1505: v = a->a;
1506: ii = a->i;
1508: for (i=0; i<m; i++) {
1509: jrow = ii[i];
1510: n = ii[i+1] - jrow;
1511: sum1 = 0.0;
1512: sum2 = 0.0;
1513: sum3 = 0.0;
1514: sum4 = 0.0;
1515: sum5 = 0.0;
1516: sum6 = 0.0;
1517: sum7 = 0.0;
1518: sum8 = 0.0;
1519: sum9 = 0.0;
1520: for (j=0; j<n; j++) {
1521: sum1 += v[jrow]*x[9*idx[jrow]];
1522: sum2 += v[jrow]*x[9*idx[jrow]+1];
1523: sum3 += v[jrow]*x[9*idx[jrow]+2];
1524: sum4 += v[jrow]*x[9*idx[jrow]+3];
1525: sum5 += v[jrow]*x[9*idx[jrow]+4];
1526: sum6 += v[jrow]*x[9*idx[jrow]+5];
1527: sum7 += v[jrow]*x[9*idx[jrow]+6];
1528: sum8 += v[jrow]*x[9*idx[jrow]+7];
1529: sum9 += v[jrow]*x[9*idx[jrow]+8];
1530: jrow++;
1531: }
1532: y[9*i] += sum1;
1533: y[9*i+1] += sum2;
1534: y[9*i+2] += sum3;
1535: y[9*i+3] += sum4;
1536: y[9*i+4] += sum5;
1537: y[9*i+5] += sum6;
1538: y[9*i+6] += sum7;
1539: y[9*i+7] += sum8;
1540: y[9*i+8] += sum9;
1541: }
1543: PetscLogFlops(18*a->nz);
1544: VecRestoreArray(xx,&x);
1545: VecRestoreArray(zz,&y);
1546: return(0);
1547: }
1551: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1552: {
1553: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1554: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1555: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1557: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1560: if (yy != zz) {VecCopy(yy,zz);}
1561: VecGetArray(xx,&x);
1562: VecGetArray(zz,&y);
1563: for (i=0; i<m; i++) {
1564: idx = a->j + a->i[i] ;
1565: v = a->a + a->i[i] ;
1566: n = a->i[i+1] - a->i[i];
1567: alpha1 = x[9*i];
1568: alpha2 = x[9*i+1];
1569: alpha3 = x[9*i+2];
1570: alpha4 = x[9*i+3];
1571: alpha5 = x[9*i+4];
1572: alpha6 = x[9*i+5];
1573: alpha7 = x[9*i+6];
1574: alpha8 = x[9*i+7];
1575: alpha9 = x[9*i+8];
1576: while (n-->0) {
1577: y[9*(*idx)] += alpha1*(*v);
1578: y[9*(*idx)+1] += alpha2*(*v);
1579: y[9*(*idx)+2] += alpha3*(*v);
1580: y[9*(*idx)+3] += alpha4*(*v);
1581: y[9*(*idx)+4] += alpha5*(*v);
1582: y[9*(*idx)+5] += alpha6*(*v);
1583: y[9*(*idx)+6] += alpha7*(*v);
1584: y[9*(*idx)+7] += alpha8*(*v);
1585: y[9*(*idx)+8] += alpha9*(*v);
1586: idx++; v++;
1587: }
1588: }
1589: PetscLogFlops(18*a->nz);
1590: VecRestoreArray(xx,&x);
1591: VecRestoreArray(zz,&y);
1592: return(0);
1593: }
1594: /*--------------------------------------------------------------------------------------------*/
1597: PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1598: {
1599: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1600: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1601: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1603: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
1604: PetscInt n,i,jrow,j;
1607: VecGetArray(xx,&x);
1608: VecGetArray(yy,&y);
1609: idx = a->j;
1610: v = a->a;
1611: ii = a->i;
1613: for (i=0; i<m; i++) {
1614: jrow = ii[i];
1615: n = ii[i+1] - jrow;
1616: sum1 = 0.0;
1617: sum2 = 0.0;
1618: sum3 = 0.0;
1619: sum4 = 0.0;
1620: sum5 = 0.0;
1621: sum6 = 0.0;
1622: sum7 = 0.0;
1623: sum8 = 0.0;
1624: sum9 = 0.0;
1625: sum10 = 0.0;
1626: for (j=0; j<n; j++) {
1627: sum1 += v[jrow]*x[10*idx[jrow]];
1628: sum2 += v[jrow]*x[10*idx[jrow]+1];
1629: sum3 += v[jrow]*x[10*idx[jrow]+2];
1630: sum4 += v[jrow]*x[10*idx[jrow]+3];
1631: sum5 += v[jrow]*x[10*idx[jrow]+4];
1632: sum6 += v[jrow]*x[10*idx[jrow]+5];
1633: sum7 += v[jrow]*x[10*idx[jrow]+6];
1634: sum8 += v[jrow]*x[10*idx[jrow]+7];
1635: sum9 += v[jrow]*x[10*idx[jrow]+8];
1636: sum10 += v[jrow]*x[10*idx[jrow]+9];
1637: jrow++;
1638: }
1639: y[10*i] = sum1;
1640: y[10*i+1] = sum2;
1641: y[10*i+2] = sum3;
1642: y[10*i+3] = sum4;
1643: y[10*i+4] = sum5;
1644: y[10*i+5] = sum6;
1645: y[10*i+6] = sum7;
1646: y[10*i+7] = sum8;
1647: y[10*i+8] = sum9;
1648: y[10*i+9] = sum10;
1649: }
1651: PetscLogFlops(20*a->nz - 10*m);
1652: VecRestoreArray(xx,&x);
1653: VecRestoreArray(yy,&y);
1654: return(0);
1655: }
1659: PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1660: {
1661: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1662: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1663: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1665: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
1666: PetscInt n,i,jrow,j;
1669: if (yy != zz) {VecCopy(yy,zz);}
1670: VecGetArray(xx,&x);
1671: VecGetArray(zz,&y);
1672: idx = a->j;
1673: v = a->a;
1674: ii = a->i;
1676: for (i=0; i<m; i++) {
1677: jrow = ii[i];
1678: n = ii[i+1] - jrow;
1679: sum1 = 0.0;
1680: sum2 = 0.0;
1681: sum3 = 0.0;
1682: sum4 = 0.0;
1683: sum5 = 0.0;
1684: sum6 = 0.0;
1685: sum7 = 0.0;
1686: sum8 = 0.0;
1687: sum9 = 0.0;
1688: sum10 = 0.0;
1689: for (j=0; j<n; j++) {
1690: sum1 += v[jrow]*x[10*idx[jrow]];
1691: sum2 += v[jrow]*x[10*idx[jrow]+1];
1692: sum3 += v[jrow]*x[10*idx[jrow]+2];
1693: sum4 += v[jrow]*x[10*idx[jrow]+3];
1694: sum5 += v[jrow]*x[10*idx[jrow]+4];
1695: sum6 += v[jrow]*x[10*idx[jrow]+5];
1696: sum7 += v[jrow]*x[10*idx[jrow]+6];
1697: sum8 += v[jrow]*x[10*idx[jrow]+7];
1698: sum9 += v[jrow]*x[10*idx[jrow]+8];
1699: sum10 += v[jrow]*x[10*idx[jrow]+9];
1700: jrow++;
1701: }
1702: y[10*i] += sum1;
1703: y[10*i+1] += sum2;
1704: y[10*i+2] += sum3;
1705: y[10*i+3] += sum4;
1706: y[10*i+4] += sum5;
1707: y[10*i+5] += sum6;
1708: y[10*i+6] += sum7;
1709: y[10*i+7] += sum8;
1710: y[10*i+8] += sum9;
1711: y[10*i+9] += sum10;
1712: }
1714: PetscLogFlops(20*a->nz - 10*m);
1715: VecRestoreArray(xx,&x);
1716: VecRestoreArray(yy,&y);
1717: return(0);
1718: }
1722: PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1723: {
1724: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1725: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1726: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,zero = 0.0;
1728: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1731: VecSet(yy,zero);
1732: VecGetArray(xx,&x);
1733: VecGetArray(yy,&y);
1735: for (i=0; i<m; i++) {
1736: idx = a->j + a->i[i] ;
1737: v = a->a + a->i[i] ;
1738: n = a->i[i+1] - a->i[i];
1739: alpha1 = x[10*i];
1740: alpha2 = x[10*i+1];
1741: alpha3 = x[10*i+2];
1742: alpha4 = x[10*i+3];
1743: alpha5 = x[10*i+4];
1744: alpha6 = x[10*i+5];
1745: alpha7 = x[10*i+6];
1746: alpha8 = x[10*i+7];
1747: alpha9 = x[10*i+8];
1748: alpha10 = x[10*i+9];
1749: while (n-->0) {
1750: y[10*(*idx)] += alpha1*(*v);
1751: y[10*(*idx)+1] += alpha2*(*v);
1752: y[10*(*idx)+2] += alpha3*(*v);
1753: y[10*(*idx)+3] += alpha4*(*v);
1754: y[10*(*idx)+4] += alpha5*(*v);
1755: y[10*(*idx)+5] += alpha6*(*v);
1756: y[10*(*idx)+6] += alpha7*(*v);
1757: y[10*(*idx)+7] += alpha8*(*v);
1758: y[10*(*idx)+8] += alpha9*(*v);
1759: y[10*(*idx)+9] += alpha10*(*v);
1760: idx++; v++;
1761: }
1762: }
1763: PetscLogFlops(20*a->nz - 10*b->AIJ->cmap.n);
1764: VecRestoreArray(xx,&x);
1765: VecRestoreArray(yy,&y);
1766: return(0);
1767: }
1771: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1772: {
1773: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1774: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1775: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
17