[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[no subject]




Dear all,

I have a problem with MPI in PETSc that is probably simple, and I hope you will know the answer. In my code I process the local rows of an MPIAIJ matrix one by one. If the number of local rows is equal on all processors, everything works well. However, if the number of local rows differs between processors, the program crashes. I have attached a simple example program to demonstrate what I want to do.
Suppose process A has one more local row than process B. In the debugger I found that the crash occurs in "MPI_Allreduce". Process A calls MPI_Allreduce one more time than process B, because it processes an additional row. While process A is still waiting for the result of its last MPI_Allreduce, process B proceeds and calls MPI_Allreduce in a different context. This is when the program crashes.


What is the best way to make sure that all MPI calls are matched on the different processes?

Thanks,
Arvid

/* Help text for this example; passed to PetscInitialize() in main(). */
static char help[] = "Example program with different row sizes\n\n";

#include "petscmat.h"

#undef __FUNCT__
#define __FUNCT__ "main"
int main(int Argc,char **Args)
{
  const PetscInt  size = 3;
  PetscErrorCode  ierr;
  Mat             cmat, dmat;
  PetscInt        cmat_loc_start, cmat_loc_end, cmat_loc_size;
  PetscInt        *rowsize_arr = 0;
  const PetscInt  **row_arr = 0;
  IS              *is_arr = 0;
  PetscInt        a;
  Mat            *submat_arr = 0;

  PetscInitialize(&Argc,&Args,(char *)0,help);

ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, size, size,
1, PETSC_NULL, 2, PETSC_NULL, &cmat);CHKERRQ(ierr);
ierr = MatAssemblyBegin(cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
ierr = MatAssemblyEnd(cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, size, size,
1, PETSC_NULL, 2, PETSC_NULL, &dmat);CHKERRQ(ierr);
ierr = MatAssemblyBegin(dmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
ierr = MatAssemblyEnd(dmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);


ierr = MatGetOwnershipRange(cmat, &cmat_loc_start, &cmat_loc_end); CHKERRQ(ierr);
cmat_loc_size = cmat_loc_end-cmat_loc_start;


ierr = PetscMalloc(sizeof(PetscInt *)*cmat_loc_size, &row_arr); CHKERRQ(ierr);
ierr = PetscMalloc(sizeof(PetscInt)*cmat_loc_size, &rowsize_arr); CHKERRQ(ierr);
ierr = PetscMalloc(sizeof(IS)*cmat_loc_size, &is_arr); CHKERRQ(ierr);


for( a=0; a<cmat_loc_size; a++ ) {
ierr = MatGetRow(cmat, a+cmat_loc_start, &(rowsize_arr[a]), & (row_arr[a]), 0); CHKERRQ(ierr);
ierr = ISCreateGeneral(PETSC_COMM_WORLD, rowsize_arr[a], row_arr [a], &(is_arr[a])); CHKERRQ(ierr);
ierr = MatRestoreRow(cmat, a+cmat_loc_start, &(rowsize_arr[a]), & (row_arr[a]), 0); CHKERRQ(ierr);
}


ierr = PetscMalloc(sizeof(Mat)*cmat_loc_size, &submat_arr); CHKERRQ (ierr);
ierr = MatGetSubMatrices(dmat, cmat_loc_size, is_arr, is_arr, MAT_INITIAL_MATRIX, &submat_arr); CHKERRQ(ierr);


  ierr = PetscFinalize();CHKERRQ(ierr);
  return 0;
}