From 5478b9a0759327848a99ca6bc057d6467278d09d Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Wed, 12 Jun 2019 13:58:40 +0100 Subject: [PATCH 01/15] Fix bad setup of halo_owner in cd_renum_block --- base/tools/psb_cd_renum_block.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/tools/psb_cd_renum_block.F90 b/base/tools/psb_cd_renum_block.F90 index ea6f940b..0e26e6b5 100644 --- a/base/tools/psb_cd_renum_block.F90 +++ b/base/tools/psb_cd_renum_block.F90 @@ -142,7 +142,7 @@ subroutine psb_cd_renum_block(desc_in, desc_out, info) write(debug_unit,*) me,' ',trim(name),': Done g2l_ins ',gidx(:),':',lidx(:),' :',reflidx(:) end if - if (info == psb_success_) call desc_in%indxmap%fnd_owner(gidx(n_row+1:n_col),hproc,info) + if (info == psb_success_) call blck_map%indxmap%fnd_owner(gidx(n_row+1:n_col),hproc,info) if (info == psb_success_) call blck_map%set_halo_owner(hproc,info) if (info == 0) call blck_map%asb(info) From 66433a46bbfdce7806114261c23424e0ec3bc0bf Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Thu, 27 Jun 2019 18:05:50 +0100 Subject: [PATCH 02/15] Fix call to fnd_owner method --- base/tools/psb_cd_renum_block.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/tools/psb_cd_renum_block.F90 b/base/tools/psb_cd_renum_block.F90 index 0e26e6b5..ddfa4545 100644 --- a/base/tools/psb_cd_renum_block.F90 +++ b/base/tools/psb_cd_renum_block.F90 @@ -142,7 +142,7 @@ subroutine psb_cd_renum_block(desc_in, desc_out, info) write(debug_unit,*) me,' ',trim(name),': Done g2l_ins ',gidx(:),':',lidx(:),' :',reflidx(:) end if - if (info == psb_success_) call blck_map%indxmap%fnd_owner(gidx(n_row+1:n_col),hproc,info) + if (info == psb_success_) call blck_map%fnd_owner(gidx(n_row+1:n_col),hproc,info) if (info == psb_success_) call blck_map%set_halo_owner(hproc,info) if (info == 0) call blck_map%asb(info) From 727a4683744813d50ef3e608df8b95cd4d58a9d0 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Mon, 1 Jul 2019 22:44:24 +0100 Subject: [PATCH 03/15] Fixes for NR/NC allocation. --- base/serial/impl/psb_c_csc_impl.f90 | 2 +- base/serial/impl/psb_c_csr_impl.f90 | 8 +++----- base/serial/impl/psb_d_csc_impl.f90 | 2 +- base/serial/impl/psb_d_csr_impl.f90 | 8 +++----- base/serial/impl/psb_s_csc_impl.f90 | 2 +- base/serial/impl/psb_s_csr_impl.f90 | 8 +++----- base/serial/impl/psb_z_csc_impl.f90 | 2 +- base/serial/impl/psb_z_csr_impl.f90 | 8 +++----- 8 files changed, 16 insertions(+), 24 deletions(-) diff --git a/base/serial/impl/psb_c_csc_impl.f90 b/base/serial/impl/psb_c_csc_impl.f90 index 23e8f4a8..d9483937 100644 --- a/base/serial/impl/psb_c_csc_impl.f90 +++ b/base/serial/impl/psb_c_csc_impl.f90 @@ -2298,7 +2298,7 @@ subroutine psb_c_mv_csc_from_coo(a,b,info) call move_alloc(b%ja,itemp) call move_alloc(b%ia,a%ia) call move_alloc(b%val,a%val) - call psb_realloc(max(nr+1,nc+1),a%icp,info) + call psb_realloc(nc+1,a%icp,info) call b%free() a%icp(:) = 0 diff --git a/base/serial/impl/psb_c_csr_impl.f90 b/base/serial/impl/psb_c_csr_impl.f90 index e248c5e1..472f0a8f 100644 --- a/base/serial/impl/psb_c_csr_impl.f90 +++ b/base/serial/impl/psb_c_csr_impl.f90 @@ -999,8 +999,6 @@ contains end subroutine psb_c_csr_cssv - - subroutine psb_c_csr_cssm(alpha,a,x,beta,y,info,trans) use psb_error_mod use psb_string_mod @@ -2962,7 +2960,7 @@ subroutine psb_c_cp_csr_from_coo(a,b,info) call move_alloc(tmp%ia,itemp) call move_alloc(tmp%ja,a%ja) call move_alloc(tmp%val,a%val) - call psb_realloc(max(nr+1,nc+1),a%irp,info) + call psb_realloc(nr+1,a%irp,info) call tmp%free() else @@ -3131,7 +3129,7 @@ subroutine psb_c_mv_csr_from_coo(a,b,info) call move_alloc(b%ia,itemp) call move_alloc(b%ja,a%ja) call move_alloc(b%val,a%val) - call psb_realloc(max(nr+1,nc+1),a%irp,info) + call psb_realloc(nr+1,a%irp,info) call b%free() @@ -3386,7 +3384,7 @@ subroutine psb_ccsrspspmm(a,b,c,info) ! Estimate number of nonzeros on output. nza = a%get_nzeros() nzb = b%get_nzeros() - nzc = 2*(nza+nzb) + nzc = int(1.25*(nza+nzb)) call c%allocate(ma,nb,nzc) call csr_spspmm(a,b,c,info) diff --git a/base/serial/impl/psb_d_csc_impl.f90 b/base/serial/impl/psb_d_csc_impl.f90 index ef68164d..25085229 100644 --- a/base/serial/impl/psb_d_csc_impl.f90 +++ b/base/serial/impl/psb_d_csc_impl.f90 @@ -2298,7 +2298,7 @@ subroutine psb_d_mv_csc_from_coo(a,b,info) call move_alloc(b%ja,itemp) call move_alloc(b%ia,a%ia) call move_alloc(b%val,a%val) - call psb_realloc(max(nr+1,nc+1),a%icp,info) + call psb_realloc(nc+1,a%icp,info) call b%free() a%icp(:) = 0 diff --git a/base/serial/impl/psb_d_csr_impl.f90 b/base/serial/impl/psb_d_csr_impl.f90 index a6fb0b18..d06c4649 100644 --- a/base/serial/impl/psb_d_csr_impl.f90 +++ b/base/serial/impl/psb_d_csr_impl.f90 @@ -999,8 +999,6 @@ contains end subroutine psb_d_csr_cssv - - subroutine psb_d_csr_cssm(alpha,a,x,beta,y,info,trans) use psb_error_mod use psb_string_mod @@ -2962,7 +2960,7 @@ subroutine psb_d_cp_csr_from_coo(a,b,info) call move_alloc(tmp%ia,itemp) call move_alloc(tmp%ja,a%ja) call move_alloc(tmp%val,a%val) - call psb_realloc(max(nr+1,nc+1),a%irp,info) + call psb_realloc(nr+1,a%irp,info) call tmp%free() else @@ -3131,7 +3129,7 @@ subroutine psb_d_mv_csr_from_coo(a,b,info) call move_alloc(b%ia,itemp) call move_alloc(b%ja,a%ja) call move_alloc(b%val,a%val) - call psb_realloc(max(nr+1,nc+1),a%irp,info) + call psb_realloc(nr+1,a%irp,info) call b%free() @@ -3386,7 +3384,7 @@ subroutine psb_dcsrspspmm(a,b,c,info) ! Estimate number of nonzeros on output. nza = a%get_nzeros() nzb = b%get_nzeros() - nzc = 2*(nza+nzb) + nzc = int(1.25*(nza+nzb)) call c%allocate(ma,nb,nzc) call csr_spspmm(a,b,c,info) diff --git a/base/serial/impl/psb_s_csc_impl.f90 b/base/serial/impl/psb_s_csc_impl.f90 index 6318db9d..6eb0d830 100644 --- a/base/serial/impl/psb_s_csc_impl.f90 +++ b/base/serial/impl/psb_s_csc_impl.f90 @@ -2298,7 +2298,7 @@ subroutine psb_s_mv_csc_from_coo(a,b,info) call move_alloc(b%ja,itemp) call move_alloc(b%ia,a%ia) call move_alloc(b%val,a%val) - call psb_realloc(max(nr+1,nc+1),a%icp,info) + call psb_realloc(nc+1,a%icp,info) call b%free() a%icp(:) = 0 diff --git a/base/serial/impl/psb_s_csr_impl.f90 b/base/serial/impl/psb_s_csr_impl.f90 index a5bca393..e2fc8298 100644 --- a/base/serial/impl/psb_s_csr_impl.f90 +++ b/base/serial/impl/psb_s_csr_impl.f90 @@ -999,8 +999,6 @@ contains end subroutine psb_s_csr_cssv - - subroutine psb_s_csr_cssm(alpha,a,x,beta,y,info,trans) use psb_error_mod use psb_string_mod @@ -2962,7 +2960,7 @@ subroutine psb_s_cp_csr_from_coo(a,b,info) call move_alloc(tmp%ia,itemp) call move_alloc(tmp%ja,a%ja) call move_alloc(tmp%val,a%val) - call psb_realloc(max(nr+1,nc+1),a%irp,info) + call psb_realloc(nr+1,a%irp,info) call tmp%free() else @@ -3131,7 +3129,7 @@ subroutine psb_s_mv_csr_from_coo(a,b,info) call move_alloc(b%ia,itemp) call move_alloc(b%ja,a%ja) call move_alloc(b%val,a%val) - call psb_realloc(max(nr+1,nc+1),a%irp,info) + call psb_realloc(nr+1,a%irp,info) call b%free() @@ -3386,7 +3384,7 @@ subroutine psb_scsrspspmm(a,b,c,info) ! Estimate number of nonzeros on output. nza = a%get_nzeros() nzb = b%get_nzeros() - nzc = 2*(nza+nzb) + nzc = int(1.25*(nza+nzb)) call c%allocate(ma,nb,nzc) call csr_spspmm(a,b,c,info) diff --git a/base/serial/impl/psb_z_csc_impl.f90 b/base/serial/impl/psb_z_csc_impl.f90 index bd14f8a3..2d0fa913 100644 --- a/base/serial/impl/psb_z_csc_impl.f90 +++ b/base/serial/impl/psb_z_csc_impl.f90 @@ -2298,7 +2298,7 @@ subroutine psb_z_mv_csc_from_coo(a,b,info) call move_alloc(b%ja,itemp) call move_alloc(b%ia,a%ia) call move_alloc(b%val,a%val) - call psb_realloc(max(nr+1,nc+1),a%icp,info) + call psb_realloc(nc+1,a%icp,info) call b%free() a%icp(:) = 0 diff --git a/base/serial/impl/psb_z_csr_impl.f90 b/base/serial/impl/psb_z_csr_impl.f90 index b258eaac..015931c6 100644 --- a/base/serial/impl/psb_z_csr_impl.f90 +++ b/base/serial/impl/psb_z_csr_impl.f90 @@ -999,8 +999,6 @@ contains end subroutine psb_z_csr_cssv - - subroutine psb_z_csr_cssm(alpha,a,x,beta,y,info,trans) use psb_error_mod use psb_string_mod @@ -2962,7 +2960,7 @@ subroutine psb_z_cp_csr_from_coo(a,b,info) call move_alloc(tmp%ia,itemp) call move_alloc(tmp%ja,a%ja) call move_alloc(tmp%val,a%val) - call psb_realloc(max(nr+1,nc+1),a%irp,info) + call psb_realloc(nr+1,a%irp,info) call tmp%free() else @@ -3131,7 +3129,7 @@ subroutine psb_z_mv_csr_from_coo(a,b,info) call move_alloc(b%ia,itemp) call move_alloc(b%ja,a%ja) call move_alloc(b%val,a%val) - call psb_realloc(max(nr+1,nc+1),a%irp,info) + call psb_realloc(nr+1,a%irp,info) call b%free() @@ -3386,7 +3384,7 @@ subroutine psb_zcsrspspmm(a,b,c,info) ! Estimate number of nonzeros on output. nza = a%get_nzeros() nzb = b%get_nzeros() - nzc = 2*(nza+nzb) + nzc = int(1.25*(nza+nzb)) call c%allocate(ma,nb,nzc) call csr_spspmm(a,b,c,info) From 239d59b2a793ca2c587f60f8b5c2f7f0c06e720e Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Mon, 8 Jul 2019 15:29:26 +0100 Subject: [PATCH 04/15] Fix cbind test program. --- cbind/test/pargen/ppdec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cbind/test/pargen/ppdec.c b/cbind/test/pargen/ppdec.c index 32c5b600..2e803505 100644 --- a/cbind/test/pargen/ppdec.c +++ b/cbind/test/pargen/ppdec.c @@ -131,7 +131,7 @@ int matgen(int ictxt, int ng,int idim,int vg[],psb_c_dspmat *ah,psb_c_descriptor info = 0; psb_c_info(ictxt,&iam,&np); - deltah = (double) 1.0/(idim+2); + deltah = (double) 1.0/(idim+1); sqdeltah = deltah*deltah; deltah2 = 2.0* deltah; psb_c_set_index_base(0); From 00df8edd06179d821723a67d7e7a16bf1e92fd4a Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Mon, 8 Jul 2019 15:59:15 +0100 Subject: [PATCH 05/15] Improve memory usage in fix_coo, and mv_to/mv_from CSR/CSC. --- base/serial/impl/psb_c_coo_impl.f90 | 23 ++++++++++++++++++----- base/serial/impl/psb_c_csc_impl.f90 | 2 +- base/serial/impl/psb_c_csr_impl.f90 | 8 +++++--- base/serial/impl/psb_d_coo_impl.f90 | 23 ++++++++++++++++++----- base/serial/impl/psb_d_csc_impl.f90 | 2 +- base/serial/impl/psb_d_csr_impl.f90 | 8 +++++--- base/serial/impl/psb_s_coo_impl.f90 | 23 ++++++++++++++++++----- base/serial/impl/psb_s_csc_impl.f90 | 2 +- base/serial/impl/psb_s_csr_impl.f90 | 8 +++++--- base/serial/impl/psb_z_coo_impl.f90 | 23 ++++++++++++++++++----- base/serial/impl/psb_z_csc_impl.f90 | 2 +- base/serial/impl/psb_z_csr_impl.f90 | 8 +++++--- 12 files changed, 96 insertions(+), 36 deletions(-) diff --git a/base/serial/impl/psb_c_coo_impl.f90 b/base/serial/impl/psb_c_coo_impl.f90 index 695e3583..3d0db0b3 100644 --- a/base/serial/impl/psb_c_coo_impl.f90 +++ b/base/serial/impl/psb_c_coo_impl.f90 @@ -3416,21 +3416,26 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) dupl_ = dupl - - allocate(iaux(max(nr,nc,nzin)+2),stat=info) + allocate(iaux(nzin+2),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) goto 9999 end if - - allocate(ias(nzin),jas(nzin),vs(nzin),ix2(max(nr,nc,nzin)+2), stat=info) - use_buffers = (info == 0) select case(idir_) case(psb_row_major_) ! Row major order + + if (nr <= nzin) then + ! Avoid strange situations with large indices + allocate(ias(nzin),jas(nzin),vs(nzin),ix2(nzin+2), stat=info) + use_buffers = (info == 0) + else + use_buffers = .false. + end if + if (use_buffers) then if (.not.( (ia(1) < 1).or.(ia(1)> nr)) ) then iaux(:) = 0 @@ -3752,6 +3757,14 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) case(psb_col_major_) + if (nc <= nzin) then + ! Avoid strange situations with large indices + allocate(ias(nzin),jas(nzin),vs(nzin),ix2(nzin+2), stat=info) + use_buffers = (info == 0) + else + use_buffers = .false. + end if + if (use_buffers) then iaux(:) = 0 if (.not.( (ja(1) < 1).or.(ja(1)> nc)) ) then diff --git a/base/serial/impl/psb_c_csc_impl.f90 b/base/serial/impl/psb_c_csc_impl.f90 index d9483937..23e8f4a8 100644 --- a/base/serial/impl/psb_c_csc_impl.f90 +++ b/base/serial/impl/psb_c_csc_impl.f90 @@ -2298,7 +2298,7 @@ subroutine psb_c_mv_csc_from_coo(a,b,info) call move_alloc(b%ja,itemp) call move_alloc(b%ia,a%ia) call move_alloc(b%val,a%val) - call psb_realloc(nc+1,a%icp,info) + call psb_realloc(max(nr+1,nc+1),a%icp,info) call b%free() a%icp(:) = 0 diff --git a/base/serial/impl/psb_c_csr_impl.f90 b/base/serial/impl/psb_c_csr_impl.f90 index 472f0a8f..e248c5e1 100644 --- a/base/serial/impl/psb_c_csr_impl.f90 +++ b/base/serial/impl/psb_c_csr_impl.f90 @@ -999,6 +999,8 @@ contains end subroutine psb_c_csr_cssv + + subroutine psb_c_csr_cssm(alpha,a,x,beta,y,info,trans) use psb_error_mod use psb_string_mod @@ -2960,7 +2962,7 @@ subroutine psb_c_cp_csr_from_coo(a,b,info) call move_alloc(tmp%ia,itemp) call move_alloc(tmp%ja,a%ja) call move_alloc(tmp%val,a%val) - call psb_realloc(nr+1,a%irp,info) + call psb_realloc(max(nr+1,nc+1),a%irp,info) call tmp%free() else @@ -3129,7 +3131,7 @@ subroutine psb_c_mv_csr_from_coo(a,b,info) call move_alloc(b%ia,itemp) call move_alloc(b%ja,a%ja) call move_alloc(b%val,a%val) - call psb_realloc(nr+1,a%irp,info) + call psb_realloc(max(nr+1,nc+1),a%irp,info) call b%free() @@ -3384,7 +3386,7 @@ subroutine psb_ccsrspspmm(a,b,c,info) ! Estimate number of nonzeros on output. nza = a%get_nzeros() nzb = b%get_nzeros() - nzc = int(1.25*(nza+nzb)) + nzc = 2*(nza+nzb) call c%allocate(ma,nb,nzc) call csr_spspmm(a,b,c,info) diff --git a/base/serial/impl/psb_d_coo_impl.f90 b/base/serial/impl/psb_d_coo_impl.f90 index 47c0b107..89aa8b97 100644 --- a/base/serial/impl/psb_d_coo_impl.f90 +++ b/base/serial/impl/psb_d_coo_impl.f90 @@ -3416,21 +3416,26 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) dupl_ = dupl - - allocate(iaux(max(nr,nc,nzin)+2),stat=info) + allocate(iaux(nzin+2),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) goto 9999 end if - - allocate(ias(nzin),jas(nzin),vs(nzin),ix2(max(nr,nc,nzin)+2), stat=info) - use_buffers = (info == 0) select case(idir_) case(psb_row_major_) ! Row major order + + if (nr <= nzin) then + ! Avoid strange situations with large indices + allocate(ias(nzin),jas(nzin),vs(nzin),ix2(nzin+2), stat=info) + use_buffers = (info == 0) + else + use_buffers = .false. + end if + if (use_buffers) then if (.not.( (ia(1) < 1).or.(ia(1)> nr)) ) then iaux(:) = 0 @@ -3752,6 +3757,14 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) case(psb_col_major_) + if (nc <= nzin) then + ! Avoid strange situations with large indices + allocate(ias(nzin),jas(nzin),vs(nzin),ix2(nzin+2), stat=info) + use_buffers = (info == 0) + else + use_buffers = .false. + end if + if (use_buffers) then iaux(:) = 0 if (.not.( (ja(1) < 1).or.(ja(1)> nc)) ) then diff --git a/base/serial/impl/psb_d_csc_impl.f90 b/base/serial/impl/psb_d_csc_impl.f90 index 25085229..ef68164d 100644 --- a/base/serial/impl/psb_d_csc_impl.f90 +++ b/base/serial/impl/psb_d_csc_impl.f90 @@ -2298,7 +2298,7 @@ subroutine psb_d_mv_csc_from_coo(a,b,info) call move_alloc(b%ja,itemp) call move_alloc(b%ia,a%ia) call move_alloc(b%val,a%val) - call psb_realloc(nc+1,a%icp,info) + call psb_realloc(max(nr+1,nc+1),a%icp,info) call b%free() a%icp(:) = 0 diff --git a/base/serial/impl/psb_d_csr_impl.f90 b/base/serial/impl/psb_d_csr_impl.f90 index d06c4649..a6fb0b18 100644 --- a/base/serial/impl/psb_d_csr_impl.f90 +++ b/base/serial/impl/psb_d_csr_impl.f90 @@ -999,6 +999,8 @@ contains end subroutine psb_d_csr_cssv + + subroutine psb_d_csr_cssm(alpha,a,x,beta,y,info,trans) use psb_error_mod use psb_string_mod @@ -2960,7 +2962,7 @@ subroutine psb_d_cp_csr_from_coo(a,b,info) call move_alloc(tmp%ia,itemp) call move_alloc(tmp%ja,a%ja) call move_alloc(tmp%val,a%val) - call psb_realloc(nr+1,a%irp,info) + call psb_realloc(max(nr+1,nc+1),a%irp,info) call tmp%free() else @@ -3129,7 +3131,7 @@ subroutine psb_d_mv_csr_from_coo(a,b,info) call move_alloc(b%ia,itemp) call move_alloc(b%ja,a%ja) call move_alloc(b%val,a%val) - call psb_realloc(nr+1,a%irp,info) + call psb_realloc(max(nr+1,nc+1),a%irp,info) call b%free() @@ -3384,7 +3386,7 @@ subroutine psb_dcsrspspmm(a,b,c,info) ! Estimate number of nonzeros on output. nza = a%get_nzeros() nzb = b%get_nzeros() - nzc = int(1.25*(nza+nzb)) + nzc = 2*(nza+nzb) call c%allocate(ma,nb,nzc) call csr_spspmm(a,b,c,info) diff --git a/base/serial/impl/psb_s_coo_impl.f90 b/base/serial/impl/psb_s_coo_impl.f90 index b80a8a99..d327e12e 100644 --- a/base/serial/impl/psb_s_coo_impl.f90 +++ b/base/serial/impl/psb_s_coo_impl.f90 @@ -3416,21 +3416,26 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) dupl_ = dupl - - allocate(iaux(max(nr,nc,nzin)+2),stat=info) + allocate(iaux(nzin+2),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) goto 9999 end if - - allocate(ias(nzin),jas(nzin),vs(nzin),ix2(max(nr,nc,nzin)+2), stat=info) - use_buffers = (info == 0) select case(idir_) case(psb_row_major_) ! Row major order + + if (nr <= nzin) then + ! Avoid strange situations with large indices + allocate(ias(nzin),jas(nzin),vs(nzin),ix2(nzin+2), stat=info) + use_buffers = (info == 0) + else + use_buffers = .false. + end if + if (use_buffers) then if (.not.( (ia(1) < 1).or.(ia(1)> nr)) ) then iaux(:) = 0 @@ -3752,6 +3757,14 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) case(psb_col_major_) + if (nc <= nzin) then + ! Avoid strange situations with large indices + allocate(ias(nzin),jas(nzin),vs(nzin),ix2(nzin+2), stat=info) + use_buffers = (info == 0) + else + use_buffers = .false. + end if + if (use_buffers) then iaux(:) = 0 if (.not.( (ja(1) < 1).or.(ja(1)> nc)) ) then diff --git a/base/serial/impl/psb_s_csc_impl.f90 b/base/serial/impl/psb_s_csc_impl.f90 index 6eb0d830..6318db9d 100644 --- a/base/serial/impl/psb_s_csc_impl.f90 +++ b/base/serial/impl/psb_s_csc_impl.f90 @@ -2298,7 +2298,7 @@ subroutine psb_s_mv_csc_from_coo(a,b,info) call move_alloc(b%ja,itemp) call move_alloc(b%ia,a%ia) call move_alloc(b%val,a%val) - call psb_realloc(nc+1,a%icp,info) + call psb_realloc(max(nr+1,nc+1),a%icp,info) call b%free() a%icp(:) = 0 diff --git a/base/serial/impl/psb_s_csr_impl.f90 b/base/serial/impl/psb_s_csr_impl.f90 index e2fc8298..a5bca393 100644 --- a/base/serial/impl/psb_s_csr_impl.f90 +++ b/base/serial/impl/psb_s_csr_impl.f90 @@ -999,6 +999,8 @@ contains end subroutine psb_s_csr_cssv + + subroutine psb_s_csr_cssm(alpha,a,x,beta,y,info,trans) use psb_error_mod use psb_string_mod @@ -2960,7 +2962,7 @@ subroutine psb_s_cp_csr_from_coo(a,b,info) call move_alloc(tmp%ia,itemp) call move_alloc(tmp%ja,a%ja) call move_alloc(tmp%val,a%val) - call psb_realloc(nr+1,a%irp,info) + call psb_realloc(max(nr+1,nc+1),a%irp,info) call tmp%free() else @@ -3129,7 +3131,7 @@ subroutine psb_s_mv_csr_from_coo(a,b,info) call move_alloc(b%ia,itemp) call move_alloc(b%ja,a%ja) call move_alloc(b%val,a%val) - call psb_realloc(nr+1,a%irp,info) + call psb_realloc(max(nr+1,nc+1),a%irp,info) call b%free() @@ -3384,7 +3386,7 @@ subroutine psb_scsrspspmm(a,b,c,info) ! Estimate number of nonzeros on output. nza = a%get_nzeros() nzb = b%get_nzeros() - nzc = int(1.25*(nza+nzb)) + nzc = 2*(nza+nzb) call c%allocate(ma,nb,nzc) call csr_spspmm(a,b,c,info) diff --git a/base/serial/impl/psb_z_coo_impl.f90 b/base/serial/impl/psb_z_coo_impl.f90 index e1883e03..d5ee9740 100644 --- a/base/serial/impl/psb_z_coo_impl.f90 +++ b/base/serial/impl/psb_z_coo_impl.f90 @@ -3416,21 +3416,26 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) dupl_ = dupl - - allocate(iaux(max(nr,nc,nzin)+2),stat=info) + allocate(iaux(nzin+2),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) goto 9999 end if - - allocate(ias(nzin),jas(nzin),vs(nzin),ix2(max(nr,nc,nzin)+2), stat=info) - use_buffers = (info == 0) select case(idir_) case(psb_row_major_) ! Row major order + + if (nr <= nzin) then + ! Avoid strange situations with large indices + allocate(ias(nzin),jas(nzin),vs(nzin),ix2(nzin+2), stat=info) + use_buffers = (info == 0) + else + use_buffers = .false. + end if + if (use_buffers) then if (.not.( (ia(1) < 1).or.(ia(1)> nr)) ) then iaux(:) = 0 @@ -3752,6 +3757,14 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) case(psb_col_major_) + if (nc <= nzin) then + ! Avoid strange situations with large indices + allocate(ias(nzin),jas(nzin),vs(nzin),ix2(nzin+2), stat=info) + use_buffers = (info == 0) + else + use_buffers = .false. + end if + if (use_buffers) then iaux(:) = 0 if (.not.( (ja(1) < 1).or.(ja(1)> nc)) ) then diff --git a/base/serial/impl/psb_z_csc_impl.f90 b/base/serial/impl/psb_z_csc_impl.f90 index 2d0fa913..bd14f8a3 100644 --- a/base/serial/impl/psb_z_csc_impl.f90 +++ b/base/serial/impl/psb_z_csc_impl.f90 @@ -2298,7 +2298,7 @@ subroutine psb_z_mv_csc_from_coo(a,b,info) call move_alloc(b%ja,itemp) call move_alloc(b%ia,a%ia) call move_alloc(b%val,a%val) - call psb_realloc(nc+1,a%icp,info) + call psb_realloc(max(nr+1,nc+1),a%icp,info) call b%free() a%icp(:) = 0 diff --git a/base/serial/impl/psb_z_csr_impl.f90 b/base/serial/impl/psb_z_csr_impl.f90 index 015931c6..b258eaac 100644 --- a/base/serial/impl/psb_z_csr_impl.f90 +++ b/base/serial/impl/psb_z_csr_impl.f90 @@ -999,6 +999,8 @@ contains end subroutine psb_z_csr_cssv + + subroutine psb_z_csr_cssm(alpha,a,x,beta,y,info,trans) use psb_error_mod use psb_string_mod @@ -2960,7 +2962,7 @@ subroutine psb_z_cp_csr_from_coo(a,b,info) call move_alloc(tmp%ia,itemp) call move_alloc(tmp%ja,a%ja) call move_alloc(tmp%val,a%val) - call psb_realloc(nr+1,a%irp,info) + call psb_realloc(max(nr+1,nc+1),a%irp,info) call tmp%free() else @@ -3129,7 +3131,7 @@ subroutine psb_z_mv_csr_from_coo(a,b,info) call move_alloc(b%ia,itemp) call move_alloc(b%ja,a%ja) call move_alloc(b%val,a%val) - call psb_realloc(nr+1,a%irp,info) + call psb_realloc(max(nr+1,nc+1),a%irp,info) call b%free() @@ -3384,7 +3386,7 @@ subroutine psb_zcsrspspmm(a,b,c,info) ! Estimate number of nonzeros on output. nza = a%get_nzeros() nzb = b%get_nzeros() - nzc = int(1.25*(nza+nzb)) + nzc = 2*(nza+nzb) call c%allocate(ma,nb,nzc) call csr_spspmm(a,b,c,info) From e79850acbdf7489f29e47c9e3a73d134ed233242 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Tue, 9 Jul 2019 13:17:36 +0100 Subject: [PATCH 06/15] Update version in configure. --- configure.ac | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 7b4e1370..d89ba886 100755 --- a/configure.ac +++ b/configure.ac @@ -36,11 +36,11 @@ dnl NOTE : There is no cross compilation support. ############################################################################### # NOTE: the literal for version (the second argument to AC_INIT should be a literal!) -AC_INIT([PSBLAS],3.5, [https://github.com/sfilippone/psblas3/issues]) +AC_INIT([PSBLAS],3.6, [https://github.com/sfilippone/psblas3/issues]) # VERSION is the file containing the PSBLAS version code # FIXME -psblas_cv_version="3.5" +psblas_cv_version="3.6" # A sample source file AC_CONFIG_SRCDIR([base/modules/psb_base_mod.f90]) From adc3d37a11b51bc5779a1c883808dacd8938ddcf Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Tue, 9 Jul 2019 13:17:44 +0100 Subject: [PATCH 07/15] Memory allocation fixes for CSR/CSC. --- base/serial/impl/psb_c_csc_impl.f90 | 5 ++--- base/serial/impl/psb_c_csr_impl.f90 | 9 ++++----- base/serial/impl/psb_d_csc_impl.f90 | 5 ++--- base/serial/impl/psb_d_csr_impl.f90 | 9 ++++----- base/serial/impl/psb_s_csc_impl.f90 | 5 ++--- base/serial/impl/psb_s_csr_impl.f90 | 9 ++++----- base/serial/impl/psb_z_csc_impl.f90 | 5 ++--- base/serial/impl/psb_z_csr_impl.f90 | 9 ++++----- 8 files changed, 24 insertions(+), 32 deletions(-) diff --git a/base/serial/impl/psb_c_csc_impl.f90 b/base/serial/impl/psb_c_csc_impl.f90 index 23e8f4a8..6491636e 100644 --- a/base/serial/impl/psb_c_csc_impl.f90 +++ b/base/serial/impl/psb_c_csc_impl.f90 @@ -2298,7 +2298,7 @@ subroutine psb_c_mv_csc_from_coo(a,b,info) call move_alloc(b%ja,itemp) call move_alloc(b%ia,a%ia) call move_alloc(b%val,a%val) - call psb_realloc(max(nr+1,nc+1),a%icp,info) + call psb_realloc(nc+1,a%icp,info) call b%free() a%icp(:) = 0 @@ -2570,8 +2570,7 @@ subroutine psb_c_csc_reallocate_nz(nz,a) call psb_realloc(max(nz,ione),a%ia,info) if (info == psb_success_) call psb_realloc(max(nz,ione),a%val,info) - if (info == psb_success_) call psb_realloc(max(nz,a%get_nrows()+1,& - & a%get_ncols()+1), a%icp,info) + if (info == psb_success_) call psb_realloc(a%get_ncols()+1, a%icp,info) if (info /= psb_success_) then call psb_errpush(psb_err_alloc_dealloc_,name) goto 9999 diff --git a/base/serial/impl/psb_c_csr_impl.f90 b/base/serial/impl/psb_c_csr_impl.f90 index e248c5e1..9ad735d5 100644 --- a/base/serial/impl/psb_c_csr_impl.f90 +++ b/base/serial/impl/psb_c_csr_impl.f90 @@ -1712,8 +1712,7 @@ subroutine psb_c_csr_reallocate_nz(nz,a) call psb_realloc(max(nz,ione),a%ja,info) if (info == psb_success_) call psb_realloc(max(nz,ione),a%val,info) - if (info == psb_success_) call psb_realloc(& - & max(nz,a%get_nrows()+1,a%get_ncols()+1),a%irp,info) + if (info == psb_success_) call psb_realloc(a%get_nrows()+1,a%irp,info) if (info /= psb_success_) then call psb_errpush(psb_err_alloc_dealloc_,name) goto 9999 @@ -2962,7 +2961,7 @@ subroutine psb_c_cp_csr_from_coo(a,b,info) call move_alloc(tmp%ia,itemp) call move_alloc(tmp%ja,a%ja) call move_alloc(tmp%val,a%val) - call psb_realloc(max(nr+1,nc+1),a%irp,info) + call psb_realloc(nr+1,a%irp,info) call tmp%free() else @@ -2980,7 +2979,7 @@ subroutine psb_c_cp_csr_from_coo(a,b,info) call psb_safe_ab_cpy(b%ia,itemp,info) if (info == psb_success_) call psb_safe_ab_cpy(b%ja,a%ja,info) if (info == psb_success_) call psb_safe_ab_cpy(b%val,a%val,info) - if (info == psb_success_) call psb_realloc(max(nr+1,nc+1),a%irp,info) + if (info == psb_success_) call psb_realloc(nr+1,a%irp,info) endif @@ -3131,7 +3130,7 @@ subroutine psb_c_mv_csr_from_coo(a,b,info) call move_alloc(b%ia,itemp) call move_alloc(b%ja,a%ja) call move_alloc(b%val,a%val) - call psb_realloc(max(nr+1,nc+1),a%irp,info) + call psb_realloc(nr+1,a%irp,info) call b%free() diff --git a/base/serial/impl/psb_d_csc_impl.f90 b/base/serial/impl/psb_d_csc_impl.f90 index ef68164d..39fb8ff5 100644 --- a/base/serial/impl/psb_d_csc_impl.f90 +++ b/base/serial/impl/psb_d_csc_impl.f90 @@ -2298,7 +2298,7 @@ subroutine psb_d_mv_csc_from_coo(a,b,info) call move_alloc(b%ja,itemp) call move_alloc(b%ia,a%ia) call move_alloc(b%val,a%val) - call psb_realloc(max(nr+1,nc+1),a%icp,info) + call psb_realloc(nc+1,a%icp,info) call b%free() a%icp(:) = 0 @@ -2570,8 +2570,7 @@ subroutine psb_d_csc_reallocate_nz(nz,a) call psb_realloc(max(nz,ione),a%ia,info) if (info == psb_success_) call psb_realloc(max(nz,ione),a%val,info) - if (info == psb_success_) call psb_realloc(max(nz,a%get_nrows()+1,& - & a%get_ncols()+1), a%icp,info) + if (info == psb_success_) call psb_realloc(a%get_ncols()+1, a%icp,info) if (info /= psb_success_) then call psb_errpush(psb_err_alloc_dealloc_,name) goto 9999 diff --git a/base/serial/impl/psb_d_csr_impl.f90 b/base/serial/impl/psb_d_csr_impl.f90 index a6fb0b18..47c4de8e 100644 --- a/base/serial/impl/psb_d_csr_impl.f90 +++ b/base/serial/impl/psb_d_csr_impl.f90 @@ -1712,8 +1712,7 @@ subroutine psb_d_csr_reallocate_nz(nz,a) call psb_realloc(max(nz,ione),a%ja,info) if (info == psb_success_) call psb_realloc(max(nz,ione),a%val,info) - if (info == psb_success_) call psb_realloc(& - & max(nz,a%get_nrows()+1,a%get_ncols()+1),a%irp,info) + if (info == psb_success_) call psb_realloc(a%get_nrows()+1,a%irp,info) if (info /= psb_success_) then call psb_errpush(psb_err_alloc_dealloc_,name) goto 9999 @@ -2962,7 +2961,7 @@ subroutine psb_d_cp_csr_from_coo(a,b,info) call move_alloc(tmp%ia,itemp) call move_alloc(tmp%ja,a%ja) call move_alloc(tmp%val,a%val) - call psb_realloc(max(nr+1,nc+1),a%irp,info) + call psb_realloc(nr+1,a%irp,info) call tmp%free() else @@ -2980,7 +2979,7 @@ subroutine psb_d_cp_csr_from_coo(a,b,info) call psb_safe_ab_cpy(b%ia,itemp,info) if (info == psb_success_) call psb_safe_ab_cpy(b%ja,a%ja,info) if (info == psb_success_) call psb_safe_ab_cpy(b%val,a%val,info) - if (info == psb_success_) call psb_realloc(max(nr+1,nc+1),a%irp,info) + if (info == psb_success_) call psb_realloc(nr+1,a%irp,info) endif @@ -3131,7 +3130,7 @@ subroutine psb_d_mv_csr_from_coo(a,b,info) call move_alloc(b%ia,itemp) call move_alloc(b%ja,a%ja) call move_alloc(b%val,a%val) - call psb_realloc(max(nr+1,nc+1),a%irp,info) + call psb_realloc(nr+1,a%irp,info) call b%free() diff --git a/base/serial/impl/psb_s_csc_impl.f90 b/base/serial/impl/psb_s_csc_impl.f90 index 6318db9d..a28f3f76 100644 --- a/base/serial/impl/psb_s_csc_impl.f90 +++ b/base/serial/impl/psb_s_csc_impl.f90 @@ -2298,7 +2298,7 @@ subroutine psb_s_mv_csc_from_coo(a,b,info) call move_alloc(b%ja,itemp) call move_alloc(b%ia,a%ia) call move_alloc(b%val,a%val) - call psb_realloc(max(nr+1,nc+1),a%icp,info) + call psb_realloc(nc+1,a%icp,info) call b%free() a%icp(:) = 0 @@ -2570,8 +2570,7 @@ subroutine psb_s_csc_reallocate_nz(nz,a) call psb_realloc(max(nz,ione),a%ia,info) if (info == psb_success_) call psb_realloc(max(nz,ione),a%val,info) - if (info == psb_success_) call psb_realloc(max(nz,a%get_nrows()+1,& - & a%get_ncols()+1), a%icp,info) + if (info == psb_success_) call psb_realloc(a%get_ncols()+1, a%icp,info) if (info /= psb_success_) then call psb_errpush(psb_err_alloc_dealloc_,name) goto 9999 diff --git a/base/serial/impl/psb_s_csr_impl.f90 b/base/serial/impl/psb_s_csr_impl.f90 index a5bca393..424d54e0 100644 --- a/base/serial/impl/psb_s_csr_impl.f90 +++ b/base/serial/impl/psb_s_csr_impl.f90 @@ -1712,8 +1712,7 @@ subroutine psb_s_csr_reallocate_nz(nz,a) call psb_realloc(max(nz,ione),a%ja,info) if (info == psb_success_) call psb_realloc(max(nz,ione),a%val,info) - if (info == psb_success_) call psb_realloc(& - & max(nz,a%get_nrows()+1,a%get_ncols()+1),a%irp,info) + if (info == psb_success_) call psb_realloc(a%get_nrows()+1,a%irp,info) if (info /= psb_success_) then call psb_errpush(psb_err_alloc_dealloc_,name) goto 9999 @@ -2962,7 +2961,7 @@ subroutine psb_s_cp_csr_from_coo(a,b,info) call move_alloc(tmp%ia,itemp) call move_alloc(tmp%ja,a%ja) call move_alloc(tmp%val,a%val) - call psb_realloc(max(nr+1,nc+1),a%irp,info) + call psb_realloc(nr+1,a%irp,info) call tmp%free() else @@ -2980,7 +2979,7 @@ subroutine psb_s_cp_csr_from_coo(a,b,info) call psb_safe_ab_cpy(b%ia,itemp,info) if (info == psb_success_) call psb_safe_ab_cpy(b%ja,a%ja,info) if (info == psb_success_) call psb_safe_ab_cpy(b%val,a%val,info) - if (info == psb_success_) call psb_realloc(max(nr+1,nc+1),a%irp,info) + if (info == psb_success_) call psb_realloc(nr+1,a%irp,info) endif @@ -3131,7 +3130,7 @@ subroutine psb_s_mv_csr_from_coo(a,b,info) call move_alloc(b%ia,itemp) call move_alloc(b%ja,a%ja) call move_alloc(b%val,a%val) - call psb_realloc(max(nr+1,nc+1),a%irp,info) + call psb_realloc(nr+1,a%irp,info) call b%free() diff --git a/base/serial/impl/psb_z_csc_impl.f90 b/base/serial/impl/psb_z_csc_impl.f90 index bd14f8a3..149d3f54 100644 --- a/base/serial/impl/psb_z_csc_impl.f90 +++ b/base/serial/impl/psb_z_csc_impl.f90 @@ -2298,7 +2298,7 @@ subroutine psb_z_mv_csc_from_coo(a,b,info) call move_alloc(b%ja,itemp) call move_alloc(b%ia,a%ia) call move_alloc(b%val,a%val) - call psb_realloc(max(nr+1,nc+1),a%icp,info) + call psb_realloc(nc+1,a%icp,info) call b%free() a%icp(:) = 0 @@ -2570,8 +2570,7 @@ subroutine psb_z_csc_reallocate_nz(nz,a) call psb_realloc(max(nz,ione),a%ia,info) if (info == psb_success_) call psb_realloc(max(nz,ione),a%val,info) - if (info == psb_success_) call psb_realloc(max(nz,a%get_nrows()+1,& - & a%get_ncols()+1), a%icp,info) + if (info == psb_success_) call psb_realloc(a%get_ncols()+1, a%icp,info) if (info /= psb_success_) then call psb_errpush(psb_err_alloc_dealloc_,name) goto 9999 diff --git a/base/serial/impl/psb_z_csr_impl.f90 b/base/serial/impl/psb_z_csr_impl.f90 index b258eaac..92eef6a8 100644 --- a/base/serial/impl/psb_z_csr_impl.f90 +++ b/base/serial/impl/psb_z_csr_impl.f90 @@ -1712,8 +1712,7 @@ subroutine psb_z_csr_reallocate_nz(nz,a) call psb_realloc(max(nz,ione),a%ja,info) if (info == psb_success_) call psb_realloc(max(nz,ione),a%val,info) - if (info == psb_success_) call psb_realloc(& - & max(nz,a%get_nrows()+1,a%get_ncols()+1),a%irp,info) + if (info == psb_success_) call psb_realloc(a%get_nrows()+1,a%irp,info) if (info /= psb_success_) then call psb_errpush(psb_err_alloc_dealloc_,name) goto 9999 @@ -2962,7 +2961,7 @@ subroutine psb_z_cp_csr_from_coo(a,b,info) call move_alloc(tmp%ia,itemp) call move_alloc(tmp%ja,a%ja) call move_alloc(tmp%val,a%val) - call psb_realloc(max(nr+1,nc+1),a%irp,info) + call psb_realloc(nr+1,a%irp,info) call tmp%free() else @@ -2980,7 +2979,7 @@ subroutine psb_z_cp_csr_from_coo(a,b,info) call psb_safe_ab_cpy(b%ia,itemp,info) if (info == psb_success_) call psb_safe_ab_cpy(b%ja,a%ja,info) if (info == psb_success_) call psb_safe_ab_cpy(b%val,a%val,info) - if (info == psb_success_) call psb_realloc(max(nr+1,nc+1),a%irp,info) + if (info == psb_success_) call psb_realloc(nr+1,a%irp,info) endif @@ -3131,7 +3130,7 @@ subroutine psb_z_mv_csr_from_coo(a,b,info) call move_alloc(b%ia,itemp) call move_alloc(b%ja,a%ja) call move_alloc(b%val,a%val) - call psb_realloc(max(nr+1,nc+1),a%irp,info) + call psb_realloc(nr+1,a%irp,info) call b%free() From be30879121517212acf2b02db8c9e0610257cddf Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Fri, 19 Jul 2019 11:11:42 +0100 Subject: [PATCH 08/15] Modify handling of lower/upper/symmetric --- base/modules/serial/psb_base_mat_mod.f90 | 27 +++++++- base/modules/serial/psb_c_mat_mod.f90 | 81 +++++++++++++++--------- base/modules/serial/psb_d_mat_mod.f90 | 81 +++++++++++++++--------- base/modules/serial/psb_s_mat_mod.f90 | 81 +++++++++++++++--------- base/modules/serial/psb_z_mat_mod.f90 | 81 +++++++++++++++--------- base/serial/impl/psb_c_csr_impl.f90 | 4 +- base/serial/impl/psb_c_mat_impl.F90 | 29 +++++++++ base/serial/impl/psb_d_csr_impl.f90 | 4 +- base/serial/impl/psb_d_mat_impl.F90 | 29 +++++++++ base/serial/impl/psb_s_csr_impl.f90 | 4 +- base/serial/impl/psb_s_mat_impl.F90 | 29 +++++++++ base/serial/impl/psb_z_csr_impl.f90 | 4 +- base/serial/impl/psb_z_mat_impl.F90 | 29 +++++++++ 13 files changed, 353 insertions(+), 130 deletions(-) diff --git a/base/modules/serial/psb_base_mat_mod.f90 b/base/modules/serial/psb_base_mat_mod.f90 index e03c0e59..9d6eb75c 100644 --- a/base/modules/serial/psb_base_mat_mod.f90 +++ b/base/modules/serial/psb_base_mat_mod.f90 @@ -116,6 +116,8 @@ module psb_base_mat_mod !! can ever be in the BUILD state, hence all other formats !! cannot have duplicate entries. integer(psb_ipk_), private :: duplicate + !> Is the matrix symmetric? (must also be square) + logical, private :: symmetric !> Is the matrix triangular? (must also be square) logical, private :: triangle !> Is the matrix upper or lower? (only if triangular) @@ -152,6 +154,7 @@ module psb_base_mat_mod procedure, pass(a) :: is_upper => psb_base_is_upper procedure, pass(a) :: is_lower => psb_base_is_lower procedure, pass(a) :: is_triangle => psb_base_is_triangle + procedure, pass(a) :: is_symmetric => psb_base_is_symmetric procedure, pass(a) :: is_unit => psb_base_is_unit procedure, pass(a) :: is_by_rows => psb_base_is_by_rows procedure, pass(a) :: is_by_cols => psb_base_is_by_cols @@ -174,6 +177,7 @@ module psb_base_mat_mod procedure, pass(a) :: set_upper => psb_base_set_upper procedure, pass(a) :: set_lower => psb_base_set_lower procedure, pass(a) :: set_triangle => psb_base_set_triangle + procedure, pass(a) :: set_symmetric => psb_base_set_symmetric procedure, pass(a) :: set_unit => psb_base_set_unit procedure, pass(a) :: set_repeatable_updates => psb_base_set_repeatable_updates @@ -586,6 +590,18 @@ contains end if end subroutine psb_base_set_triangle + subroutine psb_base_set_symmetric(a,val) + implicit none + class(psb_base_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: val + + if (present(val)) then + a%symmetric = val + else + a%symmetric = .true. + end if + end subroutine psb_base_set_symmetric + subroutine psb_base_set_unit(a,val) implicit none class(psb_base_sparse_mat), intent(inout) :: a @@ -641,6 +657,13 @@ contains res = a%triangle end function psb_base_is_triangle + function psb_base_is_symmetric(a) result(res) + implicit none + class(psb_base_sparse_mat), intent(in) :: a + logical :: res + res = a%symmetric + end function psb_base_is_symmetric + function psb_base_is_unit(a) result(res) implicit none class(psb_base_sparse_mat), intent(in) :: a @@ -652,14 +675,14 @@ contains implicit none class(psb_base_sparse_mat), intent(in) :: a logical :: res - res = a%upper + res = a%upper .and. a%triangle end function psb_base_is_upper function psb_base_is_lower(a) result(res) implicit none class(psb_base_sparse_mat), intent(in) :: a logical :: res - res = .not.a%upper + res = (.not.a%upper) .and. a%triangle end function psb_base_is_lower function psb_base_is_null(a) result(res) diff --git a/base/modules/serial/psb_c_mat_mod.f90 b/base/modules/serial/psb_c_mat_mod.f90 index ca95ce39..a9c15fb3 100644 --- a/base/modules/serial/psb_c_mat_mod.f90 +++ b/base/modules/serial/psb_c_mat_mod.f90 @@ -81,42 +81,44 @@ module psb_c_mat_mod contains ! Getters - procedure, pass(a) :: get_nrows => psb_c_get_nrows - procedure, pass(a) :: get_ncols => psb_c_get_ncols - procedure, pass(a) :: get_nzeros => psb_c_get_nzeros - procedure, pass(a) :: get_nz_row => psb_c_get_nz_row - procedure, pass(a) :: get_size => psb_c_get_size - procedure, pass(a) :: get_dupl => psb_c_get_dupl - procedure, pass(a) :: is_null => psb_c_is_null - procedure, pass(a) :: is_bld => psb_c_is_bld - procedure, pass(a) :: is_upd => psb_c_is_upd - procedure, pass(a) :: is_asb => psb_c_is_asb - procedure, pass(a) :: is_sorted => psb_c_is_sorted - procedure, pass(a) :: is_by_rows => psb_c_is_by_rows - procedure, pass(a) :: is_by_cols => psb_c_is_by_cols - procedure, pass(a) :: is_upper => psb_c_is_upper - procedure, pass(a) :: is_lower => psb_c_is_lower - procedure, pass(a) :: is_triangle => psb_c_is_triangle + procedure, pass(a) :: get_nrows => psb_c_get_nrows + procedure, pass(a) :: get_ncols => psb_c_get_ncols + procedure, pass(a) :: get_nzeros => psb_c_get_nzeros + procedure, pass(a) :: get_nz_row => psb_c_get_nz_row + procedure, pass(a) :: get_size => psb_c_get_size + procedure, pass(a) :: get_dupl => psb_c_get_dupl + procedure, pass(a) :: is_null => psb_c_is_null + procedure, pass(a) :: is_bld => psb_c_is_bld + procedure, pass(a) :: is_upd => psb_c_is_upd + procedure, pass(a) :: is_asb => psb_c_is_asb + procedure, pass(a) :: is_sorted => psb_c_is_sorted + procedure, pass(a) :: is_by_rows => psb_c_is_by_rows + procedure, pass(a) :: is_by_cols => psb_c_is_by_cols + procedure, pass(a) :: is_upper => psb_c_is_upper + procedure, pass(a) :: is_lower => psb_c_is_lower + procedure, pass(a) :: is_triangle => psb_c_is_triangle + procedure, pass(a) :: is_symmetric => psb_c_is_symmetric procedure, pass(a) :: is_unit => psb_c_is_unit procedure, pass(a) :: is_repeatable_updates => psb_c_is_repeatable_updates procedure, pass(a) :: get_fmt => psb_c_get_fmt procedure, pass(a) :: sizeof => psb_c_sizeof ! Setters - procedure, pass(a) :: set_nrows => psb_c_set_nrows - procedure, pass(a) :: set_ncols => psb_c_set_ncols - procedure, pass(a) :: set_dupl => psb_c_set_dupl - procedure, pass(a) :: set_null => psb_c_set_null - procedure, pass(a) :: set_bld => psb_c_set_bld - procedure, pass(a) :: set_upd => psb_c_set_upd - procedure, pass(a) :: set_asb => psb_c_set_asb - procedure, pass(a) :: set_sorted => psb_c_set_sorted - procedure, pass(a) :: set_upper => psb_c_set_upper - procedure, pass(a) :: set_lower => psb_c_set_lower - procedure, pass(a) :: set_triangle => psb_c_set_triangle - procedure, pass(a) :: set_unit => psb_c_set_unit + procedure, pass(a) :: set_nrows => psb_c_set_nrows + procedure, pass(a) :: set_ncols => psb_c_set_ncols + procedure, pass(a) :: set_dupl => psb_c_set_dupl + procedure, pass(a) :: set_null => psb_c_set_null + procedure, pass(a) :: set_bld => psb_c_set_bld + procedure, pass(a) :: set_upd => psb_c_set_upd + procedure, pass(a) :: set_asb => psb_c_set_asb + procedure, pass(a) :: set_sorted => psb_c_set_sorted + procedure, pass(a) :: set_upper => psb_c_set_upper + procedure, pass(a) :: set_lower => psb_c_set_lower + procedure, pass(a) :: set_triangle => psb_c_set_triangle + procedure, pass(a) :: set_symmetric => psb_c_set_symmetric + procedure, pass(a) :: set_unit => psb_c_set_unit procedure, pass(a) :: set_repeatable_updates => psb_c_set_repeatable_updates - procedure, pass(a) :: has_xt_tri => psb_c_has_xt_tri + procedure, pass(a) :: has_xt_tri => psb_c_has_xt_tri ! Memory/data management @@ -322,6 +324,14 @@ module psb_c_mat_mod end subroutine psb_c_set_triangle end interface + interface + subroutine psb_c_set_symmetric(a,val) + import :: psb_ipk_, psb_cspmat_type + class(psb_cspmat_type), intent(inout) :: a + logical, intent(in), optional :: val + end subroutine psb_c_set_symmetric + end interface + interface subroutine psb_c_set_unit(a,val) import :: psb_ipk_, psb_cspmat_type @@ -1037,6 +1047,19 @@ contains end function psb_c_is_triangle + function psb_c_is_symmetric(a) result(res) + implicit none + class(psb_cspmat_type), intent(in) :: a + logical :: res + + if (allocated(a%a)) then + res = a%a%is_symmetric() + else + res = .false. + end if + + end function psb_c_is_symmetric + function psb_c_is_unit(a) result(res) implicit none class(psb_cspmat_type), intent(in) :: a diff --git a/base/modules/serial/psb_d_mat_mod.f90 b/base/modules/serial/psb_d_mat_mod.f90 index dc57d1f2..a755a9b9 100644 --- a/base/modules/serial/psb_d_mat_mod.f90 +++ b/base/modules/serial/psb_d_mat_mod.f90 @@ -81,42 +81,44 @@ module psb_d_mat_mod contains ! Getters - procedure, pass(a) :: get_nrows => psb_d_get_nrows - procedure, pass(a) :: get_ncols => psb_d_get_ncols - procedure, pass(a) :: get_nzeros => psb_d_get_nzeros - procedure, pass(a) :: get_nz_row => psb_d_get_nz_row - procedure, pass(a) :: get_size => psb_d_get_size - procedure, pass(a) :: get_dupl => psb_d_get_dupl - procedure, pass(a) :: is_null => psb_d_is_null - procedure, pass(a) :: is_bld => psb_d_is_bld - procedure, pass(a) :: is_upd => psb_d_is_upd - procedure, pass(a) :: is_asb => psb_d_is_asb - procedure, pass(a) :: is_sorted => psb_d_is_sorted - procedure, pass(a) :: is_by_rows => psb_d_is_by_rows - procedure, pass(a) :: is_by_cols => psb_d_is_by_cols - procedure, pass(a) :: is_upper => psb_d_is_upper - procedure, pass(a) :: is_lower => psb_d_is_lower - procedure, pass(a) :: is_triangle => psb_d_is_triangle + procedure, pass(a) :: get_nrows => psb_d_get_nrows + procedure, pass(a) :: get_ncols => psb_d_get_ncols + procedure, pass(a) :: get_nzeros => psb_d_get_nzeros + procedure, pass(a) :: get_nz_row => psb_d_get_nz_row + procedure, pass(a) :: get_size => psb_d_get_size + procedure, pass(a) :: get_dupl => psb_d_get_dupl + procedure, pass(a) :: is_null => psb_d_is_null + procedure, pass(a) :: is_bld => psb_d_is_bld + procedure, pass(a) :: is_upd => psb_d_is_upd + procedure, pass(a) :: is_asb => psb_d_is_asb + procedure, pass(a) :: is_sorted => psb_d_is_sorted + procedure, pass(a) :: is_by_rows => psb_d_is_by_rows + procedure, pass(a) :: is_by_cols => psb_d_is_by_cols + procedure, pass(a) :: is_upper => psb_d_is_upper + procedure, pass(a) :: is_lower => psb_d_is_lower + procedure, pass(a) :: is_triangle => psb_d_is_triangle + procedure, pass(a) :: is_symmetric => psb_d_is_symmetric procedure, pass(a) :: is_unit => psb_d_is_unit procedure, pass(a) :: is_repeatable_updates => psb_d_is_repeatable_updates procedure, pass(a) :: get_fmt => psb_d_get_fmt procedure, pass(a) :: sizeof => psb_d_sizeof ! Setters - procedure, pass(a) :: set_nrows => psb_d_set_nrows - procedure, pass(a) :: set_ncols => psb_d_set_ncols - procedure, pass(a) :: set_dupl => psb_d_set_dupl - procedure, pass(a) :: set_null => psb_d_set_null - procedure, pass(a) :: set_bld => psb_d_set_bld - procedure, pass(a) :: set_upd => psb_d_set_upd - procedure, pass(a) :: set_asb => psb_d_set_asb - procedure, pass(a) :: set_sorted => psb_d_set_sorted - procedure, pass(a) :: set_upper => psb_d_set_upper - procedure, pass(a) :: set_lower => psb_d_set_lower - procedure, pass(a) :: set_triangle => psb_d_set_triangle - procedure, pass(a) :: set_unit => psb_d_set_unit + procedure, pass(a) :: set_nrows => psb_d_set_nrows + procedure, pass(a) :: set_ncols => psb_d_set_ncols + procedure, pass(a) :: set_dupl => psb_d_set_dupl + procedure, pass(a) :: set_null => psb_d_set_null + procedure, pass(a) :: set_bld => psb_d_set_bld + procedure, pass(a) :: set_upd => psb_d_set_upd + procedure, pass(a) :: set_asb => psb_d_set_asb + procedure, pass(a) :: set_sorted => psb_d_set_sorted + procedure, pass(a) :: set_upper => psb_d_set_upper + procedure, pass(a) :: set_lower => psb_d_set_lower + procedure, pass(a) :: set_triangle => psb_d_set_triangle + procedure, pass(a) :: set_symmetric => psb_d_set_symmetric + procedure, pass(a) :: set_unit => psb_d_set_unit procedure, pass(a) :: set_repeatable_updates => psb_d_set_repeatable_updates - procedure, pass(a) :: has_xt_tri => psb_d_has_xt_tri + procedure, pass(a) :: has_xt_tri => psb_d_has_xt_tri ! Memory/data management @@ -322,6 +324,14 @@ module psb_d_mat_mod end subroutine psb_d_set_triangle end interface + interface + subroutine psb_d_set_symmetric(a,val) + import :: psb_ipk_, psb_dspmat_type + class(psb_dspmat_type), intent(inout) :: a + logical, intent(in), optional :: val + end subroutine psb_d_set_symmetric + end interface + interface subroutine psb_d_set_unit(a,val) import :: psb_ipk_, psb_dspmat_type @@ -1037,6 +1047,19 @@ contains end function psb_d_is_triangle + function psb_d_is_symmetric(a) result(res) + implicit none + class(psb_dspmat_type), intent(in) :: a + logical :: res + + if (allocated(a%a)) then + res = a%a%is_symmetric() + else + res = .false. + end if + + end function psb_d_is_symmetric + function psb_d_is_unit(a) result(res) implicit none class(psb_dspmat_type), intent(in) :: a diff --git a/base/modules/serial/psb_s_mat_mod.f90 b/base/modules/serial/psb_s_mat_mod.f90 index e5216581..8259674d 100644 --- a/base/modules/serial/psb_s_mat_mod.f90 +++ b/base/modules/serial/psb_s_mat_mod.f90 @@ -81,42 +81,44 @@ module psb_s_mat_mod contains ! Getters - procedure, pass(a) :: get_nrows => psb_s_get_nrows - procedure, pass(a) :: get_ncols => psb_s_get_ncols - procedure, pass(a) :: get_nzeros => psb_s_get_nzeros - procedure, pass(a) :: get_nz_row => psb_s_get_nz_row - procedure, pass(a) :: get_size => psb_s_get_size - procedure, pass(a) :: get_dupl => psb_s_get_dupl - procedure, pass(a) :: is_null => psb_s_is_null - procedure, pass(a) :: is_bld => psb_s_is_bld - procedure, pass(a) :: is_upd => psb_s_is_upd - procedure, pass(a) :: is_asb => psb_s_is_asb - procedure, pass(a) :: is_sorted => psb_s_is_sorted - procedure, pass(a) :: is_by_rows => psb_s_is_by_rows - procedure, pass(a) :: is_by_cols => psb_s_is_by_cols - procedure, pass(a) :: is_upper => psb_s_is_upper - procedure, pass(a) :: is_lower => psb_s_is_lower - procedure, pass(a) :: is_triangle => psb_s_is_triangle + procedure, pass(a) :: get_nrows => psb_s_get_nrows + procedure, pass(a) :: get_ncols => psb_s_get_ncols + procedure, pass(a) :: get_nzeros => psb_s_get_nzeros + procedure, pass(a) :: get_nz_row => psb_s_get_nz_row + procedure, pass(a) :: get_size => psb_s_get_size + procedure, pass(a) :: get_dupl => psb_s_get_dupl + procedure, pass(a) :: is_null => psb_s_is_null + procedure, pass(a) :: is_bld => psb_s_is_bld + procedure, pass(a) :: is_upd => psb_s_is_upd + procedure, pass(a) :: is_asb => psb_s_is_asb + procedure, pass(a) :: is_sorted => psb_s_is_sorted + procedure, pass(a) :: is_by_rows => psb_s_is_by_rows + procedure, pass(a) :: is_by_cols => psb_s_is_by_cols + procedure, pass(a) :: is_upper => psb_s_is_upper + procedure, pass(a) :: is_lower => psb_s_is_lower + procedure, pass(a) :: is_triangle => psb_s_is_triangle + procedure, pass(a) :: is_symmetric => psb_s_is_symmetric procedure, pass(a) :: is_unit => psb_s_is_unit procedure, pass(a) :: is_repeatable_updates => psb_s_is_repeatable_updates procedure, pass(a) :: get_fmt => psb_s_get_fmt procedure, pass(a) :: sizeof => psb_s_sizeof ! Setters - procedure, pass(a) :: set_nrows => psb_s_set_nrows - procedure, pass(a) :: set_ncols => psb_s_set_ncols - procedure, pass(a) :: set_dupl => psb_s_set_dupl - procedure, pass(a) :: set_null => psb_s_set_null - procedure, pass(a) :: set_bld => psb_s_set_bld - procedure, pass(a) :: set_upd => psb_s_set_upd - procedure, pass(a) :: set_asb => psb_s_set_asb - procedure, pass(a) :: set_sorted => psb_s_set_sorted - procedure, pass(a) :: set_upper => psb_s_set_upper - procedure, pass(a) :: set_lower => psb_s_set_lower - procedure, pass(a) :: set_triangle => psb_s_set_triangle - procedure, pass(a) :: set_unit => psb_s_set_unit + procedure, pass(a) :: set_nrows => psb_s_set_nrows + procedure, pass(a) :: set_ncols => psb_s_set_ncols + procedure, pass(a) :: set_dupl => psb_s_set_dupl + procedure, pass(a) :: set_null => psb_s_set_null + procedure, pass(a) :: set_bld => psb_s_set_bld + procedure, pass(a) :: set_upd => psb_s_set_upd + procedure, pass(a) :: set_asb => psb_s_set_asb + procedure, pass(a) :: set_sorted => psb_s_set_sorted + procedure, pass(a) :: set_upper => psb_s_set_upper + procedure, pass(a) :: set_lower => psb_s_set_lower + procedure, pass(a) :: set_triangle => psb_s_set_triangle + procedure, pass(a) :: set_symmetric => psb_s_set_symmetric + procedure, pass(a) :: set_unit => psb_s_set_unit procedure, pass(a) :: set_repeatable_updates => psb_s_set_repeatable_updates - procedure, pass(a) :: has_xt_tri => psb_s_has_xt_tri + procedure, pass(a) :: has_xt_tri => psb_s_has_xt_tri ! Memory/data management @@ -322,6 +324,14 @@ module psb_s_mat_mod end subroutine psb_s_set_triangle end interface + interface + subroutine psb_s_set_symmetric(a,val) + import :: psb_ipk_, psb_sspmat_type + class(psb_sspmat_type), intent(inout) :: a + logical, intent(in), optional :: val + end subroutine psb_s_set_symmetric + end interface + interface subroutine psb_s_set_unit(a,val) import :: psb_ipk_, psb_sspmat_type @@ -1037,6 +1047,19 @@ contains end function psb_s_is_triangle + function psb_s_is_symmetric(a) result(res) + implicit none + class(psb_sspmat_type), intent(in) :: a + logical :: res + + if (allocated(a%a)) then + res = a%a%is_symmetric() + else + res = .false. + end if + + end function psb_s_is_symmetric + function psb_s_is_unit(a) result(res) implicit none class(psb_sspmat_type), intent(in) :: a diff --git a/base/modules/serial/psb_z_mat_mod.f90 b/base/modules/serial/psb_z_mat_mod.f90 index ba4f277b..7c805505 100644 --- a/base/modules/serial/psb_z_mat_mod.f90 +++ b/base/modules/serial/psb_z_mat_mod.f90 @@ -81,42 +81,44 @@ module psb_z_mat_mod contains ! Getters - procedure, pass(a) :: get_nrows => psb_z_get_nrows - procedure, pass(a) :: get_ncols => psb_z_get_ncols - procedure, pass(a) :: get_nzeros => psb_z_get_nzeros - procedure, pass(a) :: get_nz_row => psb_z_get_nz_row - procedure, pass(a) :: get_size => psb_z_get_size - procedure, pass(a) :: get_dupl => psb_z_get_dupl - procedure, pass(a) :: is_null => psb_z_is_null - procedure, pass(a) :: is_bld => psb_z_is_bld - procedure, pass(a) :: is_upd => psb_z_is_upd - procedure, pass(a) :: is_asb => psb_z_is_asb - procedure, pass(a) :: is_sorted => psb_z_is_sorted - procedure, pass(a) :: is_by_rows => psb_z_is_by_rows - procedure, pass(a) :: is_by_cols => psb_z_is_by_cols - procedure, pass(a) :: is_upper => psb_z_is_upper - procedure, pass(a) :: is_lower => psb_z_is_lower - procedure, pass(a) :: is_triangle => psb_z_is_triangle + procedure, pass(a) :: get_nrows => psb_z_get_nrows + procedure, pass(a) :: get_ncols => psb_z_get_ncols + procedure, pass(a) :: get_nzeros => psb_z_get_nzeros + procedure, pass(a) :: get_nz_row => psb_z_get_nz_row + procedure, pass(a) :: get_size => psb_z_get_size + procedure, pass(a) :: get_dupl => psb_z_get_dupl + procedure, pass(a) :: is_null => psb_z_is_null + procedure, pass(a) :: is_bld => psb_z_is_bld + procedure, pass(a) :: is_upd => psb_z_is_upd + procedure, pass(a) :: is_asb => psb_z_is_asb + procedure, pass(a) :: is_sorted => psb_z_is_sorted + procedure, pass(a) :: is_by_rows => psb_z_is_by_rows + procedure, pass(a) :: is_by_cols => psb_z_is_by_cols + procedure, pass(a) :: is_upper => psb_z_is_upper + procedure, pass(a) :: is_lower => psb_z_is_lower + procedure, pass(a) :: is_triangle => psb_z_is_triangle + procedure, pass(a) :: is_symmetric => psb_z_is_symmetric procedure, pass(a) :: is_unit => psb_z_is_unit procedure, pass(a) :: is_repeatable_updates => psb_z_is_repeatable_updates procedure, pass(a) :: get_fmt => psb_z_get_fmt procedure, pass(a) :: sizeof => psb_z_sizeof ! Setters - procedure, pass(a) :: set_nrows => psb_z_set_nrows - procedure, pass(a) :: set_ncols => psb_z_set_ncols - procedure, pass(a) :: set_dupl => psb_z_set_dupl - procedure, pass(a) :: set_null => psb_z_set_null - procedure, pass(a) :: set_bld => psb_z_set_bld - procedure, pass(a) :: set_upd => psb_z_set_upd - procedure, pass(a) :: set_asb => psb_z_set_asb - procedure, pass(a) :: set_sorted => psb_z_set_sorted - procedure, pass(a) :: set_upper => psb_z_set_upper - procedure, pass(a) :: set_lower => psb_z_set_lower - procedure, pass(a) :: set_triangle => psb_z_set_triangle - procedure, pass(a) :: set_unit => psb_z_set_unit + procedure, pass(a) :: set_nrows => psb_z_set_nrows + procedure, pass(a) :: set_ncols => psb_z_set_ncols + procedure, pass(a) :: set_dupl => psb_z_set_dupl + procedure, pass(a) :: set_null => psb_z_set_null + procedure, pass(a) :: set_bld => psb_z_set_bld + procedure, pass(a) :: set_upd => psb_z_set_upd + procedure, pass(a) :: set_asb => psb_z_set_asb + procedure, pass(a) :: set_sorted => psb_z_set_sorted + procedure, pass(a) :: set_upper => psb_z_set_upper + procedure, pass(a) :: set_lower => psb_z_set_lower + procedure, pass(a) :: set_triangle => psb_z_set_triangle + procedure, pass(a) :: set_symmetric => psb_z_set_symmetric + procedure, pass(a) :: set_unit => psb_z_set_unit procedure, pass(a) :: set_repeatable_updates => psb_z_set_repeatable_updates - procedure, pass(a) :: has_xt_tri => psb_z_has_xt_tri + procedure, pass(a) :: has_xt_tri => psb_z_has_xt_tri ! Memory/data management @@ -322,6 +324,14 @@ module psb_z_mat_mod end subroutine psb_z_set_triangle end interface + interface + subroutine psb_z_set_symmetric(a,val) + import :: psb_ipk_, psb_zspmat_type + class(psb_zspmat_type), intent(inout) :: a + logical, intent(in), optional :: val + end subroutine psb_z_set_symmetric + end interface + interface subroutine psb_z_set_unit(a,val) import :: psb_ipk_, psb_zspmat_type @@ -1037,6 +1047,19 @@ contains end function psb_z_is_triangle + function psb_z_is_symmetric(a) result(res) + implicit none + class(psb_zspmat_type), intent(in) :: a + logical :: res + + if (allocated(a%a)) then + res = a%a%is_symmetric() + else + res = .false. + end if + + end function psb_z_is_symmetric + function psb_z_is_unit(a) result(res) implicit none class(psb_zspmat_type), intent(in) :: a diff --git a/base/serial/impl/psb_c_csr_impl.f90 b/base/serial/impl/psb_c_csr_impl.f90 index 9ad735d5..1ad2224e 100644 --- a/base/serial/impl/psb_c_csr_impl.f90 +++ b/base/serial/impl/psb_c_csr_impl.f90 @@ -999,8 +999,6 @@ contains end subroutine psb_c_csr_cssv - - subroutine psb_c_csr_cssm(alpha,a,x,beta,y,info,trans) use psb_error_mod use psb_string_mod @@ -3385,7 +3383,7 @@ subroutine psb_ccsrspspmm(a,b,c,info) ! Estimate number of nonzeros on output. nza = a%get_nzeros() nzb = b%get_nzeros() - nzc = 2*(nza+nzb) + nzc = int(1.25*(nza+nzb)) call c%allocate(ma,nb,nzc) call csr_spspmm(a,b,c,info) diff --git a/base/serial/impl/psb_c_mat_impl.F90 b/base/serial/impl/psb_c_mat_impl.F90 index 54ba0f11..629b7113 100644 --- a/base/serial/impl/psb_c_mat_impl.F90 +++ b/base/serial/impl/psb_c_mat_impl.F90 @@ -328,6 +328,35 @@ subroutine psb_c_set_triangle(a,val) end subroutine psb_c_set_triangle +subroutine psb_c_set_symmetric(a,val) + use psb_c_mat_mod, psb_protect_name => psb_c_set_symmetric + use psb_error_mod + implicit none + class(psb_cspmat_type), intent(inout) :: a + logical, intent(in), optional :: val + integer(psb_ipk_) :: err_act, info + character(len=20) :: name='get_nzeros' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (.not.allocated(a%a)) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + call a%a%set_symmetric(val) + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_set_symmetric + subroutine psb_c_set_unit(a,val) use psb_c_mat_mod, psb_protect_name => psb_c_set_unit diff --git a/base/serial/impl/psb_d_csr_impl.f90 b/base/serial/impl/psb_d_csr_impl.f90 index 47c4de8e..861d413b 100644 --- a/base/serial/impl/psb_d_csr_impl.f90 +++ b/base/serial/impl/psb_d_csr_impl.f90 @@ -999,8 +999,6 @@ contains end subroutine psb_d_csr_cssv - - subroutine psb_d_csr_cssm(alpha,a,x,beta,y,info,trans) use psb_error_mod use psb_string_mod @@ -3385,7 +3383,7 @@ subroutine psb_dcsrspspmm(a,b,c,info) ! Estimate number of nonzeros on output. nza = a%get_nzeros() nzb = b%get_nzeros() - nzc = 2*(nza+nzb) + nzc = int(1.25*(nza+nzb)) call c%allocate(ma,nb,nzc) call csr_spspmm(a,b,c,info) diff --git a/base/serial/impl/psb_d_mat_impl.F90 b/base/serial/impl/psb_d_mat_impl.F90 index 2d772286..0a72b48e 100644 --- a/base/serial/impl/psb_d_mat_impl.F90 +++ b/base/serial/impl/psb_d_mat_impl.F90 @@ -328,6 +328,35 @@ subroutine psb_d_set_triangle(a,val) end subroutine psb_d_set_triangle +subroutine psb_d_set_symmetric(a,val) + use psb_d_mat_mod, psb_protect_name => psb_d_set_symmetric + use psb_error_mod + implicit none + class(psb_dspmat_type), intent(inout) :: a + logical, intent(in), optional :: val + integer(psb_ipk_) :: err_act, info + character(len=20) :: name='get_nzeros' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (.not.allocated(a%a)) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + call a%a%set_symmetric(val) + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_set_symmetric + subroutine psb_d_set_unit(a,val) use psb_d_mat_mod, psb_protect_name => psb_d_set_unit diff --git a/base/serial/impl/psb_s_csr_impl.f90 b/base/serial/impl/psb_s_csr_impl.f90 index 424d54e0..1897db70 100644 --- a/base/serial/impl/psb_s_csr_impl.f90 +++ b/base/serial/impl/psb_s_csr_impl.f90 @@ -999,8 +999,6 @@ contains end subroutine psb_s_csr_cssv - - subroutine psb_s_csr_cssm(alpha,a,x,beta,y,info,trans) use psb_error_mod use psb_string_mod @@ -3385,7 +3383,7 @@ subroutine psb_scsrspspmm(a,b,c,info) ! Estimate number of nonzeros on output. nza = a%get_nzeros() nzb = b%get_nzeros() - nzc = 2*(nza+nzb) + nzc = int(1.25*(nza+nzb)) call c%allocate(ma,nb,nzc) call csr_spspmm(a,b,c,info) diff --git a/base/serial/impl/psb_s_mat_impl.F90 b/base/serial/impl/psb_s_mat_impl.F90 index 20ff4b40..c5923395 100644 --- a/base/serial/impl/psb_s_mat_impl.F90 +++ b/base/serial/impl/psb_s_mat_impl.F90 @@ -328,6 +328,35 @@ subroutine psb_s_set_triangle(a,val) end subroutine psb_s_set_triangle +subroutine psb_s_set_symmetric(a,val) + use psb_s_mat_mod, psb_protect_name => psb_s_set_symmetric + use psb_error_mod + implicit none + class(psb_sspmat_type), intent(inout) :: a + logical, intent(in), optional :: val + integer(psb_ipk_) :: err_act, info + character(len=20) :: name='get_nzeros' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (.not.allocated(a%a)) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + call a%a%set_symmetric(val) + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_set_symmetric + subroutine psb_s_set_unit(a,val) use psb_s_mat_mod, psb_protect_name => psb_s_set_unit diff --git a/base/serial/impl/psb_z_csr_impl.f90 b/base/serial/impl/psb_z_csr_impl.f90 index 92eef6a8..ddc0a3df 100644 --- a/base/serial/impl/psb_z_csr_impl.f90 +++ b/base/serial/impl/psb_z_csr_impl.f90 @@ -999,8 +999,6 @@ contains end subroutine psb_z_csr_cssv - - subroutine psb_z_csr_cssm(alpha,a,x,beta,y,info,trans) use psb_error_mod use psb_string_mod @@ -3385,7 +3383,7 @@ subroutine psb_zcsrspspmm(a,b,c,info) ! Estimate number of nonzeros on output. nza = a%get_nzeros() nzb = b%get_nzeros() - nzc = 2*(nza+nzb) + nzc = int(1.25*(nza+nzb)) call c%allocate(ma,nb,nzc) call csr_spspmm(a,b,c,info) diff --git a/base/serial/impl/psb_z_mat_impl.F90 b/base/serial/impl/psb_z_mat_impl.F90 index 2a2e1b21..96ebec84 100644 --- a/base/serial/impl/psb_z_mat_impl.F90 +++ b/base/serial/impl/psb_z_mat_impl.F90 @@ -328,6 +328,35 @@ subroutine psb_z_set_triangle(a,val) end subroutine psb_z_set_triangle +subroutine psb_z_set_symmetric(a,val) + use psb_z_mat_mod, psb_protect_name => psb_z_set_symmetric + use psb_error_mod + implicit none + class(psb_zspmat_type), intent(inout) :: a + logical, intent(in), optional :: val + integer(psb_ipk_) :: err_act, info + character(len=20) :: name='get_nzeros' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (.not.allocated(a%a)) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + call a%a%set_symmetric(val) + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_set_symmetric + subroutine psb_z_set_unit(a,val) use psb_z_mat_mod, psb_protect_name => psb_z_set_unit From 5e03cacdec1012140278185def4667b6a00ab1fc Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Fri, 19 Jul 2019 14:37:36 +0100 Subject: [PATCH 09/15] New SCAN collective, only for SUM right now. New arg in CDALL to force HASH over GEN_BLOCK --- base/modules/psi_reduce_mod.F90 | 63 +++++++++++++++++++++++++++++++++ base/tools/psb_cdall.f90 | 49 +++++++++++++------------ 2 files changed, 90 insertions(+), 22 deletions(-) diff --git a/base/modules/psi_reduce_mod.F90 b/base/modules/psi_reduce_mod.F90 index 597d8f8e..8322df55 100644 --- a/base/modules/psi_reduce_mod.F90 +++ b/base/modules/psi_reduce_mod.F90 @@ -182,6 +182,14 @@ module psi_reduce_mod end interface #endif + interface psb_scan_sum + module procedure psb_iscan_sums + end interface psb_scan_sum + + interface psb_exscan_sum + module procedure psb_iexscan_sums + end interface psb_exscan_sum + contains @@ -5586,4 +5594,59 @@ contains end subroutine psb_d_nrm2v_ic #endif + + + ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + ! + ! SCAN + ! + ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + subroutine psb_iscan_sums(ictxt,dat) +#ifdef MPI_MOD + use mpi +#endif + implicit none +#ifdef MPI_H + include 'mpif.h' +#endif + integer(psb_mpik_), intent(in) :: ictxt + integer(psb_ipk_), intent(inout) :: dat + integer(psb_ipk_) :: dat_ + integer(psb_mpik_) :: iam, np, info + integer(psb_ipk_) :: iinfo + + +#if !defined(SERIAL_MPI) + call psb_info(ictxt,iam,np) + call mpi_scan(dat,dat_,1,psb_mpi_ipk_integer,mpi_sum,ictxt,info) + dat = dat_ +#endif + end subroutine psb_iscan_sums + + + subroutine psb_iexscan_sums(ictxt,dat) +#ifdef MPI_MOD + use mpi +#endif + implicit none +#ifdef MPI_H + include 'mpif.h' +#endif + integer(psb_mpik_), intent(in) :: ictxt + integer(psb_ipk_), intent(inout) :: dat + integer(psb_ipk_) :: dat_ + integer(psb_mpik_) :: iam, np, info + integer(psb_ipk_) :: iinfo + + +#if !defined(SERIAL_MPI) + call psb_info(ictxt,iam,np) + call mpi_scan(dat,dat_,1,psb_mpi_ipk_integer,mpi_sum,ictxt,info) + dat = dat_ +#else + dat = 0 +#endif + end subroutine psb_iexscan_sums + end module psi_reduce_mod diff --git a/base/tools/psb_cdall.f90 b/base/tools/psb_cdall.f90 index 557fea2c..bebf51bb 100644 --- a/base/tools/psb_cdall.f90 +++ b/base/tools/psb_cdall.f90 @@ -50,7 +50,7 @@ subroutine psb_cdall(ictxt, desc, info,mg,ng,parts,vg,vl,flag,nl,repl,globalchec character(len=20) :: name integer(psb_ipk_) :: err_act, n_, flag_, i, me, np, nlp, nnv, lr logical :: usehash_ - integer(psb_ipk_), allocatable :: itmpsz(:) + integer(psb_ipk_), allocatable :: itmpv(:), lvl(:) integer(psb_mpik_) :: iictxt @@ -136,35 +136,40 @@ subroutine psb_cdall(ictxt, desc, info,mg,ng,parts,vg,vl,flag,nl,repl,globalchec else usehash_ = .false. end if - if (usehash_) then - write(0,*) 'Fix usehash_ implementationt ' - end if - if (np == 1) then - allocate(psb_repl_map :: desc%indxmap, stat=info) + if (usehash_) then + nlp = nl + call psb_exscan_sum(ictxt,nlp) + lvl = [ (i,i=1,nl) ] + nlp + call psb_cd_inloc(lvl(1:nl),ictxt,desc,info, globalcheck=.false.) + else - allocate(psb_gen_block_map :: desc%indxmap, stat=info) - end if - if (info == psb_success_) then - select type(aa => desc%indxmap) - type is (psb_repl_map) - call aa%repl_map_init(iictxt,nl,info) - type is (psb_gen_block_map) - call aa%gen_block_map_init(iictxt,nl,info) - class default - ! This cannot happen - info = psb_err_internal_error_ - goto 9999 - end select + if (np == 1) then + allocate(psb_repl_map :: desc%indxmap, stat=info) + else + allocate(psb_gen_block_map :: desc%indxmap, stat=info) + end if + if (info == psb_success_) then + select type(aa => desc%indxmap) + type is (psb_repl_map) + call aa%repl_map_init(iictxt,nl,info) + type is (psb_gen_block_map) + call aa%gen_block_map_init(iictxt,nl,info) + class default + ! This cannot happen + info = psb_err_internal_error_ + goto 9999 + end select + end if end if - call psb_realloc(1,itmpsz, info) + call psb_realloc(1,itmpv, info) if (info /= 0) then write(0,*) 'Error reallocating itmspz' goto 9999 end if - itmpsz(:) = -1 - call psi_bld_tmpovrl(itmpsz,desc,info) + itmpv(:) = -1 + call psi_bld_tmpovrl(itmpv,desc,info) endif From 8fd9f70626dd403146771e354df300103dfdcca9 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Fri, 19 Jul 2019 14:37:49 +0100 Subject: [PATCH 10/15] csrd has xt_tri. --- base/modules/serial/psb_c_csr_mat_mod.f90 | 12 ++++++++++++ base/modules/serial/psb_d_csr_mat_mod.f90 | 12 ++++++++++++ base/modules/serial/psb_s_csr_mat_mod.f90 | 12 ++++++++++++ base/modules/serial/psb_z_csr_mat_mod.f90 | 12 ++++++++++++ 4 files changed, 48 insertions(+) diff --git a/base/modules/serial/psb_c_csr_mat_mod.f90 b/base/modules/serial/psb_c_csr_mat_mod.f90 index 0cbe5218..e9ac3247 100644 --- a/base/modules/serial/psb_c_csr_mat_mod.f90 +++ b/base/modules/serial/psb_c_csr_mat_mod.f90 @@ -624,6 +624,7 @@ module psb_c_csr_mat_mod procedure, pass(a) :: trim => psb_c_csrd_trim procedure, pass(a) :: free => c_csrd_free procedure, pass(a) :: mold => psb_c_csrd_mold + procedure, nopass :: has_xt_tri => c_csrd_has_xt_tri end type psb_c_csrd_sparse_mat @@ -872,5 +873,16 @@ contains end subroutine c_csrd_free + ! + ! has_xt_tri: does the current type support + ! extended triangle operations? + ! + function c_csrd_has_xt_tri() result(res) + implicit none + logical :: res + + res = .true. + end function c_csrd_has_xt_tri + end module psb_c_csr_mat_mod diff --git a/base/modules/serial/psb_d_csr_mat_mod.f90 b/base/modules/serial/psb_d_csr_mat_mod.f90 index 8b573652..cac76757 100644 --- a/base/modules/serial/psb_d_csr_mat_mod.f90 +++ b/base/modules/serial/psb_d_csr_mat_mod.f90 @@ -624,6 +624,7 @@ module psb_d_csr_mat_mod procedure, pass(a) :: trim => psb_d_csrd_trim procedure, pass(a) :: free => d_csrd_free procedure, pass(a) :: mold => psb_d_csrd_mold + procedure, nopass :: has_xt_tri => d_csrd_has_xt_tri end type psb_d_csrd_sparse_mat @@ -872,5 +873,16 @@ contains end subroutine d_csrd_free + ! + ! has_xt_tri: does the current type support + ! extended triangle operations? + ! + function d_csrd_has_xt_tri() result(res) + implicit none + logical :: res + + res = .true. + end function d_csrd_has_xt_tri + end module psb_d_csr_mat_mod diff --git a/base/modules/serial/psb_s_csr_mat_mod.f90 b/base/modules/serial/psb_s_csr_mat_mod.f90 index 2432481c..a6c40cd7 100644 --- a/base/modules/serial/psb_s_csr_mat_mod.f90 +++ b/base/modules/serial/psb_s_csr_mat_mod.f90 @@ -624,6 +624,7 @@ module psb_s_csr_mat_mod procedure, pass(a) :: trim => psb_s_csrd_trim procedure, pass(a) :: free => s_csrd_free procedure, pass(a) :: mold => psb_s_csrd_mold + procedure, nopass :: has_xt_tri => s_csrd_has_xt_tri end type psb_s_csrd_sparse_mat @@ -872,5 +873,16 @@ contains end subroutine s_csrd_free + ! + ! has_xt_tri: does the current type support + ! extended triangle operations? + ! + function s_csrd_has_xt_tri() result(res) + implicit none + logical :: res + + res = .true. + end function s_csrd_has_xt_tri + end module psb_s_csr_mat_mod diff --git a/base/modules/serial/psb_z_csr_mat_mod.f90 b/base/modules/serial/psb_z_csr_mat_mod.f90 index 341dc77a..c42e6c5f 100644 --- a/base/modules/serial/psb_z_csr_mat_mod.f90 +++ b/base/modules/serial/psb_z_csr_mat_mod.f90 @@ -624,6 +624,7 @@ module psb_z_csr_mat_mod procedure, pass(a) :: trim => psb_z_csrd_trim procedure, pass(a) :: free => z_csrd_free procedure, pass(a) :: mold => psb_z_csrd_mold + procedure, nopass :: has_xt_tri => z_csrd_has_xt_tri end type psb_z_csrd_sparse_mat @@ -872,5 +873,16 @@ contains end subroutine z_csrd_free + ! + ! has_xt_tri: does the current type support + ! extended triangle operations? + ! + function z_csrd_has_xt_tri() result(res) + implicit none + logical :: res + + res = .true. + end function z_csrd_has_xt_tri + end module psb_z_csr_mat_mod From 0460968f5d6f9678765c6a84b906c853ca4efc31 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Fri, 19 Jul 2019 14:38:02 +0100 Subject: [PATCH 11/15] Partial fix for ROOT in SPGATHER. --- base/comm/psb_cspgather.F90 | 7 ++++++- base/comm/psb_dspgather.F90 | 7 ++++++- base/comm/psb_sspgather.F90 | 7 ++++++- base/comm/psb_zspgather.F90 | 7 ++++++- 4 files changed, 24 insertions(+), 4 deletions(-) diff --git a/base/comm/psb_cspgather.F90 b/base/comm/psb_cspgather.F90 index be1ddbe0..c4ea2728 100644 --- a/base/comm/psb_cspgather.F90 +++ b/base/comm/psb_cspgather.F90 @@ -57,7 +57,7 @@ subroutine psb_csp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep integer(psb_ipk_) :: err_act, dupl_, nrg, ncg, nzg integer(psb_ipk_) :: ip,naggrm1,naggrp1, i, j, k, nzl logical :: keepnum_, keeploc_ - integer(psb_mpik_) :: ictxt,np,me + integer(psb_mpik_) :: ictxt,np,me, root_ integer(psb_mpik_) :: icomm, minfo, ndx integer(psb_mpik_), allocatable :: nzbr(:), idisp(:) integer(psb_ipk_) :: ierr(5) @@ -88,6 +88,11 @@ subroutine psb_csp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep else p_desc_c => desc_a end if + if (present(root)) then + root_ = root + else + root_ = -1 + end if call globa%free() diff --git a/base/comm/psb_dspgather.F90 b/base/comm/psb_dspgather.F90 index 59d0d080..f928d822 100644 --- a/base/comm/psb_dspgather.F90 +++ b/base/comm/psb_dspgather.F90 @@ -57,7 +57,7 @@ subroutine psb_dsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep integer(psb_ipk_) :: err_act, dupl_, nrg, ncg, nzg integer(psb_ipk_) :: ip,naggrm1,naggrp1, i, j, k, nzl logical :: keepnum_, keeploc_ - integer(psb_mpik_) :: ictxt,np,me + integer(psb_mpik_) :: ictxt,np,me, root_ integer(psb_mpik_) :: icomm, minfo, ndx integer(psb_mpik_), allocatable :: nzbr(:), idisp(:) integer(psb_ipk_) :: ierr(5) @@ -88,6 +88,11 @@ subroutine psb_dsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep else p_desc_c => desc_a end if + if (present(root)) then + root_ = root + else + root_ = -1 + end if call globa%free() diff --git a/base/comm/psb_sspgather.F90 b/base/comm/psb_sspgather.F90 index b974a2e7..e09eb26b 100644 --- a/base/comm/psb_sspgather.F90 +++ b/base/comm/psb_sspgather.F90 @@ -57,7 +57,7 @@ subroutine psb_ssp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep integer(psb_ipk_) :: err_act, dupl_, nrg, ncg, nzg integer(psb_ipk_) :: ip,naggrm1,naggrp1, i, j, k, nzl logical :: keepnum_, keeploc_ - integer(psb_mpik_) :: ictxt,np,me + integer(psb_mpik_) :: ictxt,np,me, root_ integer(psb_mpik_) :: icomm, minfo, ndx integer(psb_mpik_), allocatable :: nzbr(:), idisp(:) integer(psb_ipk_) :: ierr(5) @@ -88,6 +88,11 @@ subroutine psb_ssp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep else p_desc_c => desc_a end if + if (present(root)) then + root_ = root + else + root_ = -1 + end if call globa%free() diff --git a/base/comm/psb_zspgather.F90 b/base/comm/psb_zspgather.F90 index 7bcb72e7..090b568d 100644 --- a/base/comm/psb_zspgather.F90 +++ b/base/comm/psb_zspgather.F90 @@ -57,7 +57,7 @@ subroutine psb_zsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep integer(psb_ipk_) :: err_act, dupl_, nrg, ncg, nzg integer(psb_ipk_) :: ip,naggrm1,naggrp1, i, j, k, nzl logical :: keepnum_, keeploc_ - integer(psb_mpik_) :: ictxt,np,me + integer(psb_mpik_) :: ictxt,np,me, root_ integer(psb_mpik_) :: icomm, minfo, ndx integer(psb_mpik_), allocatable :: nzbr(:), idisp(:) integer(psb_ipk_) :: ierr(5) @@ -88,6 +88,11 @@ subroutine psb_zsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep else p_desc_c => desc_a end if + if (present(root)) then + root_ = root + else + root_ = -1 + end if call globa%free() From ffb6a6dd68e12eb0d08003b693338ce7e84e40d4 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Mon, 22 Jul 2019 17:16:10 +0100 Subject: [PATCH 12/15] Fix handling of SYMmetric --- base/modules/serial/psb_base_mat_mod.f90 | 3 +++ 1 file changed, 3 insertions(+) diff --git a/base/modules/serial/psb_base_mat_mod.f90 b/base/modules/serial/psb_base_mat_mod.f90 index 9d6eb75c..cf87a4e7 100644 --- a/base/modules/serial/psb_base_mat_mod.f90 +++ b/base/modules/serial/psb_base_mat_mod.f90 @@ -770,6 +770,7 @@ contains b%state = a%state b%duplicate = a%duplicate b%triangle = a%triangle + b%symmetric = a%symmetric b%unitd = a%unitd b%upper = .not.a%upper b%sorted = .false. @@ -789,6 +790,7 @@ contains b%state = a%state b%duplicate = a%duplicate b%triangle = a%triangle + b%symmetric = a%symmetric b%unitd = a%unitd b%upper = .not.a%upper b%sorted = .false. @@ -808,6 +810,7 @@ contains a%state = a%state a%duplicate = a%duplicate a%triangle = a%triangle + a%symmetric = a%symmetric a%unitd = a%unitd a%upper = .not.a%upper a%sorted = .false. From e0c40e042ea97c2a6898e634a55e09fd58d1042c Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Mon, 29 Jul 2019 14:17:32 +0100 Subject: [PATCH 13/15] SAVE module variables for timers. --- base/modules/psb_timers_mod.f90 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/base/modules/psb_timers_mod.f90 b/base/modules/psb_timers_mod.f90 index 1fe11162..5b2a8054 100644 --- a/base/modules/psb_timers_mod.f90 +++ b/base/modules/psb_timers_mod.f90 @@ -63,7 +63,8 @@ module psb_timers_mod type(psb_string_item), allocatable :: timers_descr(:) logical :: wanted(timer_entries_) type(psb_string_item) :: entries_descr(timer_entries_) - + save :: nsamples, timers, timers_descr, wanted, entries_descr + interface psb_realloc module procedure psb_string_item_realloc end interface psb_realloc From f79104a6d15babe765bba0c9d7b161e3d3eefd53 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Fri, 2 Aug 2019 09:35:34 +0100 Subject: [PATCH 14/15] Fix return statement in FCG --- krylov/psb_cfcg.F90 | 2 +- krylov/psb_dfcg.F90 | 2 +- krylov/psb_sfcg.F90 | 2 +- krylov/psb_zfcg.F90 | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/krylov/psb_cfcg.F90 b/krylov/psb_cfcg.F90 index 1f2d895d..09ab8127 100644 --- a/krylov/psb_cfcg.F90 +++ b/krylov/psb_cfcg.F90 @@ -305,7 +305,7 @@ subroutine psb_cfcg_vect(a,prec,b,x,eps,desc_a,info,& call psb_end_conv(methdname,itx ,desc_a,stopdat,info,derr,iter) if (present(err)) err = derr - + return 9999 continue call psb_erractionrestore(err_act) if (err_act.eq.psb_act_abort_) then diff --git a/krylov/psb_dfcg.F90 b/krylov/psb_dfcg.F90 index bdb336d5..6f664238 100644 --- a/krylov/psb_dfcg.F90 +++ b/krylov/psb_dfcg.F90 @@ -305,7 +305,7 @@ subroutine psb_dfcg_vect(a,prec,b,x,eps,desc_a,info,& call psb_end_conv(methdname,itx ,desc_a,stopdat,info,derr,iter) if (present(err)) err = derr - + return 9999 continue call psb_erractionrestore(err_act) if (err_act.eq.psb_act_abort_) then diff --git a/krylov/psb_sfcg.F90 b/krylov/psb_sfcg.F90 index 5b4e6957..c5b883cc 100644 --- a/krylov/psb_sfcg.F90 +++ b/krylov/psb_sfcg.F90 @@ -305,7 +305,7 @@ subroutine psb_sfcg_vect(a,prec,b,x,eps,desc_a,info,& call psb_end_conv(methdname,itx ,desc_a,stopdat,info,derr,iter) if (present(err)) err = derr - + return 9999 continue call psb_erractionrestore(err_act) if (err_act.eq.psb_act_abort_) then diff --git a/krylov/psb_zfcg.F90 b/krylov/psb_zfcg.F90 index cb9289bd..ca856f6b 100644 --- a/krylov/psb_zfcg.F90 +++ b/krylov/psb_zfcg.F90 @@ -305,7 +305,7 @@ subroutine psb_zfcg_vect(a,prec,b,x,eps,desc_a,info,& call psb_end_conv(methdname,itx ,desc_a,stopdat,info,derr,iter) if (present(err)) err = derr - + return 9999 continue call psb_erractionrestore(err_act) if (err_act.eq.psb_act_abort_) then From 0309f00ada48df9fdfd8ca628264487450f5365b Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Wed, 7 Aug 2019 16:22:57 +0100 Subject: [PATCH 15/15] New psb_par_spspmm method for parallel triple matrix product. --- base/modules/tools/psb_c_tools_mod.f90 | 15 ++- base/modules/tools/psb_d_tools_mod.f90 | 15 ++- base/modules/tools/psb_s_tools_mod.f90 | 15 ++- base/modules/tools/psb_z_tools_mod.f90 | 15 ++- base/tools/Makefile | 5 +- base/tools/psb_c_par_csr_spspmm.f90 | 156 +++++++++++++++++++++++++ base/tools/psb_d_par_csr_spspmm.f90 | 156 +++++++++++++++++++++++++ base/tools/psb_s_par_csr_spspmm.f90 | 156 +++++++++++++++++++++++++ base/tools/psb_z_par_csr_spspmm.f90 | 156 +++++++++++++++++++++++++ 9 files changed, 684 insertions(+), 5 deletions(-) create mode 100644 base/tools/psb_c_par_csr_spspmm.f90 create mode 100644 base/tools/psb_d_par_csr_spspmm.f90 create mode 100644 base/tools/psb_s_par_csr_spspmm.f90 create mode 100644 base/tools/psb_z_par_csr_spspmm.f90 diff --git a/base/modules/tools/psb_c_tools_mod.f90 b/base/modules/tools/psb_c_tools_mod.f90 index 2259cfaa..7dfa1585 100644 --- a/base/modules/tools/psb_c_tools_mod.f90 +++ b/base/modules/tools/psb_c_tools_mod.f90 @@ -363,7 +363,6 @@ Module psb_c_tools_mod end subroutine psb_cspins_2desc end interface - interface psb_sprn subroutine psb_csprn(a, desc_a,info,clear) import @@ -374,5 +373,19 @@ Module psb_c_tools_mod logical, intent(in), optional :: clear end subroutine psb_csprn end interface + + interface psb_par_spspmm + subroutine psb_c_par_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) + import :: psb_c_csr_sparse_mat, psb_desc_type, psb_ipk_ + Implicit None + type(psb_c_csr_sparse_mat),intent(in) :: acsr + type(psb_c_csr_sparse_mat),intent(inout) :: bcsr + type(psb_c_csr_sparse_mat),intent(out) :: ccsr + type(psb_desc_type),intent(in) :: desc_a + type(psb_desc_type),intent(inout) :: desc_c + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: data + End Subroutine psb_c_par_csr_spspmm + end interface psb_par_spspmm end module psb_c_tools_mod diff --git a/base/modules/tools/psb_d_tools_mod.f90 b/base/modules/tools/psb_d_tools_mod.f90 index 8f0e4824..94e9762c 100644 --- a/base/modules/tools/psb_d_tools_mod.f90 +++ b/base/modules/tools/psb_d_tools_mod.f90 @@ -363,7 +363,6 @@ Module psb_d_tools_mod end subroutine psb_dspins_2desc end interface - interface psb_sprn subroutine psb_dsprn(a, desc_a,info,clear) import @@ -374,5 +373,19 @@ Module psb_d_tools_mod logical, intent(in), optional :: clear end subroutine psb_dsprn end interface + + interface psb_par_spspmm + subroutine psb_d_par_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) + import :: psb_d_csr_sparse_mat, psb_desc_type, psb_ipk_ + Implicit None + type(psb_d_csr_sparse_mat),intent(in) :: acsr + type(psb_d_csr_sparse_mat),intent(inout) :: bcsr + type(psb_d_csr_sparse_mat),intent(out) :: ccsr + type(psb_desc_type),intent(in) :: desc_a + type(psb_desc_type),intent(inout) :: desc_c + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: data + End Subroutine psb_d_par_csr_spspmm + end interface psb_par_spspmm end module psb_d_tools_mod diff --git a/base/modules/tools/psb_s_tools_mod.f90 b/base/modules/tools/psb_s_tools_mod.f90 index a02e4629..6ba4fd5b 100644 --- a/base/modules/tools/psb_s_tools_mod.f90 +++ b/base/modules/tools/psb_s_tools_mod.f90 @@ -363,7 +363,6 @@ Module psb_s_tools_mod end subroutine psb_sspins_2desc end interface - interface psb_sprn subroutine psb_ssprn(a, desc_a,info,clear) import @@ -374,5 +373,19 @@ Module psb_s_tools_mod logical, intent(in), optional :: clear end subroutine psb_ssprn end interface + + interface psb_par_spspmm + subroutine psb_s_par_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) + import :: psb_s_csr_sparse_mat, psb_desc_type, psb_ipk_ + Implicit None + type(psb_s_csr_sparse_mat),intent(in) :: acsr + type(psb_s_csr_sparse_mat),intent(inout) :: bcsr + type(psb_s_csr_sparse_mat),intent(out) :: ccsr + type(psb_desc_type),intent(in) :: desc_a + type(psb_desc_type),intent(inout) :: desc_c + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: data + End Subroutine psb_s_par_csr_spspmm + end interface psb_par_spspmm end module psb_s_tools_mod diff --git a/base/modules/tools/psb_z_tools_mod.f90 b/base/modules/tools/psb_z_tools_mod.f90 index a3d2bed1..b5dca25a 100644 --- a/base/modules/tools/psb_z_tools_mod.f90 +++ b/base/modules/tools/psb_z_tools_mod.f90 @@ -363,7 +363,6 @@ Module psb_z_tools_mod end subroutine psb_zspins_2desc end interface - interface psb_sprn subroutine psb_zsprn(a, desc_a,info,clear) import @@ -374,5 +373,19 @@ Module psb_z_tools_mod logical, intent(in), optional :: clear end subroutine psb_zsprn end interface + + interface psb_par_spspmm + subroutine psb_z_par_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) + import :: psb_z_csr_sparse_mat, psb_desc_type, psb_ipk_ + Implicit None + type(psb_z_csr_sparse_mat),intent(in) :: acsr + type(psb_z_csr_sparse_mat),intent(inout) :: bcsr + type(psb_z_csr_sparse_mat),intent(out) :: ccsr + type(psb_desc_type),intent(in) :: desc_a + type(psb_desc_type),intent(inout) :: desc_c + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: data + End Subroutine psb_z_par_csr_spspmm + end interface psb_par_spspmm end module psb_z_tools_mod diff --git a/base/tools/Makefile b/base/tools/Makefile index fc3e7411..12cf0099 100644 --- a/base/tools/Makefile +++ b/base/tools/Makefile @@ -19,7 +19,10 @@ FOBJS = psb_sallc.o psb_sasb.o \ psb_cspalloc.o psb_cspasb.o psb_cspfree.o\ psb_callc.o psb_casb.o psb_cfree.o psb_cins.o \ psb_cspins.o psb_csprn.o psb_cd_set_bld.o \ - psb_s_map.o psb_d_map.o psb_c_map.o psb_z_map.o + psb_s_map.o psb_d_map.o psb_c_map.o psb_z_map.o \ + psb_s_par_csr_spspmm.o psb_d_par_csr_spspmm.o \ + psb_c_par_csr_spspmm.o psb_z_par_csr_spspmm.o + MPFOBJS = psb_icdasb.o psb_ssphalo.o psb_dsphalo.o psb_csphalo.o psb_zsphalo.o \ psb_dcdbldext.o psb_zcdbldext.o psb_scdbldext.o psb_ccdbldext.o diff --git a/base/tools/psb_c_par_csr_spspmm.f90 b/base/tools/psb_c_par_csr_spspmm.f90 new file mode 100644 index 00000000..ba4cdb7a --- /dev/null +++ b/base/tools/psb_c_par_csr_spspmm.f90 @@ -0,0 +1,156 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: psb_c_par_csr_spspmm.f90 +! +! Subroutine: psb_c_par_csr_spspmm +! Version: complex +! +! This routine computes a parallel product of two sparse matrices +! +! C = A * B +! +! where all the matrices are stored in CSR. On input and output the matrices +! are stored with column indices in local numbering, but inermediate quantities +! are in global numbering because gathering the halo of B to multiply it +! by A implies a potential enlargement of the support. +! Also, B may have a column index space different from its row index space, +! which is obviously the same as the column space of A. +! +! +! Arguments: +! acsr - type(psb_c_csr_sparse_mat), input. +! The sparse matrix structure A +! desc_a - type(psb_desc_type), input. +! The communication descriptor of the column space of A +! bcsr - type(psb_c_csr_sparse_mat), input/output. +! The sparse matrix structure B, gets row-extended on output +! ccsr - type(psb_c_csr_sparse_mat), output +! The sparse matrix structure C +! desc_c - type(psb_desc_type), input/output. +! The communication descriptor of the column space of B +! +! info - integer, output. +! Error code. +! + +Subroutine psb_c_par_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) + use psb_base_mod, psb_protect_name => psb_c_par_csr_spspmm + Implicit None + + type(psb_c_csr_sparse_mat),intent(in) :: acsr + type(psb_c_csr_sparse_mat),intent(inout) :: bcsr + type(psb_c_csr_sparse_mat),intent(out) :: ccsr + type(psb_desc_type),intent(in) :: desc_a + type(psb_desc_type),intent(inout) :: desc_c + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: data + ! ...local scalars.... + integer(psb_ipk_) :: ictxt, np,me + integer(psb_ipk_) :: ncol, nnz + type(psb_c_csr_sparse_mat) :: tcsr1 + logical :: update_desc_c + integer(psb_ipk_) :: debug_level, debug_unit, err_act + character(len=20) :: name, ch_err + + if(psb_get_errstatus() /= 0) return + info=psb_success_ + name='psb_c_p_csr_spspmm' + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_ ; goto 9999 + end if + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + ictxt = desc_a%get_context() + + call psb_info(ictxt, me, np) + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),': Start' + + update_desc_c = desc_c%is_bld() + + ! + ! This is a bit tricky. + ! DESC_A is the descriptor of (the columns of) A, and therefore + ! of the rows of B; the columns of B, in the intended usage, span + ! a different space for which we have DESC_C. + ! We are gathering the halo rows of B to multiply by A; + ! now, the columns of B would ideally be kept in + ! global numbering, so that we can call this repeatedly to accumulate + ! the product of multiple operators, and convert to local numbering + ! at the last possible moment. However, this would imply calling + ! the serial SPSPMM with a matrix B with the GLOBAL number of columns + ! and this could be very expensive in memory. The solution is to keep B + ! in local numbering, so that only columns really appearing count, but to + ! expand the descriptor when gathering the halo, because by performing + ! the products we are extending the support of the operator; hence + ! this routine is intended to be called with a temporary descriptor + ! DESC_C which is in the BUILD state, to allow for such expansion + ! across multiple products. + ! The caller will at some later point finalize the descriptor DESC_C. + ! + + ncol = desc_a%get_local_cols() + call psb_sphalo(bcsr,desc_a,tcsr1,info,& + & colcnv=.true.,rowscale=.true.,outcol_glob=.true.,col_desc=desc_c,data=data) + nnz = tcsr1%get_nzeros() + if (update_desc_c) then + call desc_c%indxmap%g2lip_ins(tcsr1%ja(1:nnz),info) + else + call desc_c%indxmap%g2lip(tcsr1%ja(1:nnz),info) + end if + if (info == psb_success_) call psb_rwextd(ncol,bcsr,info,b=tcsr1) + if (info == psb_success_) call tcsr1%free() + if(info /= psb_success_) then + call psb_errpush(psb_err_internal_error_,name,a_err='Extend am3') + goto 9999 + end if + call bcsr%set_ncols(desc_c%get_local_cols()) + + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & 'starting spspmm 3' + if (debug_level >= psb_debug_outer_) write(debug_unit,*) me,' ',trim(name),& + & 'starting spspmm ',acsr%get_nrows(),acsr%get_ncols(),bcsr%get_nrows(),bcsr%get_ncols() + call psb_spspmm(acsr,bcsr,ccsr,info) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ictxt,err_act) + + return + +End Subroutine psb_c_par_csr_spspmm diff --git a/base/tools/psb_d_par_csr_spspmm.f90 b/base/tools/psb_d_par_csr_spspmm.f90 new file mode 100644 index 00000000..3d13de7d --- /dev/null +++ b/base/tools/psb_d_par_csr_spspmm.f90 @@ -0,0 +1,156 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: psb_d_par_csr_spspmm.f90 +! +! Subroutine: psb_d_par_csr_spspmm +! Version: real +! +! This routine computes a parallel product of two sparse matrices +! +! C = A * B +! +! where all the matrices are stored in CSR. On input and output the matrices +! are stored with column indices in local numbering, but inermediate quantities +! are in global numbering because gathering the halo of B to multiply it +! by A implies a potential enlargement of the support. +! Also, B may have a column index space different from its row index space, +! which is obviously the same as the column space of A. +! +! +! Arguments: +! acsr - type(psb_d_csr_sparse_mat), input. +! The sparse matrix structure A +! desc_a - type(psb_desc_type), input. +! The communication descriptor of the column space of A +! bcsr - type(psb_d_csr_sparse_mat), input/output. +! The sparse matrix structure B, gets row-extended on output +! ccsr - type(psb_d_csr_sparse_mat), output +! The sparse matrix structure C +! desc_c - type(psb_desc_type), input/output. +! The communication descriptor of the column space of B +! +! info - integer, output. +! Error code. +! + +Subroutine psb_d_par_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) + use psb_base_mod, psb_protect_name => psb_d_par_csr_spspmm + Implicit None + + type(psb_d_csr_sparse_mat),intent(in) :: acsr + type(psb_d_csr_sparse_mat),intent(inout) :: bcsr + type(psb_d_csr_sparse_mat),intent(out) :: ccsr + type(psb_desc_type),intent(in) :: desc_a + type(psb_desc_type),intent(inout) :: desc_c + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: data + ! ...local scalars.... + integer(psb_ipk_) :: ictxt, np,me + integer(psb_ipk_) :: ncol, nnz + type(psb_d_csr_sparse_mat) :: tcsr1 + logical :: update_desc_c + integer(psb_ipk_) :: debug_level, debug_unit, err_act + character(len=20) :: name, ch_err + + if(psb_get_errstatus() /= 0) return + info=psb_success_ + name='psb_d_p_csr_spspmm' + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_ ; goto 9999 + end if + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + ictxt = desc_a%get_context() + + call psb_info(ictxt, me, np) + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),': Start' + + update_desc_c = desc_c%is_bld() + + ! + ! This is a bit tricky. + ! DESC_A is the descriptor of (the columns of) A, and therefore + ! of the rows of B; the columns of B, in the intended usage, span + ! a different space for which we have DESC_C. + ! We are gathering the halo rows of B to multiply by A; + ! now, the columns of B would ideally be kept in + ! global numbering, so that we can call this repeatedly to accumulate + ! the product of multiple operators, and convert to local numbering + ! at the last possible moment. However, this would imply calling + ! the serial SPSPMM with a matrix B with the GLOBAL number of columns + ! and this could be very expensive in memory. The solution is to keep B + ! in local numbering, so that only columns really appearing count, but to + ! expand the descriptor when gathering the halo, because by performing + ! the products we are extending the support of the operator; hence + ! this routine is intended to be called with a temporary descriptor + ! DESC_C which is in the BUILD state, to allow for such expansion + ! across multiple products. + ! The caller will at some later point finalize the descriptor DESC_C. + ! + + ncol = desc_a%get_local_cols() + call psb_sphalo(bcsr,desc_a,tcsr1,info,& + & colcnv=.true.,rowscale=.true.,outcol_glob=.true.,col_desc=desc_c,data=data) + nnz = tcsr1%get_nzeros() + if (update_desc_c) then + call desc_c%indxmap%g2lip_ins(tcsr1%ja(1:nnz),info) + else + call desc_c%indxmap%g2lip(tcsr1%ja(1:nnz),info) + end if + if (info == psb_success_) call psb_rwextd(ncol,bcsr,info,b=tcsr1) + if (info == psb_success_) call tcsr1%free() + if(info /= psb_success_) then + call psb_errpush(psb_err_internal_error_,name,a_err='Extend am3') + goto 9999 + end if + call bcsr%set_ncols(desc_c%get_local_cols()) + + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & 'starting spspmm 3' + if (debug_level >= psb_debug_outer_) write(debug_unit,*) me,' ',trim(name),& + & 'starting spspmm ',acsr%get_nrows(),acsr%get_ncols(),bcsr%get_nrows(),bcsr%get_ncols() + call psb_spspmm(acsr,bcsr,ccsr,info) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ictxt,err_act) + + return + +End Subroutine psb_d_par_csr_spspmm diff --git a/base/tools/psb_s_par_csr_spspmm.f90 b/base/tools/psb_s_par_csr_spspmm.f90 new file mode 100644 index 00000000..30530b41 --- /dev/null +++ b/base/tools/psb_s_par_csr_spspmm.f90 @@ -0,0 +1,156 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: psb_s_par_csr_spspmm.f90 +! +! Subroutine: psb_s_par_csr_spspmm +! Version: real +! +! This routine computes a parallel product of two sparse matrices +! +! C = A * B +! +! where all the matrices are stored in CSR. On input and output the matrices +! are stored with column indices in local numbering, but inermediate quantities +! are in global numbering because gathering the halo of B to multiply it +! by A implies a potential enlargement of the support. +! Also, B may have a column index space different from its row index space, +! which is obviously the same as the column space of A. +! +! +! Arguments: +! acsr - type(psb_s_csr_sparse_mat), input. +! The sparse matrix structure A +! desc_a - type(psb_desc_type), input. +! The communication descriptor of the column space of A +! bcsr - type(psb_s_csr_sparse_mat), input/output. +! The sparse matrix structure B, gets row-extended on output +! ccsr - type(psb_s_csr_sparse_mat), output +! The sparse matrix structure C +! desc_c - type(psb_desc_type), input/output. +! The communication descriptor of the column space of B +! +! info - integer, output. +! Error code. +! + +Subroutine psb_s_par_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) + use psb_base_mod, psb_protect_name => psb_s_par_csr_spspmm + Implicit None + + type(psb_s_csr_sparse_mat),intent(in) :: acsr + type(psb_s_csr_sparse_mat),intent(inout) :: bcsr + type(psb_s_csr_sparse_mat),intent(out) :: ccsr + type(psb_desc_type),intent(in) :: desc_a + type(psb_desc_type),intent(inout) :: desc_c + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: data + ! ...local scalars.... + integer(psb_ipk_) :: ictxt, np,me + integer(psb_ipk_) :: ncol, nnz + type(psb_s_csr_sparse_mat) :: tcsr1 + logical :: update_desc_c + integer(psb_ipk_) :: debug_level, debug_unit, err_act + character(len=20) :: name, ch_err + + if(psb_get_errstatus() /= 0) return + info=psb_success_ + name='psb_s_p_csr_spspmm' + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_ ; goto 9999 + end if + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + ictxt = desc_a%get_context() + + call psb_info(ictxt, me, np) + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),': Start' + + update_desc_c = desc_c%is_bld() + + ! + ! This is a bit tricky. + ! DESC_A is the descriptor of (the columns of) A, and therefore + ! of the rows of B; the columns of B, in the intended usage, span + ! a different space for which we have DESC_C. + ! We are gathering the halo rows of B to multiply by A; + ! now, the columns of B would ideally be kept in + ! global numbering, so that we can call this repeatedly to accumulate + ! the product of multiple operators, and convert to local numbering + ! at the last possible moment. However, this would imply calling + ! the serial SPSPMM with a matrix B with the GLOBAL number of columns + ! and this could be very expensive in memory. The solution is to keep B + ! in local numbering, so that only columns really appearing count, but to + ! expand the descriptor when gathering the halo, because by performing + ! the products we are extending the support of the operator; hence + ! this routine is intended to be called with a temporary descriptor + ! DESC_C which is in the BUILD state, to allow for such expansion + ! across multiple products. + ! The caller will at some later point finalize the descriptor DESC_C. + ! + + ncol = desc_a%get_local_cols() + call psb_sphalo(bcsr,desc_a,tcsr1,info,& + & colcnv=.true.,rowscale=.true.,outcol_glob=.true.,col_desc=desc_c,data=data) + nnz = tcsr1%get_nzeros() + if (update_desc_c) then + call desc_c%indxmap%g2lip_ins(tcsr1%ja(1:nnz),info) + else + call desc_c%indxmap%g2lip(tcsr1%ja(1:nnz),info) + end if + if (info == psb_success_) call psb_rwextd(ncol,bcsr,info,b=tcsr1) + if (info == psb_success_) call tcsr1%free() + if(info /= psb_success_) then + call psb_errpush(psb_err_internal_error_,name,a_err='Extend am3') + goto 9999 + end if + call bcsr%set_ncols(desc_c%get_local_cols()) + + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & 'starting spspmm 3' + if (debug_level >= psb_debug_outer_) write(debug_unit,*) me,' ',trim(name),& + & 'starting spspmm ',acsr%get_nrows(),acsr%get_ncols(),bcsr%get_nrows(),bcsr%get_ncols() + call psb_spspmm(acsr,bcsr,ccsr,info) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ictxt,err_act) + + return + +End Subroutine psb_s_par_csr_spspmm diff --git a/base/tools/psb_z_par_csr_spspmm.f90 b/base/tools/psb_z_par_csr_spspmm.f90 new file mode 100644 index 00000000..4ec9adab --- /dev/null +++ b/base/tools/psb_z_par_csr_spspmm.f90 @@ -0,0 +1,156 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: psb_z_par_csr_spspmm.f90 +! +! Subroutine: psb_z_par_csr_spspmm +! Version: complex +! +! This routine computes a parallel product of two sparse matrices +! +! C = A * B +! +! where all the matrices are stored in CSR. On input and output the matrices +! are stored with column indices in local numbering, but inermediate quantities +! are in global numbering because gathering the halo of B to multiply it +! by A implies a potential enlargement of the support. +! Also, B may have a column index space different from its row index space, +! which is obviously the same as the column space of A. +! +! +! Arguments: +! acsr - type(psb_z_csr_sparse_mat), input. +! The sparse matrix structure A +! desc_a - type(psb_desc_type), input. +! The communication descriptor of the column space of A +! bcsr - type(psb_z_csr_sparse_mat), input/output. +! The sparse matrix structure B, gets row-extended on output +! ccsr - type(psb_z_csr_sparse_mat), output +! The sparse matrix structure C +! desc_c - type(psb_desc_type), input/output. +! The communication descriptor of the column space of B +! +! info - integer, output. +! Error code. +! + +Subroutine psb_z_par_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) + use psb_base_mod, psb_protect_name => psb_z_par_csr_spspmm + Implicit None + + type(psb_z_csr_sparse_mat),intent(in) :: acsr + type(psb_z_csr_sparse_mat),intent(inout) :: bcsr + type(psb_z_csr_sparse_mat),intent(out) :: ccsr + type(psb_desc_type),intent(in) :: desc_a + type(psb_desc_type),intent(inout) :: desc_c + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: data + ! ...local scalars.... + integer(psb_ipk_) :: ictxt, np,me + integer(psb_ipk_) :: ncol, nnz + type(psb_z_csr_sparse_mat) :: tcsr1 + logical :: update_desc_c + integer(psb_ipk_) :: debug_level, debug_unit, err_act + character(len=20) :: name, ch_err + + if(psb_get_errstatus() /= 0) return + info=psb_success_ + name='psb_z_p_csr_spspmm' + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_ ; goto 9999 + end if + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + ictxt = desc_a%get_context() + + call psb_info(ictxt, me, np) + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),': Start' + + update_desc_c = desc_c%is_bld() + + ! + ! This is a bit tricky. + ! DESC_A is the descriptor of (the columns of) A, and therefore + ! of the rows of B; the columns of B, in the intended usage, span + ! a different space for which we have DESC_C. + ! We are gathering the halo rows of B to multiply by A; + ! now, the columns of B would ideally be kept in + ! global numbering, so that we can call this repeatedly to accumulate + ! the product of multiple operators, and convert to local numbering + ! at the last possible moment. However, this would imply calling + ! the serial SPSPMM with a matrix B with the GLOBAL number of columns + ! and this could be very expensive in memory. The solution is to keep B + ! in local numbering, so that only columns really appearing count, but to + ! expand the descriptor when gathering the halo, because by performing + ! the products we are extending the support of the operator; hence + ! this routine is intended to be called with a temporary descriptor + ! DESC_C which is in the BUILD state, to allow for such expansion + ! across multiple products. + ! The caller will at some later point finalize the descriptor DESC_C. + ! + + ncol = desc_a%get_local_cols() + call psb_sphalo(bcsr,desc_a,tcsr1,info,& + & colcnv=.true.,rowscale=.true.,outcol_glob=.true.,col_desc=desc_c,data=data) + nnz = tcsr1%get_nzeros() + if (update_desc_c) then + call desc_c%indxmap%g2lip_ins(tcsr1%ja(1:nnz),info) + else + call desc_c%indxmap%g2lip(tcsr1%ja(1:nnz),info) + end if + if (info == psb_success_) call psb_rwextd(ncol,bcsr,info,b=tcsr1) + if (info == psb_success_) call tcsr1%free() + if(info /= psb_success_) then + call psb_errpush(psb_err_internal_error_,name,a_err='Extend am3') + goto 9999 + end if + call bcsr%set_ncols(desc_c%get_local_cols()) + + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & 'starting spspmm 3' + if (debug_level >= psb_debug_outer_) write(debug_unit,*) me,' ',trim(name),& + & 'starting spspmm ',acsr%get_nrows(),acsr%get_ncols(),bcsr%get_nrows(),bcsr%get_ncols() + call psb_spspmm(acsr,bcsr,ccsr,info) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ictxt,err_act) + + return + +End Subroutine psb_z_par_csr_spspmm