From 00df8edd06179d821723a67d7e7a16bf1e92fd4a Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Mon, 8 Jul 2019 15:59:15 +0100 Subject: [PATCH] Improve memory usage in fix_coo, and mv_to/mv_from CSR/CSC. --- base/serial/impl/psb_c_coo_impl.f90 | 23 ++++++++++++++++++----- base/serial/impl/psb_c_csc_impl.f90 | 2 +- base/serial/impl/psb_c_csr_impl.f90 | 8 +++++--- base/serial/impl/psb_d_coo_impl.f90 | 23 ++++++++++++++++++----- base/serial/impl/psb_d_csc_impl.f90 | 2 +- base/serial/impl/psb_d_csr_impl.f90 | 8 +++++--- base/serial/impl/psb_s_coo_impl.f90 | 23 ++++++++++++++++++----- base/serial/impl/psb_s_csc_impl.f90 | 2 +- base/serial/impl/psb_s_csr_impl.f90 | 8 +++++--- base/serial/impl/psb_z_coo_impl.f90 | 23 ++++++++++++++++++----- base/serial/impl/psb_z_csc_impl.f90 | 2 +- base/serial/impl/psb_z_csr_impl.f90 | 8 +++++--- 12 files changed, 96 insertions(+), 36 deletions(-) diff --git a/base/serial/impl/psb_c_coo_impl.f90 b/base/serial/impl/psb_c_coo_impl.f90 index 695e3583..3d0db0b3 100644 --- a/base/serial/impl/psb_c_coo_impl.f90 +++ b/base/serial/impl/psb_c_coo_impl.f90 @@ -3416,21 +3416,26 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) dupl_ = dupl - - allocate(iaux(max(nr,nc,nzin)+2),stat=info) + allocate(iaux(nzin+2),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) goto 9999 end if - - allocate(ias(nzin),jas(nzin),vs(nzin),ix2(max(nr,nc,nzin)+2), stat=info) - use_buffers = (info == 0) select case(idir_) case(psb_row_major_) ! Row major order + + if (nr <= nzin) then + ! Avoid strange situations with large indices + allocate(ias(nzin),jas(nzin),vs(nzin),ix2(nzin+2), stat=info) + use_buffers = (info == 0) + else + use_buffers = .false. + end if + if (use_buffers) then if (.not.( (ia(1) < 1).or.(ia(1)> nr)) ) then iaux(:) = 0 @@ -3752,6 +3757,14 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) case(psb_col_major_) + if (nc <= nzin) then + ! Avoid strange situations with large indices + allocate(ias(nzin),jas(nzin),vs(nzin),ix2(nzin+2), stat=info) + use_buffers = (info == 0) + else + use_buffers = .false. + end if + if (use_buffers) then iaux(:) = 0 if (.not.( (ja(1) < 1).or.(ja(1)> nc)) ) then diff --git a/base/serial/impl/psb_c_csc_impl.f90 b/base/serial/impl/psb_c_csc_impl.f90 index d9483937..23e8f4a8 100644 --- a/base/serial/impl/psb_c_csc_impl.f90 +++ b/base/serial/impl/psb_c_csc_impl.f90 @@ -2298,7 +2298,7 @@ subroutine psb_c_mv_csc_from_coo(a,b,info) call move_alloc(b%ja,itemp) call move_alloc(b%ia,a%ia) call move_alloc(b%val,a%val) - call psb_realloc(nc+1,a%icp,info) + call psb_realloc(max(nr+1,nc+1),a%icp,info) call b%free() a%icp(:) = 0 diff --git a/base/serial/impl/psb_c_csr_impl.f90 b/base/serial/impl/psb_c_csr_impl.f90 index 472f0a8f..e248c5e1 100644 --- a/base/serial/impl/psb_c_csr_impl.f90 +++ b/base/serial/impl/psb_c_csr_impl.f90 @@ -999,6 +999,8 @@ contains end subroutine psb_c_csr_cssv + + subroutine psb_c_csr_cssm(alpha,a,x,beta,y,info,trans) use psb_error_mod use psb_string_mod @@ -2960,7 +2962,7 @@ subroutine psb_c_cp_csr_from_coo(a,b,info) call move_alloc(tmp%ia,itemp) call move_alloc(tmp%ja,a%ja) call move_alloc(tmp%val,a%val) - call psb_realloc(nr+1,a%irp,info) + call psb_realloc(max(nr+1,nc+1),a%irp,info) call tmp%free() else @@ -3129,7 +3131,7 @@ subroutine psb_c_mv_csr_from_coo(a,b,info) call move_alloc(b%ia,itemp) call move_alloc(b%ja,a%ja) call move_alloc(b%val,a%val) - call psb_realloc(nr+1,a%irp,info) + call psb_realloc(max(nr+1,nc+1),a%irp,info) call b%free() @@ -3384,7 +3386,7 @@ subroutine psb_ccsrspspmm(a,b,c,info) ! Estimate number of nonzeros on output. nza = a%get_nzeros() nzb = b%get_nzeros() - nzc = int(1.25*(nza+nzb)) + nzc = 2*(nza+nzb) call c%allocate(ma,nb,nzc) call csr_spspmm(a,b,c,info) diff --git a/base/serial/impl/psb_d_coo_impl.f90 b/base/serial/impl/psb_d_coo_impl.f90 index 47c0b107..89aa8b97 100644 --- a/base/serial/impl/psb_d_coo_impl.f90 +++ b/base/serial/impl/psb_d_coo_impl.f90 @@ -3416,21 +3416,26 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) dupl_ = dupl - - allocate(iaux(max(nr,nc,nzin)+2),stat=info) + allocate(iaux(nzin+2),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) goto 9999 end if - - allocate(ias(nzin),jas(nzin),vs(nzin),ix2(max(nr,nc,nzin)+2), stat=info) - use_buffers = (info == 0) select case(idir_) case(psb_row_major_) ! Row major order + + if (nr <= nzin) then + ! Avoid strange situations with large indices + allocate(ias(nzin),jas(nzin),vs(nzin),ix2(nzin+2), stat=info) + use_buffers = (info == 0) + else + use_buffers = .false. + end if + if (use_buffers) then if (.not.( (ia(1) < 1).or.(ia(1)> nr)) ) then iaux(:) = 0 @@ -3752,6 +3757,14 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) case(psb_col_major_) + if (nc <= nzin) then + ! Avoid strange situations with large indices + allocate(ias(nzin),jas(nzin),vs(nzin),ix2(nzin+2), stat=info) + use_buffers = (info == 0) + else + use_buffers = .false. + end if + if (use_buffers) then iaux(:) = 0 if (.not.( (ja(1) < 1).or.(ja(1)> nc)) ) then diff --git a/base/serial/impl/psb_d_csc_impl.f90 b/base/serial/impl/psb_d_csc_impl.f90 index 25085229..ef68164d 100644 --- a/base/serial/impl/psb_d_csc_impl.f90 +++ b/base/serial/impl/psb_d_csc_impl.f90 @@ -2298,7 +2298,7 @@ subroutine psb_d_mv_csc_from_coo(a,b,info) call move_alloc(b%ja,itemp) call move_alloc(b%ia,a%ia) call move_alloc(b%val,a%val) - call psb_realloc(nc+1,a%icp,info) + call psb_realloc(max(nr+1,nc+1),a%icp,info) call b%free() a%icp(:) = 0 diff --git a/base/serial/impl/psb_d_csr_impl.f90 b/base/serial/impl/psb_d_csr_impl.f90 index d06c4649..a6fb0b18 100644 --- a/base/serial/impl/psb_d_csr_impl.f90 +++ b/base/serial/impl/psb_d_csr_impl.f90 @@ -999,6 +999,8 @@ contains end subroutine psb_d_csr_cssv + + subroutine psb_d_csr_cssm(alpha,a,x,beta,y,info,trans) use psb_error_mod use psb_string_mod @@ -2960,7 +2962,7 @@ subroutine psb_d_cp_csr_from_coo(a,b,info) call move_alloc(tmp%ia,itemp) call move_alloc(tmp%ja,a%ja) call move_alloc(tmp%val,a%val) - call psb_realloc(nr+1,a%irp,info) + call psb_realloc(max(nr+1,nc+1),a%irp,info) call tmp%free() else @@ -3129,7 +3131,7 @@ subroutine psb_d_mv_csr_from_coo(a,b,info) call move_alloc(b%ia,itemp) call move_alloc(b%ja,a%ja) call move_alloc(b%val,a%val) - call psb_realloc(nr+1,a%irp,info) + call psb_realloc(max(nr+1,nc+1),a%irp,info) call b%free() @@ -3384,7 +3386,7 @@ subroutine psb_dcsrspspmm(a,b,c,info) ! Estimate number of nonzeros on output. nza = a%get_nzeros() nzb = b%get_nzeros() - nzc = int(1.25*(nza+nzb)) + nzc = 2*(nza+nzb) call c%allocate(ma,nb,nzc) call csr_spspmm(a,b,c,info) diff --git a/base/serial/impl/psb_s_coo_impl.f90 b/base/serial/impl/psb_s_coo_impl.f90 index b80a8a99..d327e12e 100644 --- a/base/serial/impl/psb_s_coo_impl.f90 +++ b/base/serial/impl/psb_s_coo_impl.f90 @@ -3416,21 +3416,26 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) dupl_ = dupl - - allocate(iaux(max(nr,nc,nzin)+2),stat=info) + allocate(iaux(nzin+2),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) goto 9999 end if - - allocate(ias(nzin),jas(nzin),vs(nzin),ix2(max(nr,nc,nzin)+2), stat=info) - use_buffers = (info == 0) select case(idir_) case(psb_row_major_) ! Row major order + + if (nr <= nzin) then + ! Avoid strange situations with large indices + allocate(ias(nzin),jas(nzin),vs(nzin),ix2(nzin+2), stat=info) + use_buffers = (info == 0) + else + use_buffers = .false. + end if + if (use_buffers) then if (.not.( (ia(1) < 1).or.(ia(1)> nr)) ) then iaux(:) = 0 @@ -3752,6 +3757,14 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) case(psb_col_major_) + if (nc <= nzin) then + ! Avoid strange situations with large indices + allocate(ias(nzin),jas(nzin),vs(nzin),ix2(nzin+2), stat=info) + use_buffers = (info == 0) + else + use_buffers = .false. + end if + if (use_buffers) then iaux(:) = 0 if (.not.( (ja(1) < 1).or.(ja(1)> nc)) ) then diff --git a/base/serial/impl/psb_s_csc_impl.f90 b/base/serial/impl/psb_s_csc_impl.f90 index 6eb0d830..6318db9d 100644 --- a/base/serial/impl/psb_s_csc_impl.f90 +++ b/base/serial/impl/psb_s_csc_impl.f90 @@ -2298,7 +2298,7 @@ subroutine psb_s_mv_csc_from_coo(a,b,info) call move_alloc(b%ja,itemp) call move_alloc(b%ia,a%ia) call move_alloc(b%val,a%val) - call psb_realloc(nc+1,a%icp,info) + call psb_realloc(max(nr+1,nc+1),a%icp,info) call b%free() a%icp(:) = 0 diff --git a/base/serial/impl/psb_s_csr_impl.f90 b/base/serial/impl/psb_s_csr_impl.f90 index e2fc8298..a5bca393 100644 --- a/base/serial/impl/psb_s_csr_impl.f90 +++ b/base/serial/impl/psb_s_csr_impl.f90 @@ -999,6 +999,8 @@ contains end subroutine psb_s_csr_cssv + + subroutine psb_s_csr_cssm(alpha,a,x,beta,y,info,trans) use psb_error_mod use psb_string_mod @@ -2960,7 +2962,7 @@ subroutine psb_s_cp_csr_from_coo(a,b,info) call move_alloc(tmp%ia,itemp) call move_alloc(tmp%ja,a%ja) call move_alloc(tmp%val,a%val) - call psb_realloc(nr+1,a%irp,info) + call psb_realloc(max(nr+1,nc+1),a%irp,info) call tmp%free() else @@ -3129,7 +3131,7 @@ subroutine psb_s_mv_csr_from_coo(a,b,info) call move_alloc(b%ia,itemp) call move_alloc(b%ja,a%ja) call move_alloc(b%val,a%val) - call psb_realloc(nr+1,a%irp,info) + call psb_realloc(max(nr+1,nc+1),a%irp,info) call b%free() @@ -3384,7 +3386,7 @@ subroutine psb_scsrspspmm(a,b,c,info) ! Estimate number of nonzeros on output. nza = a%get_nzeros() nzb = b%get_nzeros() - nzc = int(1.25*(nza+nzb)) + nzc = 2*(nza+nzb) call c%allocate(ma,nb,nzc) call csr_spspmm(a,b,c,info) diff --git a/base/serial/impl/psb_z_coo_impl.f90 b/base/serial/impl/psb_z_coo_impl.f90 index e1883e03..d5ee9740 100644 --- a/base/serial/impl/psb_z_coo_impl.f90 +++ b/base/serial/impl/psb_z_coo_impl.f90 @@ -3416,21 +3416,26 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) dupl_ = dupl - - allocate(iaux(max(nr,nc,nzin)+2),stat=info) + allocate(iaux(nzin+2),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) goto 9999 end if - - allocate(ias(nzin),jas(nzin),vs(nzin),ix2(max(nr,nc,nzin)+2), stat=info) - use_buffers = (info == 0) select case(idir_) case(psb_row_major_) ! Row major order + + if (nr <= nzin) then + ! Avoid strange situations with large indices + allocate(ias(nzin),jas(nzin),vs(nzin),ix2(nzin+2), stat=info) + use_buffers = (info == 0) + else + use_buffers = .false. + end if + if (use_buffers) then if (.not.( (ia(1) < 1).or.(ia(1)> nr)) ) then iaux(:) = 0 @@ -3752,6 +3757,14 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) case(psb_col_major_) + if (nc <= nzin) then + ! Avoid strange situations with large indices + allocate(ias(nzin),jas(nzin),vs(nzin),ix2(nzin+2), stat=info) + use_buffers = (info == 0) + else + use_buffers = .false. + end if + if (use_buffers) then iaux(:) = 0 if (.not.( (ja(1) < 1).or.(ja(1)> nc)) ) then diff --git a/base/serial/impl/psb_z_csc_impl.f90 b/base/serial/impl/psb_z_csc_impl.f90 index 2d0fa913..bd14f8a3 100644 --- a/base/serial/impl/psb_z_csc_impl.f90 +++ b/base/serial/impl/psb_z_csc_impl.f90 @@ -2298,7 +2298,7 @@ subroutine psb_z_mv_csc_from_coo(a,b,info) call move_alloc(b%ja,itemp) call move_alloc(b%ia,a%ia) call move_alloc(b%val,a%val) - call psb_realloc(nc+1,a%icp,info) + call psb_realloc(max(nr+1,nc+1),a%icp,info) call b%free() a%icp(:) = 0 diff --git a/base/serial/impl/psb_z_csr_impl.f90 b/base/serial/impl/psb_z_csr_impl.f90 index 015931c6..b258eaac 100644 --- a/base/serial/impl/psb_z_csr_impl.f90 +++ b/base/serial/impl/psb_z_csr_impl.f90 @@ -999,6 +999,8 @@ contains end subroutine psb_z_csr_cssv + + subroutine psb_z_csr_cssm(alpha,a,x,beta,y,info,trans) use psb_error_mod use psb_string_mod @@ -2960,7 +2962,7 @@ subroutine psb_z_cp_csr_from_coo(a,b,info) call move_alloc(tmp%ia,itemp) call move_alloc(tmp%ja,a%ja) call move_alloc(tmp%val,a%val) - call psb_realloc(nr+1,a%irp,info) + call psb_realloc(max(nr+1,nc+1),a%irp,info) call tmp%free() else @@ -3129,7 +3131,7 @@ subroutine psb_z_mv_csr_from_coo(a,b,info) call move_alloc(b%ia,itemp) call move_alloc(b%ja,a%ja) call move_alloc(b%val,a%val) - call psb_realloc(nr+1,a%irp,info) + call psb_realloc(max(nr+1,nc+1),a%irp,info) call b%free() @@ -3384,7 +3386,7 @@ subroutine psb_zcsrspspmm(a,b,c,info) ! Estimate number of nonzeros on output. nza = a%get_nzeros() nzb = b%get_nzeros() - nzc = int(1.25*(nza+nzb)) + nzc = 2*(nza+nzb) call c%allocate(ma,nb,nzc) call csr_spspmm(a,b,c,info)