From 4b4d1f06b6099a99163a6c6e94d84fda6e7af7be Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Tue, 1 Oct 2013 14:52:15 +0000 Subject: [PATCH] psblas-3.99: base/psblas/psb_cspmm.f90 base/psblas/psb_cspsm.f90 base/psblas/psb_dspmm.f90 base/psblas/psb_dspsm.f90 base/psblas/psb_sspmm.f90 base/psblas/psb_sspsm.f90 base/psblas/psb_zspmm.f90 base/psblas/psb_zspsm.f90 base/tools/psb_cspins.f90 base/tools/psb_dspins.f90 base/tools/psb_sspins.f90 base/tools/psb_zspins.f90 Performance fixes in spins/cdins, take 1. Vector checks in _sm, _mm _vect --- base/psblas/psb_cspmm.f90 | 1 - base/psblas/psb_cspsm.f90 | 47 +++++---------------------------------- base/psblas/psb_dspmm.f90 | 1 - base/psblas/psb_dspsm.f90 | 47 +++++---------------------------------- base/psblas/psb_sspmm.f90 | 1 - base/psblas/psb_sspsm.f90 | 47 +++++---------------------------------- base/psblas/psb_zspmm.f90 | 1 - base/psblas/psb_zspsm.f90 | 47 +++++---------------------------------- base/tools/psb_cspins.f90 | 22 ++++++------------ base/tools/psb_dspins.f90 | 22 ++++++------------ base/tools/psb_sspins.f90 | 22 ++++++------------ base/tools/psb_zspins.f90 | 22 ++++++------------ 12 files changed, 52 insertions(+), 228 deletions(-) diff --git a/base/psblas/psb_cspmm.f90 b/base/psblas/psb_cspmm.f90 index bdf6e685..5636eb08 100644 --- a/base/psblas/psb_cspmm.f90 +++ b/base/psblas/psb_cspmm.f90 @@ -765,7 +765,6 @@ subroutine psb_cspmv_vect(alpha,a,x,beta,y,desc_a,info,& call psb_errpush(info,name,a_err=ch_err) goto 9999 end if - iwork => null() ! check for presence/size of a work area diff --git a/base/psblas/psb_cspsm.f90 b/base/psblas/psb_cspsm.f90 index ed121fe0..af102d66 100644 --- a/base/psblas/psb_cspsm.f90 +++ b/base/psblas/psb_cspsm.f90 @@ -606,15 +606,6 @@ subroutine psb_cspsv_vect(alpha,a,x,beta,y,desc_a,info,& endif - ! just this case right now - ia = 1 - ja = 1 - ix = 1 - iy = 1 - ik = 1 - jx = 1 - jy = 1 - if (present(choice)) then choice_ = choice else @@ -645,10 +636,13 @@ subroutine psb_cspsv_vect(alpha,a,x,beta,y,desc_a,info,& ncol = desc_a%get_local_cols() lldx = x%get_nrows() lldy = y%get_nrows() + if ((info == 0).and.(lldx null() ! check for presence/size of a work area diff --git a/base/psblas/psb_dspsm.f90 b/base/psblas/psb_dspsm.f90 index eeb601d9..9d2f6043 100644 --- a/base/psblas/psb_dspsm.f90 +++ b/base/psblas/psb_dspsm.f90 @@ -606,15 +606,6 @@ subroutine psb_dspsv_vect(alpha,a,x,beta,y,desc_a,info,& endif - ! just this case right now - ia = 1 - ja = 1 - ix = 1 - iy = 1 - ik = 1 - jx = 1 - jy = 1 - if (present(choice)) then choice_ = choice else @@ -645,10 +636,13 @@ subroutine psb_dspsv_vect(alpha,a,x,beta,y,desc_a,info,& ncol = desc_a%get_local_cols() lldx = x%get_nrows() lldy = y%get_nrows() + if ((info == 0).and.(lldx null() ! check for presence/size of a work area diff --git a/base/psblas/psb_sspsm.f90 b/base/psblas/psb_sspsm.f90 index c26df4aa..be7cc6e4 100644 --- a/base/psblas/psb_sspsm.f90 +++ b/base/psblas/psb_sspsm.f90 @@ -606,15 +606,6 @@ subroutine psb_sspsv_vect(alpha,a,x,beta,y,desc_a,info,& endif - ! just this case right now - ia = 1 - ja = 1 - ix = 1 - iy = 1 - ik = 1 - jx = 1 - jy = 1 - if (present(choice)) then choice_ = choice else @@ -645,10 +636,13 @@ subroutine psb_sspsv_vect(alpha,a,x,beta,y,desc_a,info,& ncol = desc_a%get_local_cols() lldx = x%get_nrows() lldy = y%get_nrows() + if ((info == 0).and.(lldx null() ! check for presence/size of a work area diff --git a/base/psblas/psb_zspsm.f90 b/base/psblas/psb_zspsm.f90 index b09425d4..363343af 100644 --- a/base/psblas/psb_zspsm.f90 +++ b/base/psblas/psb_zspsm.f90 @@ -606,15 +606,6 @@ subroutine psb_zspsv_vect(alpha,a,x,beta,y,desc_a,info,& endif - ! just this case right now - ia = 1 - ja = 1 - ix = 1 - iy = 1 - ik = 1 - jx = 1 - jy = 1 - if (present(choice)) then choice_ = choice else @@ -645,10 +636,13 @@ subroutine psb_zspsv_vect(alpha,a,x,beta,y,desc_a,info,& ncol = desc_a%get_local_cols() lldx = x%get_nrows() lldy = y%get_nrows() + if ((info == 0).and.(lldx0)) if (info /= psb_success_) then ierr(1) = info @@ -178,10 +174,8 @@ subroutine psb_cspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) goto 9999 end if - ila(1:nz) = ia(1:nz) - jla(1:nz) = ja(1:nz) - call psb_glob_to_loc(ila(1:nz),desc_a,info,iact='I') - call psb_glob_to_loc(jla(1:nz),desc_a,info,iact='I') + call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info) + call desc_a%indxmap%g2l(ja(1:nz),jla(1:nz),info) call a%csput(nz,ila,jla,val,ione,nrow,ione,ncol,info) if (info /= psb_success_) then @@ -282,11 +276,9 @@ subroutine psb_cspins_2desc(nz,ia,ja,val,a,desc_ar,desc_ac,info) & a_err='allocate',i_err=ierr) goto 9999 end if - ila(1:nz) = ia(1:nz) - - call psb_glob_to_loc(ia(1:nz),ila(1:nz),desc_ar,info,iact='I',owned=.true.) - call psb_cdins(nz,ja,desc_ac,info,jla=jla, mask=(ila(1:nz)>0)) + call desc_ar%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) + call desc_ac%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info, mask=(ila(1:nz)>0)) if (psb_errstatus_fatal()) then ierr(1) = info diff --git a/base/tools/psb_dspins.f90 b/base/tools/psb_dspins.f90 index 15685d52..56a3be17 100644 --- a/base/tools/psb_dspins.f90 +++ b/base/tools/psb_dspins.f90 @@ -75,12 +75,6 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) name = 'psb_dspins' call psb_erractionsave(err_act) - if (.not.desc_a%is_ok()) then - info = psb_err_invalid_cd_state_ - call psb_errpush(info,name) - goto 9999 - end if - ictxt = desc_a%get_context() call psb_info(ictxt, me, np) @@ -133,7 +127,9 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) & a_err='allocate',i_err=ierr) goto 9999 end if - call psb_cdins(nz,ia,ja,desc_a,info,ila=ila,jla=jla) + + call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) + call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,mask=(ila(1:nz)>0)) if (info /= psb_success_) then ierr(1) = info @@ -178,10 +174,8 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) goto 9999 end if - ila(1:nz) = ia(1:nz) - jla(1:nz) = ja(1:nz) - call psb_glob_to_loc(ila(1:nz),desc_a,info,iact='I') - call psb_glob_to_loc(jla(1:nz),desc_a,info,iact='I') + call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info) + call desc_a%indxmap%g2l(ja(1:nz),jla(1:nz),info) call a%csput(nz,ila,jla,val,ione,nrow,ione,ncol,info) if (info /= psb_success_) then @@ -282,11 +276,9 @@ subroutine psb_dspins_2desc(nz,ia,ja,val,a,desc_ar,desc_ac,info) & a_err='allocate',i_err=ierr) goto 9999 end if - ila(1:nz) = ia(1:nz) - - call psb_glob_to_loc(ia(1:nz),ila(1:nz),desc_ar,info,iact='I',owned=.true.) - call psb_cdins(nz,ja,desc_ac,info,jla=jla, mask=(ila(1:nz)>0)) + call desc_ar%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) + call desc_ac%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info, mask=(ila(1:nz)>0)) if (psb_errstatus_fatal()) then ierr(1) = info diff --git a/base/tools/psb_sspins.f90 b/base/tools/psb_sspins.f90 index 52b31981..b3110bbe 100644 --- a/base/tools/psb_sspins.f90 +++ b/base/tools/psb_sspins.f90 @@ -75,12 +75,6 @@ subroutine psb_sspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) name = 'psb_sspins' call psb_erractionsave(err_act) - if (.not.desc_a%is_ok()) then - info = psb_err_invalid_cd_state_ - call psb_errpush(info,name) - goto 9999 - end if - ictxt = desc_a%get_context() call psb_info(ictxt, me, np) @@ -133,7 +127,9 @@ subroutine psb_sspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) & a_err='allocate',i_err=ierr) goto 9999 end if - call psb_cdins(nz,ia,ja,desc_a,info,ila=ila,jla=jla) + + call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) + call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,mask=(ila(1:nz)>0)) if (info /= psb_success_) then ierr(1) = info @@ -178,10 +174,8 @@ subroutine psb_sspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) goto 9999 end if - ila(1:nz) = ia(1:nz) - jla(1:nz) = ja(1:nz) - call psb_glob_to_loc(ila(1:nz),desc_a,info,iact='I') - call psb_glob_to_loc(jla(1:nz),desc_a,info,iact='I') + call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info) + call desc_a%indxmap%g2l(ja(1:nz),jla(1:nz),info) call a%csput(nz,ila,jla,val,ione,nrow,ione,ncol,info) if (info /= psb_success_) then @@ -282,11 +276,9 @@ subroutine psb_sspins_2desc(nz,ia,ja,val,a,desc_ar,desc_ac,info) & a_err='allocate',i_err=ierr) goto 9999 end if - ila(1:nz) = ia(1:nz) - - call psb_glob_to_loc(ia(1:nz),ila(1:nz),desc_ar,info,iact='I',owned=.true.) - call psb_cdins(nz,ja,desc_ac,info,jla=jla, mask=(ila(1:nz)>0)) + call desc_ar%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) + call desc_ac%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info, mask=(ila(1:nz)>0)) if (psb_errstatus_fatal()) then ierr(1) = info diff --git a/base/tools/psb_zspins.f90 b/base/tools/psb_zspins.f90 index f7edaeba..5deb87ba 100644 --- a/base/tools/psb_zspins.f90 +++ b/base/tools/psb_zspins.f90 @@ -75,12 +75,6 @@ subroutine psb_zspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) name = 'psb_zspins' call psb_erractionsave(err_act) - if (.not.desc_a%is_ok()) then - info = psb_err_invalid_cd_state_ - call psb_errpush(info,name) - goto 9999 - end if - ictxt = desc_a%get_context() call psb_info(ictxt, me, np) @@ -133,7 +127,9 @@ subroutine psb_zspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) & a_err='allocate',i_err=ierr) goto 9999 end if - call psb_cdins(nz,ia,ja,desc_a,info,ila=ila,jla=jla) + + call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) + call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,mask=(ila(1:nz)>0)) if (info /= psb_success_) then ierr(1) = info @@ -178,10 +174,8 @@ subroutine psb_zspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) goto 9999 end if - ila(1:nz) = ia(1:nz) - jla(1:nz) = ja(1:nz) - call psb_glob_to_loc(ila(1:nz),desc_a,info,iact='I') - call psb_glob_to_loc(jla(1:nz),desc_a,info,iact='I') + call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info) + call desc_a%indxmap%g2l(ja(1:nz),jla(1:nz),info) call a%csput(nz,ila,jla,val,ione,nrow,ione,ncol,info) if (info /= psb_success_) then @@ -282,11 +276,9 @@ subroutine psb_zspins_2desc(nz,ia,ja,val,a,desc_ar,desc_ac,info) & a_err='allocate',i_err=ierr) goto 9999 end if - ila(1:nz) = ia(1:nz) - - call psb_glob_to_loc(ia(1:nz),ila(1:nz),desc_ar,info,iact='I',owned=.true.) - call psb_cdins(nz,ja,desc_ac,info,jla=jla, mask=(ila(1:nz)>0)) + call desc_ar%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) + call desc_ac%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info, mask=(ila(1:nz)>0)) if (psb_errstatus_fatal()) then ierr(1) = info