From 775b1de8d69d8a22d135e25cb3ba365b10e81a08 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Tue, 1 Oct 2013 14:45:45 +0000 Subject: [PATCH] psblas3: base/psblas/psb_cspmm.f90 base/psblas/psb_cspsm.f90 base/psblas/psb_dspmm.f90 base/psblas/psb_dspsm.f90 base/psblas/psb_sspmm.f90 base/psblas/psb_sspsm.f90 base/psblas/psb_zspmm.f90 base/psblas/psb_zspsm.f90 base/tools/psb_cdins.f90 base/tools/psb_cspins.f90 base/tools/psb_dspins.f90 base/tools/psb_loc_to_glob.f90 base/tools/psb_sspins.f90 base/tools/psb_zspins.f90 Performance fixes in spins/cdins, take 1. Vector checks in _sm, _mm _vect --- base/psblas/psb_cspmm.f90 | 1 - base/psblas/psb_cspsm.f90 | 47 +++++----------------------------- base/psblas/psb_dspmm.f90 | 1 - base/psblas/psb_dspsm.f90 | 47 +++++----------------------------- base/psblas/psb_sspmm.f90 | 1 - base/psblas/psb_sspsm.f90 | 47 +++++----------------------------- base/psblas/psb_zspmm.f90 | 1 - base/psblas/psb_zspsm.f90 | 47 +++++----------------------------- base/tools/psb_cdins.f90 | 10 -------- base/tools/psb_cspins.f90 | 22 +++++----------- base/tools/psb_dspins.f90 | 12 +-------- base/tools/psb_loc_to_glob.f90 | 11 ++++++++ base/tools/psb_sspins.f90 | 22 +++++----------- base/tools/psb_zspins.f90 | 22 +++++----------- 14 files changed, 57 insertions(+), 234 deletions(-) diff --git a/base/psblas/psb_cspmm.f90 b/base/psblas/psb_cspmm.f90 index bdf6e685..5636eb08 100644 --- a/base/psblas/psb_cspmm.f90 +++ b/base/psblas/psb_cspmm.f90 @@ -765,7 +765,6 @@ subroutine psb_cspmv_vect(alpha,a,x,beta,y,desc_a,info,& call psb_errpush(info,name,a_err=ch_err) goto 9999 end if - iwork => null() ! check for presence/size of a work area diff --git a/base/psblas/psb_cspsm.f90 b/base/psblas/psb_cspsm.f90 index ed121fe0..af102d66 100644 --- a/base/psblas/psb_cspsm.f90 +++ b/base/psblas/psb_cspsm.f90 @@ -606,15 +606,6 @@ subroutine psb_cspsv_vect(alpha,a,x,beta,y,desc_a,info,& endif - ! just this case right now - ia = 1 - ja = 1 - ix = 1 - iy = 1 - ik = 1 - jx = 1 - jy = 1 - if (present(choice)) then choice_ = choice else @@ -645,10 +636,13 @@ subroutine psb_cspsv_vect(alpha,a,x,beta,y,desc_a,info,& ncol = desc_a%get_local_cols() lldx = x%get_nrows() lldy = y%get_nrows() + if ((info == 0).and.(lldx null() ! check for presence/size of a work area diff --git a/base/psblas/psb_dspsm.f90 b/base/psblas/psb_dspsm.f90 index eeb601d9..9d2f6043 100644 --- a/base/psblas/psb_dspsm.f90 +++ b/base/psblas/psb_dspsm.f90 @@ -606,15 +606,6 @@ subroutine psb_dspsv_vect(alpha,a,x,beta,y,desc_a,info,& endif - ! just this case right now - ia = 1 - ja = 1 - ix = 1 - iy = 1 - ik = 1 - jx = 1 - jy = 1 - if (present(choice)) then choice_ = choice else @@ -645,10 +636,13 @@ subroutine psb_dspsv_vect(alpha,a,x,beta,y,desc_a,info,& ncol = desc_a%get_local_cols() lldx = x%get_nrows() lldy = y%get_nrows() + if ((info == 0).and.(lldx null() ! check for presence/size of a work area diff --git a/base/psblas/psb_sspsm.f90 b/base/psblas/psb_sspsm.f90 index c26df4aa..be7cc6e4 100644 --- a/base/psblas/psb_sspsm.f90 +++ b/base/psblas/psb_sspsm.f90 @@ -606,15 +606,6 @@ subroutine psb_sspsv_vect(alpha,a,x,beta,y,desc_a,info,& endif - ! just this case right now - ia = 1 - ja = 1 - ix = 1 - iy = 1 - ik = 1 - jx = 1 - jy = 1 - if (present(choice)) then choice_ = choice else @@ -645,10 +636,13 @@ subroutine psb_sspsv_vect(alpha,a,x,beta,y,desc_a,info,& ncol = desc_a%get_local_cols() lldx = x%get_nrows() lldy = y%get_nrows() + if ((info == 0).and.(lldx null() ! check for presence/size of a work area diff --git a/base/psblas/psb_zspsm.f90 b/base/psblas/psb_zspsm.f90 index b09425d4..363343af 100644 --- a/base/psblas/psb_zspsm.f90 +++ b/base/psblas/psb_zspsm.f90 @@ -606,15 +606,6 @@ subroutine psb_zspsv_vect(alpha,a,x,beta,y,desc_a,info,& endif - ! just this case right now - ia = 1 - ja = 1 - ix = 1 - iy = 1 - ik = 1 - jx = 1 - jy = 1 - if (present(choice)) then choice_ = choice else @@ -645,10 +636,13 @@ subroutine psb_zspsv_vect(alpha,a,x,beta,y,desc_a,info,& ncol = desc_a%get_local_cols() lldx = x%get_nrows() lldy = y%get_nrows() + if ((info == 0).and.(lldx0)) -!!$ call psi_idx_cnv(nz,ia,ila,desc_a,info,owned=.true.) -!!$ if (info == psb_success_) & -!!$ & call psb_cdins(nz,ja,desc_a,info,jla=jla,mask=(ila(1:nz)>0)) else if (present(ila).or.present(jla)) then write(psb_err_unit,*) 'Inconsistent call : ',present(ila),present(jla) @@ -139,10 +136,6 @@ subroutine psb_cdinsrc(nz,ia,ja,desc_a,info,ila,jla) jla_(1:nz) = ja(1:nz) call desc_a%indxmap%g2lip_ins(jla_(1:nz),info,mask=(ila_(1:nz)>0)) end if - -!!$ call psi_idx_cnv(nz,ia,ila_,desc_a,info,owned=.true.) -!!$ if (info == psb_success_) & -!!$ & call psb_cdins(nz,ja,desc_a,info,mask=(ila_(1:nz)>0)) deallocate(ila_) end if if (info /= psb_success_) goto 9999 @@ -256,9 +249,7 @@ subroutine psb_cdinsc(nz,ja,desc,info,jla,mask,lidx) end if if (present(jla)) then -!!$ call psi_idx_ins_cnv(nz,ja,jla,desc,info,mask=mask,lidx=lidx) call desc%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,mask=mask,lidx=lidx) - else allocate(jla_(nz),stat=info) if (info /= psb_success_) then @@ -266,7 +257,6 @@ subroutine psb_cdinsc(nz,ja,desc,info,jla,mask,lidx) call psb_errpush(info,name) goto 9999 end if -!!$ call psi_idx_ins_cnv(nz,ja,jla_,desc,info,mask=mask,lidx=lidx) call desc%indxmap%g2l_ins(ja(1:nz),jla_(1:nz),info,mask=mask,lidx=lidx) deallocate(jla_) end if diff --git a/base/tools/psb_cspins.f90 b/base/tools/psb_cspins.f90 index e2bbef11..f936aa0e 100644 --- a/base/tools/psb_cspins.f90 +++ b/base/tools/psb_cspins.f90 @@ -75,12 +75,6 @@ subroutine psb_cspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) name = 'psb_cspins' call psb_erractionsave(err_act) - if (.not.desc_a%is_ok()) then - info = psb_err_invalid_cd_state_ - call psb_errpush(info,name) - goto 9999 - end if - ictxt = desc_a%get_context() call psb_info(ictxt, me, np) @@ -133,7 +127,9 @@ subroutine psb_cspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) & a_err='allocate',i_err=ierr) goto 9999 end if - call psb_cdins(nz,ia,ja,desc_a,info,ila=ila,jla=jla) + + call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) + call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,mask=(ila(1:nz)>0)) if (info /= psb_success_) then ierr(1) = info @@ -178,10 +174,8 @@ subroutine psb_cspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) goto 9999 end if - ila(1:nz) = ia(1:nz) - jla(1:nz) = ja(1:nz) - call psb_glob_to_loc(ila(1:nz),desc_a,info,iact='I') - call psb_glob_to_loc(jla(1:nz),desc_a,info,iact='I') + call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info) + call desc_a%indxmap%g2l(ja(1:nz),jla(1:nz),info) call a%csput(nz,ila,jla,val,ione,nrow,ione,ncol,info) if (info /= psb_success_) then @@ -282,11 +276,9 @@ subroutine psb_cspins_2desc(nz,ia,ja,val,a,desc_ar,desc_ac,info) & a_err='allocate',i_err=ierr) goto 9999 end if - ila(1:nz) = ia(1:nz) - - call psb_glob_to_loc(ia(1:nz),ila(1:nz),desc_ar,info,iact='I',owned=.true.) - call psb_cdins(nz,ja,desc_ac,info,jla=jla, mask=(ila(1:nz)>0)) + call desc_ar%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) + call desc_ac%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info, mask=(ila(1:nz)>0)) if (psb_errstatus_fatal()) then ierr(1) = info diff --git a/base/tools/psb_dspins.f90 b/base/tools/psb_dspins.f90 index 62c8b657..56a3be17 100644 --- a/base/tools/psb_dspins.f90 +++ b/base/tools/psb_dspins.f90 @@ -75,12 +75,6 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) name = 'psb_dspins' call psb_erractionsave(err_act) -!!$ if (.not.desc_a%is_ok()) then -!!$ info = psb_err_invalid_cd_state_ -!!$ call psb_errpush(info,name) -!!$ goto 9999 -!!$ end if - ictxt = desc_a%get_context() call psb_info(ictxt, me, np) @@ -133,7 +127,7 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) & a_err='allocate',i_err=ierr) goto 9999 end if -!!$ call psb_cdins(nz,ia,ja,desc_a,info,ila=ila,jla=jla,good_desc=.true.) + call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,mask=(ila(1:nz)>0)) @@ -180,8 +174,6 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) goto 9999 end if -!!$ call psb_glob_to_loc(ia(1:nz),ila(1:nz),desc_a,info,iact='I') -!!$ call psb_glob_to_loc(ja(1:nz),jla(1:nz),desc_a,info,iact='I') call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info) call desc_a%indxmap%g2l(ja(1:nz),jla(1:nz),info) @@ -286,8 +278,6 @@ subroutine psb_dspins_2desc(nz,ia,ja,val,a,desc_ar,desc_ac,info) end if call desc_ar%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) - -!!$ call psb_cdins(nz,ja,desc_ac,info,jla=jla, mask=(ila(1:nz)>0)) call desc_ac%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info, mask=(ila(1:nz)>0)) if (psb_errstatus_fatal()) then diff --git a/base/tools/psb_loc_to_glob.f90 b/base/tools/psb_loc_to_glob.f90 index 0255ef0d..67e07409 100644 --- a/base/tools/psb_loc_to_glob.f90 +++ b/base/tools/psb_loc_to_glob.f90 @@ -66,6 +66,12 @@ subroutine psb_loc_to_glob2v(x,y,desc_a,info,iact) info=psb_success_ name='psb_loc_to_glob2' call psb_erractionsave(err_act) + if (.not.desc_a%is_valid()) then + info = psb_err_invalid_cd_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (present(iact)) then act=iact @@ -171,6 +177,11 @@ subroutine psb_loc_to_glob1v(x,desc_a,info,iact) info=psb_success_ name='psb_loc_to_glob' call psb_erractionsave(err_act) + if (.not.desc_a%is_valid()) then + info = psb_err_invalid_cd_state_ + call psb_errpush(info,name) + goto 9999 + endif if (present(iact)) then act=iact diff --git a/base/tools/psb_sspins.f90 b/base/tools/psb_sspins.f90 index 52b31981..b3110bbe 100644 --- a/base/tools/psb_sspins.f90 +++ b/base/tools/psb_sspins.f90 @@ -75,12 +75,6 @@ subroutine psb_sspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) name = 'psb_sspins' call psb_erractionsave(err_act) - if (.not.desc_a%is_ok()) then - info = psb_err_invalid_cd_state_ - call psb_errpush(info,name) - goto 9999 - end if - ictxt = desc_a%get_context() call psb_info(ictxt, me, np) @@ -133,7 +127,9 @@ subroutine psb_sspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) & a_err='allocate',i_err=ierr) goto 9999 end if - call psb_cdins(nz,ia,ja,desc_a,info,ila=ila,jla=jla) + + call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) + call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,mask=(ila(1:nz)>0)) if (info /= psb_success_) then ierr(1) = info @@ -178,10 +174,8 @@ subroutine psb_sspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) goto 9999 end if - ila(1:nz) = ia(1:nz) - jla(1:nz) = ja(1:nz) - call psb_glob_to_loc(ila(1:nz),desc_a,info,iact='I') - call psb_glob_to_loc(jla(1:nz),desc_a,info,iact='I') + call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info) + call desc_a%indxmap%g2l(ja(1:nz),jla(1:nz),info) call a%csput(nz,ila,jla,val,ione,nrow,ione,ncol,info) if (info /= psb_success_) then @@ -282,11 +276,9 @@ subroutine psb_sspins_2desc(nz,ia,ja,val,a,desc_ar,desc_ac,info) & a_err='allocate',i_err=ierr) goto 9999 end if - ila(1:nz) = ia(1:nz) - - call psb_glob_to_loc(ia(1:nz),ila(1:nz),desc_ar,info,iact='I',owned=.true.) - call psb_cdins(nz,ja,desc_ac,info,jla=jla, mask=(ila(1:nz)>0)) + call desc_ar%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) + call desc_ac%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info, mask=(ila(1:nz)>0)) if (psb_errstatus_fatal()) then ierr(1) = info diff --git a/base/tools/psb_zspins.f90 b/base/tools/psb_zspins.f90 index f7edaeba..5deb87ba 100644 --- a/base/tools/psb_zspins.f90 +++ b/base/tools/psb_zspins.f90 @@ -75,12 +75,6 @@ subroutine psb_zspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) name = 'psb_zspins' call psb_erractionsave(err_act) - if (.not.desc_a%is_ok()) then - info = psb_err_invalid_cd_state_ - call psb_errpush(info,name) - goto 9999 - end if - ictxt = desc_a%get_context() call psb_info(ictxt, me, np) @@ -133,7 +127,9 @@ subroutine psb_zspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) & a_err='allocate',i_err=ierr) goto 9999 end if - call psb_cdins(nz,ia,ja,desc_a,info,ila=ila,jla=jla) + + call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) + call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,mask=(ila(1:nz)>0)) if (info /= psb_success_) then ierr(1) = info @@ -178,10 +174,8 @@ subroutine psb_zspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) goto 9999 end if - ila(1:nz) = ia(1:nz) - jla(1:nz) = ja(1:nz) - call psb_glob_to_loc(ila(1:nz),desc_a,info,iact='I') - call psb_glob_to_loc(jla(1:nz),desc_a,info,iact='I') + call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info) + call desc_a%indxmap%g2l(ja(1:nz),jla(1:nz),info) call a%csput(nz,ila,jla,val,ione,nrow,ione,ncol,info) if (info /= psb_success_) then @@ -282,11 +276,9 @@ subroutine psb_zspins_2desc(nz,ia,ja,val,a,desc_ar,desc_ac,info) & a_err='allocate',i_err=ierr) goto 9999 end if - ila(1:nz) = ia(1:nz) - - call psb_glob_to_loc(ia(1:nz),ila(1:nz),desc_ar,info,iact='I',owned=.true.) - call psb_cdins(nz,ja,desc_ac,info,jla=jla, mask=(ila(1:nz)>0)) + call desc_ar%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) + call desc_ac%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info, mask=(ila(1:nz)>0)) if (psb_errstatus_fatal()) then ierr(1) = info