diff --git a/.gitignore b/.gitignore index 485fb98d..9d711cb9 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ # header files generated cbind/*.h +util/psb_metis_int.h # Make.inc generated /Make.inc diff --git a/Make.inc.in b/Make.inc.in index b6ae650f..5741fca7 100755 --- a/Make.inc.in +++ b/Make.inc.in @@ -23,7 +23,7 @@ EXTRA_OPT=@EXTRA_OPT@ MPFC=@MPIFC@ MPCC=@MPICC@ -FLINK=$(MPFC) +FLINK=@FLINK@ LIBS=@LIBS@ diff --git a/base/comm/internals/psi_cswapdata.F90 b/base/comm/internals/psi_cswapdata.F90 index 5af5b79e..5f0b8bdc 100644 --- a/base/comm/internals/psi_cswapdata.F90 +++ b/base/comm/internals/psi_cswapdata.F90 @@ -175,7 +175,7 @@ end subroutine psi_cswapdata_vect ! ! ! -subroutine psi_cswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, & +subroutine psi_cswap_vidx_vect(ctxt,icomm,flag,beta,y,idx, & & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_cswap_vidx_vect @@ -192,8 +192,8 @@ subroutine psi_cswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, & include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_c_base_vect_type) :: y @@ -203,9 +203,8 @@ subroutine psi_cswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, & integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable :: prcid(:) integer(psb_ipk_) :: nesd, nerv,& & err_act, i, idx_pt, totsnd_, totrcv_,& @@ -218,9 +217,6 @@ subroutine psi_cswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, & info=psb_success_ name='psi_swap_datav' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ @@ -517,7 +513,7 @@ end subroutine psi_cswapdata_multivect ! ! ! -subroutine psi_cswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, & +subroutine psi_cswap_vidx_multivect(ctxt,icomm,flag,beta,y,idx, & & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_cswap_vidx_multivect @@ -534,8 +530,8 @@ subroutine psi_cswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, & include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_c_base_multivect_type) :: y @@ -545,9 +541,8 @@ subroutine psi_cswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, & integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable :: prcid(:) integer(psb_ipk_) :: nesd, nerv,& & err_act, i, idx_pt, totsnd_, totrcv_,& @@ -560,9 +555,6 @@ subroutine psi_cswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, & info=psb_success_ name='psi_swap_datav' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ diff --git a/base/comm/internals/psi_cswapdata_a.F90 b/base/comm/internals/psi_cswapdata_a.F90 index 43b91872..715b674e 100644 --- a/base/comm/internals/psi_cswapdata_a.F90 +++ b/base/comm/internals/psi_cswapdata_a.F90 @@ -179,8 +179,8 @@ subroutine psi_cswapidxm(ctxt,icomm,flag,n,beta,y,idx, & ! locals - integer(psb_mpk_) :: np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd integer(psb_ipk_) :: nesd, nerv,& @@ -197,7 +197,6 @@ subroutine psi_cswapidxm(ctxt,icomm,flag,n,beta,y,idx, & info=psb_success_ name='psi_swap_data' call psb_erractionsave(err_act) - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ @@ -641,7 +640,7 @@ end subroutine psi_cswapdatav ! ! ! -subroutine psi_cswapidxv(ictxt,iicomm,flag,beta,y,idx, & +subroutine psi_cswapidxv(ctxt,icomm,flag,beta,y,idx, & & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_cswapidxv @@ -656,8 +655,8 @@ subroutine psi_cswapidxv(ictxt,iicomm,flag,beta,y,idx, & include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info complex(psb_spk_) :: y(:), beta @@ -665,9 +664,8 @@ subroutine psi_cswapidxv(ictxt,iicomm,flag,beta,y,idx, & integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd integer(psb_ipk_) :: nesd, nerv,& @@ -684,9 +682,6 @@ subroutine psi_cswapidxv(ictxt,iicomm,flag,beta,y,idx, & info=psb_success_ name='psi_swap_datav' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ diff --git a/base/comm/internals/psi_cswaptran.F90 b/base/comm/internals/psi_cswaptran.F90 index 401d8435..aefb6b01 100644 --- a/base/comm/internals/psi_cswaptran.F90 +++ b/base/comm/internals/psi_cswaptran.F90 @@ -176,7 +176,7 @@ end subroutine psi_cswaptran_vect ! ! ! -subroutine psi_ctran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,& +subroutine psi_ctran_vidx_vect(ctxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_ctran_vidx_vect @@ -193,8 +193,8 @@ subroutine psi_ctran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,& include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_c_base_vect_type) :: y @@ -204,9 +204,8 @@ subroutine psi_ctran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,& integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable :: prcid(:) integer(psb_ipk_) :: nesd, nerv,& & err_act, i, idx_pt, totsnd_, totrcv_,& @@ -219,9 +218,6 @@ subroutine psi_ctran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,& info=psb_success_ name='psi_swap_tran' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ @@ -528,7 +524,7 @@ subroutine psi_cswaptran_multivect(flag,beta,y,desc_a,work,info,data) ! ! ! -subroutine psi_ctran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,& +subroutine psi_ctran_vidx_multivect(ctxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_ctran_vidx_multivect @@ -545,8 +541,8 @@ subroutine psi_ctran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,& include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_c_base_multivect_type) :: y @@ -556,9 +552,8 @@ subroutine psi_ctran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,& integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable :: prcid(:) integer(psb_ipk_) :: nesd, nerv,& & err_act, i, idx_pt, totsnd_, totrcv_,& @@ -571,9 +566,6 @@ subroutine psi_ctran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,& info=psb_success_ name='psi_swap_tran' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ diff --git a/base/comm/internals/psi_cswaptran_a.F90 b/base/comm/internals/psi_cswaptran_a.F90 index 508e445d..a7f2c687 100644 --- a/base/comm/internals/psi_cswaptran_a.F90 +++ b/base/comm/internals/psi_cswaptran_a.F90 @@ -159,7 +159,7 @@ subroutine psi_cswaptranm(flag,n,beta,y,desc_a,work,info,data) return end subroutine psi_cswaptranm -subroutine psi_ctranidxm(ictxt,iicomm,flag,n,beta,y,idx,& +subroutine psi_ctranidxm(ctxt,icomm,flag,n,beta,y,idx,& & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_ctranidxm @@ -174,8 +174,8 @@ subroutine psi_ctranidxm(ictxt,iicomm,flag,n,beta,y,idx,& include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag,n integer(psb_ipk_), intent(out) :: info complex(psb_spk_) :: y(:,:), beta @@ -183,9 +183,8 @@ subroutine psi_ctranidxm(ictxt,iicomm,flag,n,beta,y,idx,& integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd integer(psb_ipk_) :: nesd, nerv,& @@ -202,9 +201,6 @@ subroutine psi_ctranidxm(ictxt,iicomm,flag,n,beta,y,idx,& info=psb_success_ name='psi_swap_tran' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ @@ -656,7 +652,7 @@ end subroutine psi_cswaptranv ! ! ! -subroutine psi_ctranidxv(ictxt,iicomm,flag,beta,y,idx,& +subroutine psi_ctranidxv(ctxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_ctranidxv @@ -671,8 +667,8 @@ subroutine psi_ctranidxv(ictxt,iicomm,flag,beta,y,idx,& include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info complex(psb_spk_) :: y(:), beta @@ -680,9 +676,8 @@ subroutine psi_ctranidxv(ictxt,iicomm,flag,beta,y,idx,& integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd integer(psb_ipk_) :: nesd, nerv,& @@ -699,9 +694,6 @@ subroutine psi_ctranidxv(ictxt,iicomm,flag,beta,y,idx,& info=psb_success_ name='psi_swap_tran' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ diff --git a/base/comm/internals/psi_dswapdata.F90 b/base/comm/internals/psi_dswapdata.F90 index f99f0254..fe529706 100644 --- a/base/comm/internals/psi_dswapdata.F90 +++ b/base/comm/internals/psi_dswapdata.F90 @@ -175,7 +175,7 @@ end subroutine psi_dswapdata_vect ! ! ! -subroutine psi_dswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, & +subroutine psi_dswap_vidx_vect(ctxt,icomm,flag,beta,y,idx, & & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_dswap_vidx_vect @@ -192,8 +192,8 @@ subroutine psi_dswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, & include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_d_base_vect_type) :: y @@ -203,9 +203,8 @@ subroutine psi_dswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, & integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable :: prcid(:) integer(psb_ipk_) :: nesd, nerv,& & err_act, i, idx_pt, totsnd_, totrcv_,& @@ -218,9 +217,6 @@ subroutine psi_dswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, & info=psb_success_ name='psi_swap_datav' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ @@ -517,7 +513,7 @@ end subroutine psi_dswapdata_multivect ! ! ! -subroutine psi_dswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, & +subroutine psi_dswap_vidx_multivect(ctxt,icomm,flag,beta,y,idx, & & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_dswap_vidx_multivect @@ -534,8 +530,8 @@ subroutine psi_dswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, & include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_d_base_multivect_type) :: y @@ -545,9 +541,8 @@ subroutine psi_dswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, & integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable :: prcid(:) integer(psb_ipk_) :: nesd, nerv,& & err_act, i, idx_pt, totsnd_, totrcv_,& @@ -560,9 +555,6 @@ subroutine psi_dswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, & info=psb_success_ name='psi_swap_datav' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ diff --git a/base/comm/internals/psi_dswapdata_a.F90 b/base/comm/internals/psi_dswapdata_a.F90 index b85b1c6d..aff32517 100644 --- a/base/comm/internals/psi_dswapdata_a.F90 +++ b/base/comm/internals/psi_dswapdata_a.F90 @@ -179,8 +179,8 @@ subroutine psi_dswapidxm(ctxt,icomm,flag,n,beta,y,idx, & ! locals - integer(psb_mpk_) :: np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd integer(psb_ipk_) :: nesd, nerv,& @@ -197,7 +197,6 @@ subroutine psi_dswapidxm(ctxt,icomm,flag,n,beta,y,idx, & info=psb_success_ name='psi_swap_data' call psb_erractionsave(err_act) - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ @@ -641,7 +640,7 @@ end subroutine psi_dswapdatav ! ! ! -subroutine psi_dswapidxv(ictxt,iicomm,flag,beta,y,idx, & +subroutine psi_dswapidxv(ctxt,icomm,flag,beta,y,idx, & & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_dswapidxv @@ -656,8 +655,8 @@ subroutine psi_dswapidxv(ictxt,iicomm,flag,beta,y,idx, & include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info real(psb_dpk_) :: y(:), beta @@ -665,9 +664,8 @@ subroutine psi_dswapidxv(ictxt,iicomm,flag,beta,y,idx, & integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd integer(psb_ipk_) :: nesd, nerv,& @@ -684,9 +682,6 @@ subroutine psi_dswapidxv(ictxt,iicomm,flag,beta,y,idx, & info=psb_success_ name='psi_swap_datav' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ diff --git a/base/comm/internals/psi_dswaptran.F90 b/base/comm/internals/psi_dswaptran.F90 index de46ad03..df98e1ae 100644 --- a/base/comm/internals/psi_dswaptran.F90 +++ b/base/comm/internals/psi_dswaptran.F90 @@ -176,7 +176,7 @@ end subroutine psi_dswaptran_vect ! ! ! -subroutine psi_dtran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,& +subroutine psi_dtran_vidx_vect(ctxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_dtran_vidx_vect @@ -193,8 +193,8 @@ subroutine psi_dtran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,& include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_d_base_vect_type) :: y @@ -204,9 +204,8 @@ subroutine psi_dtran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,& integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable :: prcid(:) integer(psb_ipk_) :: nesd, nerv,& & err_act, i, idx_pt, totsnd_, totrcv_,& @@ -219,9 +218,6 @@ subroutine psi_dtran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,& info=psb_success_ name='psi_swap_tran' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ @@ -528,7 +524,7 @@ subroutine psi_dswaptran_multivect(flag,beta,y,desc_a,work,info,data) ! ! ! -subroutine psi_dtran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,& +subroutine psi_dtran_vidx_multivect(ctxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_dtran_vidx_multivect @@ -545,8 +541,8 @@ subroutine psi_dtran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,& include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_d_base_multivect_type) :: y @@ -556,9 +552,8 @@ subroutine psi_dtran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,& integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable :: prcid(:) integer(psb_ipk_) :: nesd, nerv,& & err_act, i, idx_pt, totsnd_, totrcv_,& @@ -571,9 +566,6 @@ subroutine psi_dtran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,& info=psb_success_ name='psi_swap_tran' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ diff --git a/base/comm/internals/psi_dswaptran_a.F90 b/base/comm/internals/psi_dswaptran_a.F90 index 8bc7b82f..ed13df40 100644 --- a/base/comm/internals/psi_dswaptran_a.F90 +++ b/base/comm/internals/psi_dswaptran_a.F90 @@ -159,7 +159,7 @@ subroutine psi_dswaptranm(flag,n,beta,y,desc_a,work,info,data) return end subroutine psi_dswaptranm -subroutine psi_dtranidxm(ictxt,iicomm,flag,n,beta,y,idx,& +subroutine psi_dtranidxm(ctxt,icomm,flag,n,beta,y,idx,& & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_dtranidxm @@ -174,8 +174,8 @@ subroutine psi_dtranidxm(ictxt,iicomm,flag,n,beta,y,idx,& include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag,n integer(psb_ipk_), intent(out) :: info real(psb_dpk_) :: y(:,:), beta @@ -183,9 +183,8 @@ subroutine psi_dtranidxm(ictxt,iicomm,flag,n,beta,y,idx,& integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd integer(psb_ipk_) :: nesd, nerv,& @@ -202,9 +201,6 @@ subroutine psi_dtranidxm(ictxt,iicomm,flag,n,beta,y,idx,& info=psb_success_ name='psi_swap_tran' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ @@ -656,7 +652,7 @@ end subroutine psi_dswaptranv ! ! ! -subroutine psi_dtranidxv(ictxt,iicomm,flag,beta,y,idx,& +subroutine psi_dtranidxv(ctxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_dtranidxv @@ -671,8 +667,8 @@ subroutine psi_dtranidxv(ictxt,iicomm,flag,beta,y,idx,& include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info real(psb_dpk_) :: y(:), beta @@ -680,9 +676,8 @@ subroutine psi_dtranidxv(ictxt,iicomm,flag,beta,y,idx,& integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd integer(psb_ipk_) :: nesd, nerv,& @@ -699,9 +694,6 @@ subroutine psi_dtranidxv(ictxt,iicomm,flag,beta,y,idx,& info=psb_success_ name='psi_swap_tran' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ diff --git a/base/comm/internals/psi_eswapdata_a.F90 b/base/comm/internals/psi_eswapdata_a.F90 index 3dc1786e..6a644563 100644 --- a/base/comm/internals/psi_eswapdata_a.F90 +++ b/base/comm/internals/psi_eswapdata_a.F90 @@ -179,8 +179,8 @@ subroutine psi_eswapidxm(ctxt,icomm,flag,n,beta,y,idx, & ! locals - integer(psb_mpk_) :: np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd integer(psb_ipk_) :: nesd, nerv,& @@ -197,7 +197,6 @@ subroutine psi_eswapidxm(ctxt,icomm,flag,n,beta,y,idx, & info=psb_success_ name='psi_swap_data' call psb_erractionsave(err_act) - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ @@ -641,7 +640,7 @@ end subroutine psi_eswapdatav ! ! ! -subroutine psi_eswapidxv(ictxt,iicomm,flag,beta,y,idx, & +subroutine psi_eswapidxv(ctxt,icomm,flag,beta,y,idx, & & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_eswapidxv @@ -656,8 +655,8 @@ subroutine psi_eswapidxv(ictxt,iicomm,flag,beta,y,idx, & include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info integer(psb_epk_) :: y(:), beta @@ -665,9 +664,8 @@ subroutine psi_eswapidxv(ictxt,iicomm,flag,beta,y,idx, & integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd integer(psb_ipk_) :: nesd, nerv,& @@ -684,9 +682,6 @@ subroutine psi_eswapidxv(ictxt,iicomm,flag,beta,y,idx, & info=psb_success_ name='psi_swap_datav' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ diff --git a/base/comm/internals/psi_eswaptran_a.F90 b/base/comm/internals/psi_eswaptran_a.F90 index 11419613..78ed7d8b 100644 --- a/base/comm/internals/psi_eswaptran_a.F90 +++ b/base/comm/internals/psi_eswaptran_a.F90 @@ -159,7 +159,7 @@ subroutine psi_eswaptranm(flag,n,beta,y,desc_a,work,info,data) return end subroutine psi_eswaptranm -subroutine psi_etranidxm(ictxt,iicomm,flag,n,beta,y,idx,& +subroutine psi_etranidxm(ctxt,icomm,flag,n,beta,y,idx,& & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_etranidxm @@ -174,8 +174,8 @@ subroutine psi_etranidxm(ictxt,iicomm,flag,n,beta,y,idx,& include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag,n integer(psb_ipk_), intent(out) :: info integer(psb_epk_) :: y(:,:), beta @@ -183,9 +183,8 @@ subroutine psi_etranidxm(ictxt,iicomm,flag,n,beta,y,idx,& integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd integer(psb_ipk_) :: nesd, nerv,& @@ -202,9 +201,6 @@ subroutine psi_etranidxm(ictxt,iicomm,flag,n,beta,y,idx,& info=psb_success_ name='psi_swap_tran' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ @@ -656,7 +652,7 @@ end subroutine psi_eswaptranv ! ! ! -subroutine psi_etranidxv(ictxt,iicomm,flag,beta,y,idx,& +subroutine psi_etranidxv(ctxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_etranidxv @@ -671,8 +667,8 @@ subroutine psi_etranidxv(ictxt,iicomm,flag,beta,y,idx,& include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info integer(psb_epk_) :: y(:), beta @@ -680,9 +676,8 @@ subroutine psi_etranidxv(ictxt,iicomm,flag,beta,y,idx,& integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd integer(psb_ipk_) :: nesd, nerv,& @@ -699,9 +694,6 @@ subroutine psi_etranidxv(ictxt,iicomm,flag,beta,y,idx,& info=psb_success_ name='psi_swap_tran' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ diff --git a/base/comm/internals/psi_i2swapdata_a.F90 b/base/comm/internals/psi_i2swapdata_a.F90 index 6d1928d3..42b4498e 100644 --- a/base/comm/internals/psi_i2swapdata_a.F90 +++ b/base/comm/internals/psi_i2swapdata_a.F90 @@ -179,8 +179,8 @@ subroutine psi_i2swapidxm(ctxt,icomm,flag,n,beta,y,idx, & ! locals - integer(psb_mpk_) :: np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd integer(psb_ipk_) :: nesd, nerv,& @@ -197,7 +197,6 @@ subroutine psi_i2swapidxm(ctxt,icomm,flag,n,beta,y,idx, & info=psb_success_ name='psi_swap_data' call psb_erractionsave(err_act) - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ @@ -641,7 +640,7 @@ end subroutine psi_i2swapdatav ! ! ! -subroutine psi_i2swapidxv(ictxt,iicomm,flag,beta,y,idx, & +subroutine psi_i2swapidxv(ctxt,icomm,flag,beta,y,idx, & & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_i2swapidxv @@ -656,8 +655,8 @@ subroutine psi_i2swapidxv(ictxt,iicomm,flag,beta,y,idx, & include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info integer(psb_i2pk_) :: y(:), beta @@ -665,9 +664,8 @@ subroutine psi_i2swapidxv(ictxt,iicomm,flag,beta,y,idx, & integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd integer(psb_ipk_) :: nesd, nerv,& @@ -684,9 +682,6 @@ subroutine psi_i2swapidxv(ictxt,iicomm,flag,beta,y,idx, & info=psb_success_ name='psi_swap_datav' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ diff --git a/base/comm/internals/psi_i2swaptran_a.F90 b/base/comm/internals/psi_i2swaptran_a.F90 index 26f3c820..f94bf29e 100644 --- a/base/comm/internals/psi_i2swaptran_a.F90 +++ b/base/comm/internals/psi_i2swaptran_a.F90 @@ -159,7 +159,7 @@ subroutine psi_i2swaptranm(flag,n,beta,y,desc_a,work,info,data) return end subroutine psi_i2swaptranm -subroutine psi_i2tranidxm(ictxt,iicomm,flag,n,beta,y,idx,& +subroutine psi_i2tranidxm(ctxt,icomm,flag,n,beta,y,idx,& & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_i2tranidxm @@ -174,8 +174,8 @@ subroutine psi_i2tranidxm(ictxt,iicomm,flag,n,beta,y,idx,& include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag,n integer(psb_ipk_), intent(out) :: info integer(psb_i2pk_) :: y(:,:), beta @@ -183,9 +183,8 @@ subroutine psi_i2tranidxm(ictxt,iicomm,flag,n,beta,y,idx,& integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd integer(psb_ipk_) :: nesd, nerv,& @@ -202,9 +201,6 @@ subroutine psi_i2tranidxm(ictxt,iicomm,flag,n,beta,y,idx,& info=psb_success_ name='psi_swap_tran' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ @@ -656,7 +652,7 @@ end subroutine psi_i2swaptranv ! ! ! -subroutine psi_i2tranidxv(ictxt,iicomm,flag,beta,y,idx,& +subroutine psi_i2tranidxv(ctxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_i2tranidxv @@ -671,8 +667,8 @@ subroutine psi_i2tranidxv(ictxt,iicomm,flag,beta,y,idx,& include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info integer(psb_i2pk_) :: y(:), beta @@ -680,9 +676,8 @@ subroutine psi_i2tranidxv(ictxt,iicomm,flag,beta,y,idx,& integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd integer(psb_ipk_) :: nesd, nerv,& @@ -699,9 +694,6 @@ subroutine psi_i2tranidxv(ictxt,iicomm,flag,beta,y,idx,& info=psb_success_ name='psi_swap_tran' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ diff --git a/base/comm/internals/psi_iswapdata.F90 b/base/comm/internals/psi_iswapdata.F90 index e541ff6d..ff4bd074 100644 --- a/base/comm/internals/psi_iswapdata.F90 +++ b/base/comm/internals/psi_iswapdata.F90 @@ -175,7 +175,7 @@ end subroutine psi_iswapdata_vect ! ! ! -subroutine psi_iswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, & +subroutine psi_iswap_vidx_vect(ctxt,icomm,flag,beta,y,idx, & & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_iswap_vidx_vect @@ -192,8 +192,8 @@ subroutine psi_iswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, & include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_i_base_vect_type) :: y @@ -203,9 +203,8 @@ subroutine psi_iswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, & integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable :: prcid(:) integer(psb_ipk_) :: nesd, nerv,& & err_act, i, idx_pt, totsnd_, totrcv_,& @@ -218,9 +217,6 @@ subroutine psi_iswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, & info=psb_success_ name='psi_swap_datav' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ @@ -517,7 +513,7 @@ end subroutine psi_iswapdata_multivect ! ! ! -subroutine psi_iswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, & +subroutine psi_iswap_vidx_multivect(ctxt,icomm,flag,beta,y,idx, & & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_iswap_vidx_multivect @@ -534,8 +530,8 @@ subroutine psi_iswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, & include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_i_base_multivect_type) :: y @@ -545,9 +541,8 @@ subroutine psi_iswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, & integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable :: prcid(:) integer(psb_ipk_) :: nesd, nerv,& & err_act, i, idx_pt, totsnd_, totrcv_,& @@ -560,9 +555,6 @@ subroutine psi_iswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, & info=psb_success_ name='psi_swap_datav' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ diff --git a/base/comm/internals/psi_iswaptran.F90 b/base/comm/internals/psi_iswaptran.F90 index 46bb18b5..75a0a185 100644 --- a/base/comm/internals/psi_iswaptran.F90 +++ b/base/comm/internals/psi_iswaptran.F90 @@ -176,7 +176,7 @@ end subroutine psi_iswaptran_vect ! ! ! -subroutine psi_itran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,& +subroutine psi_itran_vidx_vect(ctxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_itran_vidx_vect @@ -193,8 +193,8 @@ subroutine psi_itran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,& include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_i_base_vect_type) :: y @@ -204,9 +204,8 @@ subroutine psi_itran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,& integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable :: prcid(:) integer(psb_ipk_) :: nesd, nerv,& & err_act, i, idx_pt, totsnd_, totrcv_,& @@ -219,9 +218,6 @@ subroutine psi_itran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,& info=psb_success_ name='psi_swap_tran' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ @@ -528,7 +524,7 @@ subroutine psi_iswaptran_multivect(flag,beta,y,desc_a,work,info,data) ! ! ! -subroutine psi_itran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,& +subroutine psi_itran_vidx_multivect(ctxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_itran_vidx_multivect @@ -545,8 +541,8 @@ subroutine psi_itran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,& include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_i_base_multivect_type) :: y @@ -556,9 +552,8 @@ subroutine psi_itran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,& integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable :: prcid(:) integer(psb_ipk_) :: nesd, nerv,& & err_act, i, idx_pt, totsnd_, totrcv_,& @@ -571,9 +566,6 @@ subroutine psi_itran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,& info=psb_success_ name='psi_swap_tran' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ diff --git a/base/comm/internals/psi_lswapdata.F90 b/base/comm/internals/psi_lswapdata.F90 index 088c6508..9201ebfa 100644 --- a/base/comm/internals/psi_lswapdata.F90 +++ b/base/comm/internals/psi_lswapdata.F90 @@ -175,7 +175,7 @@ end subroutine psi_lswapdata_vect ! ! ! -subroutine psi_lswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, & +subroutine psi_lswap_vidx_vect(ctxt,icomm,flag,beta,y,idx, & & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_lswap_vidx_vect @@ -192,8 +192,8 @@ subroutine psi_lswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, & include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_l_base_vect_type) :: y @@ -203,9 +203,8 @@ subroutine psi_lswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, & integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable :: prcid(:) integer(psb_ipk_) :: nesd, nerv,& & err_act, i, idx_pt, totsnd_, totrcv_,& @@ -218,9 +217,6 @@ subroutine psi_lswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, & info=psb_success_ name='psi_swap_datav' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ @@ -517,7 +513,7 @@ end subroutine psi_lswapdata_multivect ! ! ! -subroutine psi_lswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, & +subroutine psi_lswap_vidx_multivect(ctxt,icomm,flag,beta,y,idx, & & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_lswap_vidx_multivect @@ -534,8 +530,8 @@ subroutine psi_lswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, & include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_l_base_multivect_type) :: y @@ -545,9 +541,8 @@ subroutine psi_lswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, & integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable :: prcid(:) integer(psb_ipk_) :: nesd, nerv,& & err_act, i, idx_pt, totsnd_, totrcv_,& @@ -560,9 +555,6 @@ subroutine psi_lswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, & info=psb_success_ name='psi_swap_datav' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ diff --git a/base/comm/internals/psi_lswaptran.F90 b/base/comm/internals/psi_lswaptran.F90 index 60470169..b2b9536c 100644 --- a/base/comm/internals/psi_lswaptran.F90 +++ b/base/comm/internals/psi_lswaptran.F90 @@ -176,7 +176,7 @@ end subroutine psi_lswaptran_vect ! ! ! -subroutine psi_ltran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,& +subroutine psi_ltran_vidx_vect(ctxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_ltran_vidx_vect @@ -193,8 +193,8 @@ subroutine psi_ltran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,& include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_l_base_vect_type) :: y @@ -204,9 +204,8 @@ subroutine psi_ltran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,& integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable :: prcid(:) integer(psb_ipk_) :: nesd, nerv,& & err_act, i, idx_pt, totsnd_, totrcv_,& @@ -219,9 +218,6 @@ subroutine psi_ltran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,& info=psb_success_ name='psi_swap_tran' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ @@ -528,7 +524,7 @@ subroutine psi_lswaptran_multivect(flag,beta,y,desc_a,work,info,data) ! ! ! -subroutine psi_ltran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,& +subroutine psi_ltran_vidx_multivect(ctxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_ltran_vidx_multivect @@ -545,8 +541,8 @@ subroutine psi_ltran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,& include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_l_base_multivect_type) :: y @@ -556,9 +552,8 @@ subroutine psi_ltran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,& integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable :: prcid(:) integer(psb_ipk_) :: nesd, nerv,& & err_act, i, idx_pt, totsnd_, totrcv_,& @@ -571,9 +566,6 @@ subroutine psi_ltran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,& info=psb_success_ name='psi_swap_tran' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ diff --git a/base/comm/internals/psi_mswapdata_a.F90 b/base/comm/internals/psi_mswapdata_a.F90 index 8e86c515..e71f3a52 100644 --- a/base/comm/internals/psi_mswapdata_a.F90 +++ b/base/comm/internals/psi_mswapdata_a.F90 @@ -179,8 +179,8 @@ subroutine psi_mswapidxm(ctxt,icomm,flag,n,beta,y,idx, & ! locals - integer(psb_mpk_) :: np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd integer(psb_ipk_) :: nesd, nerv,& @@ -197,7 +197,6 @@ subroutine psi_mswapidxm(ctxt,icomm,flag,n,beta,y,idx, & info=psb_success_ name='psi_swap_data' call psb_erractionsave(err_act) - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ @@ -641,7 +640,7 @@ end subroutine psi_mswapdatav ! ! ! -subroutine psi_mswapidxv(ictxt,iicomm,flag,beta,y,idx, & +subroutine psi_mswapidxv(ctxt,icomm,flag,beta,y,idx, & & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_mswapidxv @@ -656,8 +655,8 @@ subroutine psi_mswapidxv(ictxt,iicomm,flag,beta,y,idx, & include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info integer(psb_mpk_) :: y(:), beta @@ -665,9 +664,8 @@ subroutine psi_mswapidxv(ictxt,iicomm,flag,beta,y,idx, & integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd integer(psb_ipk_) :: nesd, nerv,& @@ -684,9 +682,6 @@ subroutine psi_mswapidxv(ictxt,iicomm,flag,beta,y,idx, & info=psb_success_ name='psi_swap_datav' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ diff --git a/base/comm/internals/psi_mswaptran_a.F90 b/base/comm/internals/psi_mswaptran_a.F90 index 65b8e367..3a780142 100644 --- a/base/comm/internals/psi_mswaptran_a.F90 +++ b/base/comm/internals/psi_mswaptran_a.F90 @@ -159,7 +159,7 @@ subroutine psi_mswaptranm(flag,n,beta,y,desc_a,work,info,data) return end subroutine psi_mswaptranm -subroutine psi_mtranidxm(ictxt,iicomm,flag,n,beta,y,idx,& +subroutine psi_mtranidxm(ctxt,icomm,flag,n,beta,y,idx,& & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_mtranidxm @@ -174,8 +174,8 @@ subroutine psi_mtranidxm(ictxt,iicomm,flag,n,beta,y,idx,& include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag,n integer(psb_ipk_), intent(out) :: info integer(psb_mpk_) :: y(:,:), beta @@ -183,9 +183,8 @@ subroutine psi_mtranidxm(ictxt,iicomm,flag,n,beta,y,idx,& integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd integer(psb_ipk_) :: nesd, nerv,& @@ -202,9 +201,6 @@ subroutine psi_mtranidxm(ictxt,iicomm,flag,n,beta,y,idx,& info=psb_success_ name='psi_swap_tran' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ @@ -656,7 +652,7 @@ end subroutine psi_mswaptranv ! ! ! -subroutine psi_mtranidxv(ictxt,iicomm,flag,beta,y,idx,& +subroutine psi_mtranidxv(ctxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_mtranidxv @@ -671,8 +667,8 @@ subroutine psi_mtranidxv(ictxt,iicomm,flag,beta,y,idx,& include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info integer(psb_mpk_) :: y(:), beta @@ -680,9 +676,8 @@ subroutine psi_mtranidxv(ictxt,iicomm,flag,beta,y,idx,& integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd integer(psb_ipk_) :: nesd, nerv,& @@ -699,9 +694,6 @@ subroutine psi_mtranidxv(ictxt,iicomm,flag,beta,y,idx,& info=psb_success_ name='psi_swap_tran' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ diff --git a/base/comm/internals/psi_sswapdata.F90 b/base/comm/internals/psi_sswapdata.F90 index 307195bb..e4f11bd0 100644 --- a/base/comm/internals/psi_sswapdata.F90 +++ b/base/comm/internals/psi_sswapdata.F90 @@ -175,7 +175,7 @@ end subroutine psi_sswapdata_vect ! ! ! -subroutine psi_sswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, & +subroutine psi_sswap_vidx_vect(ctxt,icomm,flag,beta,y,idx, & & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_sswap_vidx_vect @@ -192,8 +192,8 @@ subroutine psi_sswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, & include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_s_base_vect_type) :: y @@ -203,9 +203,8 @@ subroutine psi_sswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, & integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable :: prcid(:) integer(psb_ipk_) :: nesd, nerv,& & err_act, i, idx_pt, totsnd_, totrcv_,& @@ -218,9 +217,6 @@ subroutine psi_sswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, & info=psb_success_ name='psi_swap_datav' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ @@ -517,7 +513,7 @@ end subroutine psi_sswapdata_multivect ! ! ! -subroutine psi_sswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, & +subroutine psi_sswap_vidx_multivect(ctxt,icomm,flag,beta,y,idx, & & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_sswap_vidx_multivect @@ -534,8 +530,8 @@ subroutine psi_sswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, & include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_s_base_multivect_type) :: y @@ -545,9 +541,8 @@ subroutine psi_sswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, & integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable :: prcid(:) integer(psb_ipk_) :: nesd, nerv,& & err_act, i, idx_pt, totsnd_, totrcv_,& @@ -560,9 +555,6 @@ subroutine psi_sswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, & info=psb_success_ name='psi_swap_datav' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ diff --git a/base/comm/internals/psi_sswapdata_a.F90 b/base/comm/internals/psi_sswapdata_a.F90 index 6d74e1ad..044dc141 100644 --- a/base/comm/internals/psi_sswapdata_a.F90 +++ b/base/comm/internals/psi_sswapdata_a.F90 @@ -179,8 +179,8 @@ subroutine psi_sswapidxm(ctxt,icomm,flag,n,beta,y,idx, & ! locals - integer(psb_mpk_) :: np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd integer(psb_ipk_) :: nesd, nerv,& @@ -197,7 +197,6 @@ subroutine psi_sswapidxm(ctxt,icomm,flag,n,beta,y,idx, & info=psb_success_ name='psi_swap_data' call psb_erractionsave(err_act) - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ @@ -641,7 +640,7 @@ end subroutine psi_sswapdatav ! ! ! -subroutine psi_sswapidxv(ictxt,iicomm,flag,beta,y,idx, & +subroutine psi_sswapidxv(ctxt,icomm,flag,beta,y,idx, & & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_sswapidxv @@ -656,8 +655,8 @@ subroutine psi_sswapidxv(ictxt,iicomm,flag,beta,y,idx, & include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info real(psb_spk_) :: y(:), beta @@ -665,9 +664,8 @@ subroutine psi_sswapidxv(ictxt,iicomm,flag,beta,y,idx, & integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd integer(psb_ipk_) :: nesd, nerv,& @@ -684,9 +682,6 @@ subroutine psi_sswapidxv(ictxt,iicomm,flag,beta,y,idx, & info=psb_success_ name='psi_swap_datav' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ diff --git a/base/comm/internals/psi_sswaptran.F90 b/base/comm/internals/psi_sswaptran.F90 index 25aa5303..90c4b275 100644 --- a/base/comm/internals/psi_sswaptran.F90 +++ b/base/comm/internals/psi_sswaptran.F90 @@ -176,7 +176,7 @@ end subroutine psi_sswaptran_vect ! ! ! -subroutine psi_stran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,& +subroutine psi_stran_vidx_vect(ctxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_stran_vidx_vect @@ -193,8 +193,8 @@ subroutine psi_stran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,& include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_s_base_vect_type) :: y @@ -204,9 +204,8 @@ subroutine psi_stran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,& integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable :: prcid(:) integer(psb_ipk_) :: nesd, nerv,& & err_act, i, idx_pt, totsnd_, totrcv_,& @@ -219,9 +218,6 @@ subroutine psi_stran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,& info=psb_success_ name='psi_swap_tran' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ @@ -528,7 +524,7 @@ subroutine psi_sswaptran_multivect(flag,beta,y,desc_a,work,info,data) ! ! ! -subroutine psi_stran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,& +subroutine psi_stran_vidx_multivect(ctxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_stran_vidx_multivect @@ -545,8 +541,8 @@ subroutine psi_stran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,& include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_s_base_multivect_type) :: y @@ -556,9 +552,8 @@ subroutine psi_stran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,& integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable :: prcid(:) integer(psb_ipk_) :: nesd, nerv,& & err_act, i, idx_pt, totsnd_, totrcv_,& @@ -571,9 +566,6 @@ subroutine psi_stran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,& info=psb_success_ name='psi_swap_tran' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ diff --git a/base/comm/internals/psi_sswaptran_a.F90 b/base/comm/internals/psi_sswaptran_a.F90 index 866456d4..434cec4c 100644 --- a/base/comm/internals/psi_sswaptran_a.F90 +++ b/base/comm/internals/psi_sswaptran_a.F90 @@ -159,7 +159,7 @@ subroutine psi_sswaptranm(flag,n,beta,y,desc_a,work,info,data) return end subroutine psi_sswaptranm -subroutine psi_stranidxm(ictxt,iicomm,flag,n,beta,y,idx,& +subroutine psi_stranidxm(ctxt,icomm,flag,n,beta,y,idx,& & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_stranidxm @@ -174,8 +174,8 @@ subroutine psi_stranidxm(ictxt,iicomm,flag,n,beta,y,idx,& include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag,n integer(psb_ipk_), intent(out) :: info real(psb_spk_) :: y(:,:), beta @@ -183,9 +183,8 @@ subroutine psi_stranidxm(ictxt,iicomm,flag,n,beta,y,idx,& integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd integer(psb_ipk_) :: nesd, nerv,& @@ -202,9 +201,6 @@ subroutine psi_stranidxm(ictxt,iicomm,flag,n,beta,y,idx,& info=psb_success_ name='psi_swap_tran' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ @@ -656,7 +652,7 @@ end subroutine psi_sswaptranv ! ! ! -subroutine psi_stranidxv(ictxt,iicomm,flag,beta,y,idx,& +subroutine psi_stranidxv(ctxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_stranidxv @@ -671,8 +667,8 @@ subroutine psi_stranidxv(ictxt,iicomm,flag,beta,y,idx,& include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info real(psb_spk_) :: y(:), beta @@ -680,9 +676,8 @@ subroutine psi_stranidxv(ictxt,iicomm,flag,beta,y,idx,& integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd integer(psb_ipk_) :: nesd, nerv,& @@ -699,9 +694,6 @@ subroutine psi_stranidxv(ictxt,iicomm,flag,beta,y,idx,& info=psb_success_ name='psi_swap_tran' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ diff --git a/base/comm/internals/psi_zswapdata.F90 b/base/comm/internals/psi_zswapdata.F90 index e892a795..991d6e40 100644 --- a/base/comm/internals/psi_zswapdata.F90 +++ b/base/comm/internals/psi_zswapdata.F90 @@ -175,7 +175,7 @@ end subroutine psi_zswapdata_vect ! ! ! -subroutine psi_zswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, & +subroutine psi_zswap_vidx_vect(ctxt,icomm,flag,beta,y,idx, & & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_zswap_vidx_vect @@ -192,8 +192,8 @@ subroutine psi_zswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, & include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_z_base_vect_type) :: y @@ -203,9 +203,8 @@ subroutine psi_zswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, & integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable :: prcid(:) integer(psb_ipk_) :: nesd, nerv,& & err_act, i, idx_pt, totsnd_, totrcv_,& @@ -218,9 +217,6 @@ subroutine psi_zswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, & info=psb_success_ name='psi_swap_datav' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ @@ -517,7 +513,7 @@ end subroutine psi_zswapdata_multivect ! ! ! -subroutine psi_zswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, & +subroutine psi_zswap_vidx_multivect(ctxt,icomm,flag,beta,y,idx, & & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_zswap_vidx_multivect @@ -534,8 +530,8 @@ subroutine psi_zswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, & include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_z_base_multivect_type) :: y @@ -545,9 +541,8 @@ subroutine psi_zswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, & integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable :: prcid(:) integer(psb_ipk_) :: nesd, nerv,& & err_act, i, idx_pt, totsnd_, totrcv_,& @@ -560,9 +555,6 @@ subroutine psi_zswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, & info=psb_success_ name='psi_swap_datav' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ diff --git a/base/comm/internals/psi_zswapdata_a.F90 b/base/comm/internals/psi_zswapdata_a.F90 index 25e1a991..2d265c76 100644 --- a/base/comm/internals/psi_zswapdata_a.F90 +++ b/base/comm/internals/psi_zswapdata_a.F90 @@ -179,8 +179,8 @@ subroutine psi_zswapidxm(ctxt,icomm,flag,n,beta,y,idx, & ! locals - integer(psb_mpk_) :: np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd integer(psb_ipk_) :: nesd, nerv,& @@ -197,7 +197,6 @@ subroutine psi_zswapidxm(ctxt,icomm,flag,n,beta,y,idx, & info=psb_success_ name='psi_swap_data' call psb_erractionsave(err_act) - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ @@ -641,7 +640,7 @@ end subroutine psi_zswapdatav ! ! ! -subroutine psi_zswapidxv(ictxt,iicomm,flag,beta,y,idx, & +subroutine psi_zswapidxv(ctxt,icomm,flag,beta,y,idx, & & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_zswapidxv @@ -656,8 +655,8 @@ subroutine psi_zswapidxv(ictxt,iicomm,flag,beta,y,idx, & include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info complex(psb_dpk_) :: y(:), beta @@ -665,9 +664,8 @@ subroutine psi_zswapidxv(ictxt,iicomm,flag,beta,y,idx, & integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd integer(psb_ipk_) :: nesd, nerv,& @@ -684,9 +682,6 @@ subroutine psi_zswapidxv(ictxt,iicomm,flag,beta,y,idx, & info=psb_success_ name='psi_swap_datav' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ diff --git a/base/comm/internals/psi_zswaptran.F90 b/base/comm/internals/psi_zswaptran.F90 index 74fc4221..f027519f 100644 --- a/base/comm/internals/psi_zswaptran.F90 +++ b/base/comm/internals/psi_zswaptran.F90 @@ -176,7 +176,7 @@ end subroutine psi_zswaptran_vect ! ! ! -subroutine psi_ztran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,& +subroutine psi_ztran_vidx_vect(ctxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_ztran_vidx_vect @@ -193,8 +193,8 @@ subroutine psi_ztran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,& include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_z_base_vect_type) :: y @@ -204,9 +204,8 @@ subroutine psi_ztran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,& integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable :: prcid(:) integer(psb_ipk_) :: nesd, nerv,& & err_act, i, idx_pt, totsnd_, totrcv_,& @@ -219,9 +218,6 @@ subroutine psi_ztran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,& info=psb_success_ name='psi_swap_tran' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ @@ -528,7 +524,7 @@ subroutine psi_zswaptran_multivect(flag,beta,y,desc_a,work,info,data) ! ! ! -subroutine psi_ztran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,& +subroutine psi_ztran_vidx_multivect(ctxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_ztran_vidx_multivect @@ -545,8 +541,8 @@ subroutine psi_ztran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,& include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_z_base_multivect_type) :: y @@ -556,9 +552,8 @@ subroutine psi_ztran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,& integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable :: prcid(:) integer(psb_ipk_) :: nesd, nerv,& & err_act, i, idx_pt, totsnd_, totrcv_,& @@ -571,9 +566,6 @@ subroutine psi_ztran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,& info=psb_success_ name='psi_swap_tran' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ diff --git a/base/comm/internals/psi_zswaptran_a.F90 b/base/comm/internals/psi_zswaptran_a.F90 index 4984b51b..508d4045 100644 --- a/base/comm/internals/psi_zswaptran_a.F90 +++ b/base/comm/internals/psi_zswaptran_a.F90 @@ -159,7 +159,7 @@ subroutine psi_zswaptranm(flag,n,beta,y,desc_a,work,info,data) return end subroutine psi_zswaptranm -subroutine psi_ztranidxm(ictxt,iicomm,flag,n,beta,y,idx,& +subroutine psi_ztranidxm(ctxt,icomm,flag,n,beta,y,idx,& & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_ztranidxm @@ -174,8 +174,8 @@ subroutine psi_ztranidxm(ictxt,iicomm,flag,n,beta,y,idx,& include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag,n integer(psb_ipk_), intent(out) :: info complex(psb_dpk_) :: y(:,:), beta @@ -183,9 +183,8 @@ subroutine psi_ztranidxm(ictxt,iicomm,flag,n,beta,y,idx,& integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd integer(psb_ipk_) :: nesd, nerv,& @@ -202,9 +201,6 @@ subroutine psi_ztranidxm(ictxt,iicomm,flag,n,beta,y,idx,& info=psb_success_ name='psi_swap_tran' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ @@ -656,7 +652,7 @@ end subroutine psi_zswaptranv ! ! ! -subroutine psi_ztranidxv(ictxt,iicomm,flag,beta,y,idx,& +subroutine psi_ztranidxv(ctxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_ztranidxv @@ -671,8 +667,8 @@ subroutine psi_ztranidxv(ictxt,iicomm,flag,beta,y,idx,& include 'mpif.h' #endif - type(psb_ctxt_type), intent(in) :: ictxt - integer(psb_mpk_), intent(in) :: iicomm + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info complex(psb_dpk_) :: y(:), beta @@ -680,9 +676,8 @@ subroutine psi_ztranidxv(ictxt,iicomm,flag,beta,y,idx,& integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd, totrcv ! locals - type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: icomm, np, me,& - & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd integer(psb_ipk_) :: nesd, nerv,& @@ -699,9 +694,6 @@ subroutine psi_ztranidxv(ictxt,iicomm,flag,beta,y,idx,& info=psb_success_ name='psi_swap_tran' call psb_erractionsave(err_act) - ctxt = ictxt - icomm = iicomm - call psb_info(ctxt,me,np) if (np == -1) then info=psb_err_context_error_ diff --git a/base/internals/psi_bld_glb_dep_list.F90 b/base/internals/psi_bld_glb_dep_list.F90 index e415ffd2..f43e5f17 100644 --- a/base/internals/psi_bld_glb_dep_list.F90 +++ b/base/internals/psi_bld_glb_dep_list.F90 @@ -29,8 +29,8 @@ ! POSSIBILITY OF SUCH DAMAGE. ! ! -subroutine psi_i_bld_glb_csr_dep_list(ctxt,loc_dl,length_dl,c_dep_list,dl_ptr,info) - use psi_mod, psb_protect_name => psi_i_bld_glb_csr_dep_list +subroutine psi_i_bld_glb_dep_list(ctxt,loc_dl,length_dl,c_dep_list,dl_ptr,info) + use psi_mod, psb_protect_name => psi_i_bld_glb_dep_list #ifdef MPI_MOD use mpi #endif @@ -123,4 +123,4 @@ subroutine psi_i_bld_glb_csr_dep_list(ctxt,loc_dl,length_dl,c_dep_list,dl_ptr,in return -end subroutine psi_i_bld_glb_csr_dep_list +end subroutine psi_i_bld_glb_dep_list diff --git a/base/modules/Makefile b/base/modules/Makefile index 31c509ad..8d50011f 100644 --- a/base/modules/Makefile +++ b/base/modules/Makefile @@ -257,7 +257,7 @@ serial/psb_c_csc_mat_mod.o serial/psb_c_csr_mat_mod.o serial/psb_lc_csr_mat_mod. serial/psb_z_csc_mat_mod.o serial/psb_z_csr_mat_mod.o serial/psb_lz_csr_mat_mod.o: serial/psb_z_base_mat_mod.o serial/psb_mat_mod.o: serial/psb_vect_mod.o serial/psb_s_mat_mod.o serial/psb_d_mat_mod.o serial/psb_c_mat_mod.o serial/psb_z_mat_mod.o -serial/psb_serial_mod.o: serial/psb_s_serial_mod.o serial/psb_d_serial_mod.o serial/psb_c_serial_mod.o serial/psb_z_serial_mod.o +serial/psb_serial_mod.o: serial/psb_s_serial_mod.o serial/psb_d_serial_mod.o serial/psb_c_serial_mod.o serial/psb_z_serial_mod.o auxil/psi_serial_mod.o serial/psb_i_vect_mod.o: serial/psb_i_base_vect_mod.o serial/psb_l_vect_mod.o: serial/psb_l_base_vect_mod.o serial/psb_i_vect_mod.o serial/psb_s_vect_mod.o: serial/psb_s_base_vect_mod.o serial/psb_i_vect_mod.o diff --git a/base/modules/auxil/psi_c_serial_mod.f90 b/base/modules/auxil/psi_c_serial_mod.f90 index 05145c1c..f017f350 100644 --- a/base/modules/auxil/psi_c_serial_mod.f90 +++ b/base/modules/auxil/psi_c_serial_mod.f90 @@ -32,24 +32,40 @@ module psi_c_serial_mod use psb_const_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_, psb_spk_ - interface psb_gelp + interface psb_gelp ! 2-D version - subroutine psb_cgelp(trans,iperm,x,info) - import :: psb_ipk_, psb_spk_ + subroutine psb_m_cgelp(trans,iperm,x,info) + import :: psb_ipk_, psb_mpk_, psb_spk_ implicit none complex(psb_spk_), intent(inout) :: x(:,:) - integer(psb_ipk_), intent(in) :: iperm(:) + integer(psb_mpk_), intent(in) :: iperm(:) integer(psb_ipk_), intent(out) :: info character, intent(in) :: trans - end subroutine psb_cgelp - subroutine psb_cgelpv(trans,iperm,x,info) - import :: psb_ipk_, psb_spk_ + end subroutine psb_m_cgelp + subroutine psb_m_cgelpv(trans,iperm,x,info) + import :: psb_ipk_, psb_mpk_,psb_spk_ + implicit none + complex(psb_spk_), intent(inout) :: x(:) + integer(psb_mpk_), intent(in) :: iperm(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in) :: trans + end subroutine psb_m_cgelpv + subroutine psb_e_cgelp(trans,iperm,x,info) + import :: psb_ipk_, psb_epk_, psb_spk_ + implicit none + complex(psb_spk_), intent(inout) :: x(:,:) + integer(psb_epk_), intent(in) :: iperm(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in) :: trans + end subroutine psb_e_cgelp + subroutine psb_e_cgelpv(trans,iperm,x,info) + import :: psb_ipk_, psb_epk_, psb_spk_ implicit none complex(psb_spk_), intent(inout) :: x(:) - integer(psb_ipk_), intent(in) :: iperm(:) + integer(psb_epk_), intent(in) :: iperm(:) integer(psb_ipk_), intent(out) :: info character, intent(in) :: trans - end subroutine psb_cgelpv + end subroutine psb_e_cgelpv end interface psb_gelp interface psb_geaxpby diff --git a/base/modules/auxil/psi_d_serial_mod.f90 b/base/modules/auxil/psi_d_serial_mod.f90 index 0bea1bce..c27aa600 100644 --- a/base/modules/auxil/psi_d_serial_mod.f90 +++ b/base/modules/auxil/psi_d_serial_mod.f90 @@ -32,24 +32,40 @@ module psi_d_serial_mod use psb_const_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_, psb_dpk_ - interface psb_gelp + interface psb_gelp ! 2-D version - subroutine psb_dgelp(trans,iperm,x,info) - import :: psb_ipk_, psb_dpk_ + subroutine psb_m_dgelp(trans,iperm,x,info) + import :: psb_ipk_, psb_mpk_, psb_dpk_ implicit none real(psb_dpk_), intent(inout) :: x(:,:) - integer(psb_ipk_), intent(in) :: iperm(:) + integer(psb_mpk_), intent(in) :: iperm(:) integer(psb_ipk_), intent(out) :: info character, intent(in) :: trans - end subroutine psb_dgelp - subroutine psb_dgelpv(trans,iperm,x,info) - import :: psb_ipk_, psb_dpk_ + end subroutine psb_m_dgelp + subroutine psb_m_dgelpv(trans,iperm,x,info) + import :: psb_ipk_, psb_mpk_,psb_dpk_ + implicit none + real(psb_dpk_), intent(inout) :: x(:) + integer(psb_mpk_), intent(in) :: iperm(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in) :: trans + end subroutine psb_m_dgelpv + subroutine psb_e_dgelp(trans,iperm,x,info) + import :: psb_ipk_, psb_epk_, psb_dpk_ + implicit none + real(psb_dpk_), intent(inout) :: x(:,:) + integer(psb_epk_), intent(in) :: iperm(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in) :: trans + end subroutine psb_e_dgelp + subroutine psb_e_dgelpv(trans,iperm,x,info) + import :: psb_ipk_, psb_epk_, psb_dpk_ implicit none real(psb_dpk_), intent(inout) :: x(:) - integer(psb_ipk_), intent(in) :: iperm(:) + integer(psb_epk_), intent(in) :: iperm(:) integer(psb_ipk_), intent(out) :: info character, intent(in) :: trans - end subroutine psb_dgelpv + end subroutine psb_e_dgelpv end interface psb_gelp interface psb_geaxpby diff --git a/base/modules/auxil/psi_e_serial_mod.f90 b/base/modules/auxil/psi_e_serial_mod.f90 index f8b9694d..99a91985 100644 --- a/base/modules/auxil/psi_e_serial_mod.f90 +++ b/base/modules/auxil/psi_e_serial_mod.f90 @@ -32,24 +32,40 @@ module psi_e_serial_mod use psb_const_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_ - interface psb_gelp + interface psb_gelp ! 2-D version - subroutine psb_egelp(trans,iperm,x,info) + subroutine psb_m_egelp(trans,iperm,x,info) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit none integer(psb_epk_), intent(inout) :: x(:,:) - integer(psb_ipk_), intent(in) :: iperm(:) + integer(psb_mpk_), intent(in) :: iperm(:) integer(psb_ipk_), intent(out) :: info character, intent(in) :: trans - end subroutine psb_egelp - subroutine psb_egelpv(trans,iperm,x,info) + end subroutine psb_m_egelp + subroutine psb_m_egelpv(trans,iperm,x,info) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit none integer(psb_epk_), intent(inout) :: x(:) - integer(psb_ipk_), intent(in) :: iperm(:) + integer(psb_mpk_), intent(in) :: iperm(:) integer(psb_ipk_), intent(out) :: info character, intent(in) :: trans - end subroutine psb_egelpv + end subroutine psb_m_egelpv + subroutine psb_e_egelp(trans,iperm,x,info) + import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ + implicit none + integer(psb_epk_), intent(inout) :: x(:,:) + integer(psb_epk_), intent(in) :: iperm(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in) :: trans + end subroutine psb_e_egelp + subroutine psb_e_egelpv(trans,iperm,x,info) + import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ + implicit none + integer(psb_epk_), intent(inout) :: x(:) + integer(psb_epk_), intent(in) :: iperm(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in) :: trans + end subroutine psb_e_egelpv end interface psb_gelp interface psb_geaxpby diff --git a/base/modules/auxil/psi_i2_serial_mod.f90 b/base/modules/auxil/psi_i2_serial_mod.f90 index bc0df7c5..565955e7 100644 --- a/base/modules/auxil/psi_i2_serial_mod.f90 +++ b/base/modules/auxil/psi_i2_serial_mod.f90 @@ -32,24 +32,40 @@ module psi_i2_serial_mod use psb_const_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_ - interface psb_gelp + interface psb_gelp ! 2-D version - subroutine psb_i2gelp(trans,iperm,x,info) + subroutine psb_m_i2gelp(trans,iperm,x,info) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit none integer(psb_i2pk_), intent(inout) :: x(:,:) - integer(psb_ipk_), intent(in) :: iperm(:) + integer(psb_mpk_), intent(in) :: iperm(:) integer(psb_ipk_), intent(out) :: info character, intent(in) :: trans - end subroutine psb_i2gelp - subroutine psb_i2gelpv(trans,iperm,x,info) + end subroutine psb_m_i2gelp + subroutine psb_m_i2gelpv(trans,iperm,x,info) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit none integer(psb_i2pk_), intent(inout) :: x(:) - integer(psb_ipk_), intent(in) :: iperm(:) + integer(psb_mpk_), intent(in) :: iperm(:) integer(psb_ipk_), intent(out) :: info character, intent(in) :: trans - end subroutine psb_i2gelpv + end subroutine psb_m_i2gelpv + subroutine psb_e_i2gelp(trans,iperm,x,info) + import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ + implicit none + integer(psb_i2pk_), intent(inout) :: x(:,:) + integer(psb_epk_), intent(in) :: iperm(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in) :: trans + end subroutine psb_e_i2gelp + subroutine psb_e_i2gelpv(trans,iperm,x,info) + import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ + implicit none + integer(psb_i2pk_), intent(inout) :: x(:) + integer(psb_epk_), intent(in) :: iperm(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in) :: trans + end subroutine psb_e_i2gelpv end interface psb_gelp interface psb_geaxpby diff --git a/base/modules/auxil/psi_m_serial_mod.f90 b/base/modules/auxil/psi_m_serial_mod.f90 index 2acb7482..17ea8dc4 100644 --- a/base/modules/auxil/psi_m_serial_mod.f90 +++ b/base/modules/auxil/psi_m_serial_mod.f90 @@ -32,24 +32,40 @@ module psi_m_serial_mod use psb_const_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_ - interface psb_gelp + interface psb_gelp ! 2-D version - subroutine psb_mgelp(trans,iperm,x,info) + subroutine psb_m_mgelp(trans,iperm,x,info) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit none integer(psb_mpk_), intent(inout) :: x(:,:) - integer(psb_ipk_), intent(in) :: iperm(:) + integer(psb_mpk_), intent(in) :: iperm(:) integer(psb_ipk_), intent(out) :: info character, intent(in) :: trans - end subroutine psb_mgelp - subroutine psb_mgelpv(trans,iperm,x,info) + end subroutine psb_m_mgelp + subroutine psb_m_mgelpv(trans,iperm,x,info) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit none integer(psb_mpk_), intent(inout) :: x(:) - integer(psb_ipk_), intent(in) :: iperm(:) + integer(psb_mpk_), intent(in) :: iperm(:) integer(psb_ipk_), intent(out) :: info character, intent(in) :: trans - end subroutine psb_mgelpv + end subroutine psb_m_mgelpv + subroutine psb_e_mgelp(trans,iperm,x,info) + import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ + implicit none + integer(psb_mpk_), intent(inout) :: x(:,:) + integer(psb_epk_), intent(in) :: iperm(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in) :: trans + end subroutine psb_e_mgelp + subroutine psb_e_mgelpv(trans,iperm,x,info) + import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ + implicit none + integer(psb_mpk_), intent(inout) :: x(:) + integer(psb_epk_), intent(in) :: iperm(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in) :: trans + end subroutine psb_e_mgelpv end interface psb_gelp interface psb_geaxpby diff --git a/base/modules/auxil/psi_s_serial_mod.f90 b/base/modules/auxil/psi_s_serial_mod.f90 index ac3dbb62..ed7b5d9f 100644 --- a/base/modules/auxil/psi_s_serial_mod.f90 +++ b/base/modules/auxil/psi_s_serial_mod.f90 @@ -32,24 +32,40 @@ module psi_s_serial_mod use psb_const_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_, psb_spk_ - interface psb_gelp + interface psb_gelp ! 2-D version - subroutine psb_sgelp(trans,iperm,x,info) - import :: psb_ipk_, psb_spk_ + subroutine psb_m_sgelp(trans,iperm,x,info) + import :: psb_ipk_, psb_mpk_, psb_spk_ implicit none real(psb_spk_), intent(inout) :: x(:,:) - integer(psb_ipk_), intent(in) :: iperm(:) + integer(psb_mpk_), intent(in) :: iperm(:) integer(psb_ipk_), intent(out) :: info character, intent(in) :: trans - end subroutine psb_sgelp - subroutine psb_sgelpv(trans,iperm,x,info) - import :: psb_ipk_, psb_spk_ + end subroutine psb_m_sgelp + subroutine psb_m_sgelpv(trans,iperm,x,info) + import :: psb_ipk_, psb_mpk_,psb_spk_ + implicit none + real(psb_spk_), intent(inout) :: x(:) + integer(psb_mpk_), intent(in) :: iperm(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in) :: trans + end subroutine psb_m_sgelpv + subroutine psb_e_sgelp(trans,iperm,x,info) + import :: psb_ipk_, psb_epk_, psb_spk_ + implicit none + real(psb_spk_), intent(inout) :: x(:,:) + integer(psb_epk_), intent(in) :: iperm(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in) :: trans + end subroutine psb_e_sgelp + subroutine psb_e_sgelpv(trans,iperm,x,info) + import :: psb_ipk_, psb_epk_, psb_spk_ implicit none real(psb_spk_), intent(inout) :: x(:) - integer(psb_ipk_), intent(in) :: iperm(:) + integer(psb_epk_), intent(in) :: iperm(:) integer(psb_ipk_), intent(out) :: info character, intent(in) :: trans - end subroutine psb_sgelpv + end subroutine psb_e_sgelpv end interface psb_gelp interface psb_geaxpby diff --git a/base/modules/auxil/psi_z_serial_mod.f90 b/base/modules/auxil/psi_z_serial_mod.f90 index ee148ef2..9de8451b 100644 --- a/base/modules/auxil/psi_z_serial_mod.f90 +++ b/base/modules/auxil/psi_z_serial_mod.f90 @@ -32,24 +32,40 @@ module psi_z_serial_mod use psb_const_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_, psb_dpk_ - interface psb_gelp + interface psb_gelp ! 2-D version - subroutine psb_zgelp(trans,iperm,x,info) - import :: psb_ipk_, psb_dpk_ + subroutine psb_m_zgelp(trans,iperm,x,info) + import :: psb_ipk_, psb_mpk_, psb_dpk_ implicit none complex(psb_dpk_), intent(inout) :: x(:,:) - integer(psb_ipk_), intent(in) :: iperm(:) + integer(psb_mpk_), intent(in) :: iperm(:) integer(psb_ipk_), intent(out) :: info character, intent(in) :: trans - end subroutine psb_zgelp - subroutine psb_zgelpv(trans,iperm,x,info) - import :: psb_ipk_, psb_dpk_ + end subroutine psb_m_zgelp + subroutine psb_m_zgelpv(trans,iperm,x,info) + import :: psb_ipk_, psb_mpk_,psb_dpk_ + implicit none + complex(psb_dpk_), intent(inout) :: x(:) + integer(psb_mpk_), intent(in) :: iperm(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in) :: trans + end subroutine psb_m_zgelpv + subroutine psb_e_zgelp(trans,iperm,x,info) + import :: psb_ipk_, psb_epk_, psb_dpk_ + implicit none + complex(psb_dpk_), intent(inout) :: x(:,:) + integer(psb_epk_), intent(in) :: iperm(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in) :: trans + end subroutine psb_e_zgelp + subroutine psb_e_zgelpv(trans,iperm,x,info) + import :: psb_ipk_, psb_epk_, psb_dpk_ implicit none complex(psb_dpk_), intent(inout) :: x(:) - integer(psb_ipk_), intent(in) :: iperm(:) + integer(psb_epk_), intent(in) :: iperm(:) integer(psb_ipk_), intent(out) :: info character, intent(in) :: trans - end subroutine psb_zgelpv + end subroutine psb_e_zgelpv end interface psb_gelp interface psb_geaxpby diff --git a/base/modules/comm/psi_c_comm_v_mod.f90 b/base/modules/comm/psi_c_comm_v_mod.f90 index 47eb7fdf..7d10a028 100644 --- a/base/modules/comm/psi_c_comm_v_mod.f90 +++ b/base/modules/comm/psi_c_comm_v_mod.f90 @@ -56,11 +56,11 @@ module psi_c_comm_v_mod type(psb_desc_type), target :: desc_a integer(psb_ipk_), optional :: data end subroutine psi_cswapdata_multivect - subroutine psi_cswap_vidx_vect(ctxt,iicomm,flag,beta,y,idx,& + subroutine psi_cswap_vidx_vect(ctxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) import type(psb_ctxt_type), intent(in) :: ctxt - integer(psb_mpk_), intent(in) :: iicomm + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_c_base_vect_type) :: y @@ -69,11 +69,11 @@ module psi_c_comm_v_mod class(psb_i_base_vect_type), intent(inout) :: idx integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv end subroutine psi_cswap_vidx_vect - subroutine psi_cswap_vidx_multivect(ctxt,iicomm,flag,beta,y,idx,& + subroutine psi_cswap_vidx_multivect(ctxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) import type(psb_ctxt_type), intent(in) :: ctxt - integer(psb_mpk_), intent(in) :: iicomm + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_c_base_multivect_type) :: y @@ -106,11 +106,11 @@ module psi_c_comm_v_mod type(psb_desc_type), target :: desc_a integer(psb_ipk_), optional :: data end subroutine psi_cswaptran_multivect - subroutine psi_ctran_vidx_vect(ctxt,iicomm,flag,beta,y,idx,& + subroutine psi_ctran_vidx_vect(ctxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) import type(psb_ctxt_type), intent(in) :: ctxt - integer(psb_mpk_), intent(in) :: iicomm + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_c_base_vect_type) :: y @@ -119,11 +119,11 @@ module psi_c_comm_v_mod class(psb_i_base_vect_type), intent(inout) :: idx integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv end subroutine psi_ctran_vidx_vect - subroutine psi_ctran_vidx_multivect(ctxt,iicomm,flag,beta,y,idx,& + subroutine psi_ctran_vidx_multivect(ctxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) import type(psb_ctxt_type), intent(in) :: ctxt - integer(psb_mpk_), intent(in) :: iicomm + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_c_base_multivect_type) :: y diff --git a/base/modules/comm/psi_d_comm_v_mod.f90 b/base/modules/comm/psi_d_comm_v_mod.f90 index 6b7cdfd6..b7a902da 100644 --- a/base/modules/comm/psi_d_comm_v_mod.f90 +++ b/base/modules/comm/psi_d_comm_v_mod.f90 @@ -56,11 +56,11 @@ module psi_d_comm_v_mod type(psb_desc_type), target :: desc_a integer(psb_ipk_), optional :: data end subroutine psi_dswapdata_multivect - subroutine psi_dswap_vidx_vect(ctxt,iicomm,flag,beta,y,idx,& + subroutine psi_dswap_vidx_vect(ctxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) import type(psb_ctxt_type), intent(in) :: ctxt - integer(psb_mpk_), intent(in) :: iicomm + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_d_base_vect_type) :: y @@ -69,11 +69,11 @@ module psi_d_comm_v_mod class(psb_i_base_vect_type), intent(inout) :: idx integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv end subroutine psi_dswap_vidx_vect - subroutine psi_dswap_vidx_multivect(ctxt,iicomm,flag,beta,y,idx,& + subroutine psi_dswap_vidx_multivect(ctxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) import type(psb_ctxt_type), intent(in) :: ctxt - integer(psb_mpk_), intent(in) :: iicomm + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_d_base_multivect_type) :: y @@ -106,11 +106,11 @@ module psi_d_comm_v_mod type(psb_desc_type), target :: desc_a integer(psb_ipk_), optional :: data end subroutine psi_dswaptran_multivect - subroutine psi_dtran_vidx_vect(ctxt,iicomm,flag,beta,y,idx,& + subroutine psi_dtran_vidx_vect(ctxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) import type(psb_ctxt_type), intent(in) :: ctxt - integer(psb_mpk_), intent(in) :: iicomm + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_d_base_vect_type) :: y @@ -119,11 +119,11 @@ module psi_d_comm_v_mod class(psb_i_base_vect_type), intent(inout) :: idx integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv end subroutine psi_dtran_vidx_vect - subroutine psi_dtran_vidx_multivect(ctxt,iicomm,flag,beta,y,idx,& + subroutine psi_dtran_vidx_multivect(ctxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) import type(psb_ctxt_type), intent(in) :: ctxt - integer(psb_mpk_), intent(in) :: iicomm + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_d_base_multivect_type) :: y diff --git a/base/modules/comm/psi_i_comm_v_mod.f90 b/base/modules/comm/psi_i_comm_v_mod.f90 index 4072a6c4..2fe3948c 100644 --- a/base/modules/comm/psi_i_comm_v_mod.f90 +++ b/base/modules/comm/psi_i_comm_v_mod.f90 @@ -57,11 +57,11 @@ module psi_i_comm_v_mod type(psb_desc_type), target :: desc_a integer(psb_ipk_), optional :: data end subroutine psi_iswapdata_multivect - subroutine psi_iswap_vidx_vect(ctxt,iicomm,flag,beta,y,idx,& + subroutine psi_iswap_vidx_vect(ctxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) import type(psb_ctxt_type), intent(in) :: ctxt - integer(psb_mpk_), intent(in) :: iicomm + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_i_base_vect_type) :: y @@ -70,11 +70,11 @@ module psi_i_comm_v_mod class(psb_i_base_vect_type), intent(inout) :: idx integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv end subroutine psi_iswap_vidx_vect - subroutine psi_iswap_vidx_multivect(ctxt,iicomm,flag,beta,y,idx,& + subroutine psi_iswap_vidx_multivect(ctxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) import type(psb_ctxt_type), intent(in) :: ctxt - integer(psb_mpk_), intent(in) :: iicomm + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_i_base_multivect_type) :: y @@ -107,11 +107,11 @@ module psi_i_comm_v_mod type(psb_desc_type), target :: desc_a integer(psb_ipk_), optional :: data end subroutine psi_iswaptran_multivect - subroutine psi_itran_vidx_vect(ctxt,iicomm,flag,beta,y,idx,& + subroutine psi_itran_vidx_vect(ctxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) import type(psb_ctxt_type), intent(in) :: ctxt - integer(psb_mpk_), intent(in) :: iicomm + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_i_base_vect_type) :: y @@ -120,11 +120,11 @@ module psi_i_comm_v_mod class(psb_i_base_vect_type), intent(inout) :: idx integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv end subroutine psi_itran_vidx_vect - subroutine psi_itran_vidx_multivect(ctxt,iicomm,flag,beta,y,idx,& + subroutine psi_itran_vidx_multivect(ctxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) import type(psb_ctxt_type), intent(in) :: ctxt - integer(psb_mpk_), intent(in) :: iicomm + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_i_base_multivect_type) :: y diff --git a/base/modules/comm/psi_l_comm_v_mod.f90 b/base/modules/comm/psi_l_comm_v_mod.f90 index b3b55a0d..b61a17b7 100644 --- a/base/modules/comm/psi_l_comm_v_mod.f90 +++ b/base/modules/comm/psi_l_comm_v_mod.f90 @@ -58,11 +58,11 @@ module psi_l_comm_v_mod type(psb_desc_type), target :: desc_a integer(psb_ipk_), optional :: data end subroutine psi_lswapdata_multivect - subroutine psi_lswap_vidx_vect(ctxt,iicomm,flag,beta,y,idx,& + subroutine psi_lswap_vidx_vect(ctxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) import type(psb_ctxt_type), intent(in) :: ctxt - integer(psb_mpk_), intent(in) :: iicomm + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_l_base_vect_type) :: y @@ -71,11 +71,11 @@ module psi_l_comm_v_mod class(psb_i_base_vect_type), intent(inout) :: idx integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv end subroutine psi_lswap_vidx_vect - subroutine psi_lswap_vidx_multivect(ctxt,iicomm,flag,beta,y,idx,& + subroutine psi_lswap_vidx_multivect(ctxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) import type(psb_ctxt_type), intent(in) :: ctxt - integer(psb_mpk_), intent(in) :: iicomm + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_l_base_multivect_type) :: y @@ -108,11 +108,11 @@ module psi_l_comm_v_mod type(psb_desc_type), target :: desc_a integer(psb_ipk_), optional :: data end subroutine psi_lswaptran_multivect - subroutine psi_ltran_vidx_vect(ctxt,iicomm,flag,beta,y,idx,& + subroutine psi_ltran_vidx_vect(ctxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) import type(psb_ctxt_type), intent(in) :: ctxt - integer(psb_mpk_), intent(in) :: iicomm + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_l_base_vect_type) :: y @@ -121,11 +121,11 @@ module psi_l_comm_v_mod class(psb_i_base_vect_type), intent(inout) :: idx integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv end subroutine psi_ltran_vidx_vect - subroutine psi_ltran_vidx_multivect(ctxt,iicomm,flag,beta,y,idx,& + subroutine psi_ltran_vidx_multivect(ctxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) import type(psb_ctxt_type), intent(in) :: ctxt - integer(psb_mpk_), intent(in) :: iicomm + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_l_base_multivect_type) :: y diff --git a/base/modules/comm/psi_s_comm_v_mod.f90 b/base/modules/comm/psi_s_comm_v_mod.f90 index a2eb0bcf..1cf4d53e 100644 --- a/base/modules/comm/psi_s_comm_v_mod.f90 +++ b/base/modules/comm/psi_s_comm_v_mod.f90 @@ -56,11 +56,11 @@ module psi_s_comm_v_mod type(psb_desc_type), target :: desc_a integer(psb_ipk_), optional :: data end subroutine psi_sswapdata_multivect - subroutine psi_sswap_vidx_vect(ctxt,iicomm,flag,beta,y,idx,& + subroutine psi_sswap_vidx_vect(ctxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) import type(psb_ctxt_type), intent(in) :: ctxt - integer(psb_mpk_), intent(in) :: iicomm + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_s_base_vect_type) :: y @@ -69,11 +69,11 @@ module psi_s_comm_v_mod class(psb_i_base_vect_type), intent(inout) :: idx integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv end subroutine psi_sswap_vidx_vect - subroutine psi_sswap_vidx_multivect(ctxt,iicomm,flag,beta,y,idx,& + subroutine psi_sswap_vidx_multivect(ctxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) import type(psb_ctxt_type), intent(in) :: ctxt - integer(psb_mpk_), intent(in) :: iicomm + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_s_base_multivect_type) :: y @@ -106,11 +106,11 @@ module psi_s_comm_v_mod type(psb_desc_type), target :: desc_a integer(psb_ipk_), optional :: data end subroutine psi_sswaptran_multivect - subroutine psi_stran_vidx_vect(ctxt,iicomm,flag,beta,y,idx,& + subroutine psi_stran_vidx_vect(ctxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) import type(psb_ctxt_type), intent(in) :: ctxt - integer(psb_mpk_), intent(in) :: iicomm + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_s_base_vect_type) :: y @@ -119,11 +119,11 @@ module psi_s_comm_v_mod class(psb_i_base_vect_type), intent(inout) :: idx integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv end subroutine psi_stran_vidx_vect - subroutine psi_stran_vidx_multivect(ctxt,iicomm,flag,beta,y,idx,& + subroutine psi_stran_vidx_multivect(ctxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) import type(psb_ctxt_type), intent(in) :: ctxt - integer(psb_mpk_), intent(in) :: iicomm + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_s_base_multivect_type) :: y diff --git a/base/modules/comm/psi_z_comm_v_mod.f90 b/base/modules/comm/psi_z_comm_v_mod.f90 index 02c1b8d8..de8e1117 100644 --- a/base/modules/comm/psi_z_comm_v_mod.f90 +++ b/base/modules/comm/psi_z_comm_v_mod.f90 @@ -56,11 +56,11 @@ module psi_z_comm_v_mod type(psb_desc_type), target :: desc_a integer(psb_ipk_), optional :: data end subroutine psi_zswapdata_multivect - subroutine psi_zswap_vidx_vect(ctxt,iicomm,flag,beta,y,idx,& + subroutine psi_zswap_vidx_vect(ctxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) import type(psb_ctxt_type), intent(in) :: ctxt - integer(psb_mpk_), intent(in) :: iicomm + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_z_base_vect_type) :: y @@ -69,11 +69,11 @@ module psi_z_comm_v_mod class(psb_i_base_vect_type), intent(inout) :: idx integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv end subroutine psi_zswap_vidx_vect - subroutine psi_zswap_vidx_multivect(ctxt,iicomm,flag,beta,y,idx,& + subroutine psi_zswap_vidx_multivect(ctxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) import type(psb_ctxt_type), intent(in) :: ctxt - integer(psb_mpk_), intent(in) :: iicomm + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_z_base_multivect_type) :: y @@ -106,11 +106,11 @@ module psi_z_comm_v_mod type(psb_desc_type), target :: desc_a integer(psb_ipk_), optional :: data end subroutine psi_zswaptran_multivect - subroutine psi_ztran_vidx_vect(ctxt,iicomm,flag,beta,y,idx,& + subroutine psi_ztran_vidx_vect(ctxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) import type(psb_ctxt_type), intent(in) :: ctxt - integer(psb_mpk_), intent(in) :: iicomm + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_z_base_vect_type) :: y @@ -119,11 +119,11 @@ module psi_z_comm_v_mod class(psb_i_base_vect_type), intent(inout) :: idx integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv end subroutine psi_ztran_vidx_vect - subroutine psi_ztran_vidx_multivect(ctxt,iicomm,flag,beta,y,idx,& + subroutine psi_ztran_vidx_multivect(ctxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) import type(psb_ctxt_type), intent(in) :: ctxt - integer(psb_mpk_), intent(in) :: iicomm + integer(psb_mpk_), intent(in) :: icomm integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info class(psb_z_base_multivect_type) :: y diff --git a/base/modules/desc/psb_desc_mod.F90 b/base/modules/desc/psb_desc_mod.F90 index cc003759..c1c98d51 100644 --- a/base/modules/desc/psb_desc_mod.F90 +++ b/base/modules/desc/psb_desc_mod.F90 @@ -613,8 +613,10 @@ contains if (allocated(desc%indxmap)) then val = desc%indxmap%get_ctxt() else - call psb_errpush(psb_err_invalid_cd_state_,'psb_cd_get_context') - call psb_error() + ! At this point, val should a non-ALLOCATED + ! ctxt component, which suits us just fine. + !call psb_errpush(psb_err_invalid_cd_state_,'psb_cd_get_context') + !call psb_error() end if end function psb_cd_get_context diff --git a/base/modules/desc/psb_indx_map_mod.f90 b/base/modules/desc/psb_indx_map_mod.f90 index d18458f0..0c0d8199 100644 --- a/base/modules/desc/psb_indx_map_mod.f90 +++ b/base/modules/desc/psb_indx_map_mod.f90 @@ -224,6 +224,7 @@ module psb_indx_map_mod generic, public :: qry_halo_owner => qry_halo_owner_s, qry_halo_owner_v procedure, pass(idxmap) :: fnd_owner => psi_indx_map_fnd_owner + procedure, pass(idxmap) :: init_null => base_init_null procedure, pass(idxmap) :: init_vl => base_init_vl generic, public :: init => init_vl @@ -242,7 +243,7 @@ module psb_indx_map_mod & base_ll2gs1, base_ll2gs2, base_ll2gv1, base_ll2gv2,& & base_lg2ls1, base_lg2ls2, base_lg2lv1, base_lg2lv2,& & base_lg2ls1_ins, base_lg2ls2_ins, base_lg2lv1_ins,& - & base_lg2lv2_ins, base_init_vl, base_is_null,& + & base_lg2lv2_ins, base_init_vl, base_is_null, base_init_null, & & base_row_extendable, base_clone, base_cpy, base_reinit, & & base_set_halo_owner, base_get_halo_owner, & & base_qry_halo_owner_s, base_qry_halo_owner_v,& @@ -1345,6 +1346,17 @@ contains end subroutine base_set_null + subroutine base_init_null(idxmap,ctxt,info) + class(psb_indx_map), intent(inout) :: idxmap + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_ipk_), intent(out) :: info + + call idxmap%set_null() + idxmap%ctxt = ctxt + info = 0 + return + end subroutine base_init_null + subroutine base_init_vl(idxmap,ctxt,vl,info) use psb_penv_mod use psb_error_mod @@ -1414,7 +1426,7 @@ contains call psb_get_erraction(err_act) outmap%state = idxmap%state - outmap%ctxt = idxmap%ctxt + outmap%ctxt = idxmap%ctxt outmap%mpic = idxmap%mpic outmap%global_rows = idxmap%global_rows outmap%global_cols = idxmap%global_cols diff --git a/base/modules/penv/psi_d_collective_mod.F90 b/base/modules/penv/psi_d_collective_mod.F90 index eabe5b3f..12d5f38b 100644 --- a/base/modules/penv/psi_d_collective_mod.F90 +++ b/base/modules/penv/psi_d_collective_mod.F90 @@ -105,8 +105,6 @@ contains integer(psb_mpk_) :: root_ real(psb_dpk_) :: dat_ integer(psb_mpk_) :: iam, np, info, icomm - integer(psb_ipk_) :: iinfo - #if !defined(SERIAL_MPI) call psb_info(ctxt,iam,np) diff --git a/base/modules/penv/psi_e_collective_mod.F90 b/base/modules/penv/psi_e_collective_mod.F90 index 54d85347..215446c0 100644 --- a/base/modules/penv/psi_e_collective_mod.F90 +++ b/base/modules/penv/psi_e_collective_mod.F90 @@ -102,8 +102,6 @@ contains integer(psb_mpk_) :: root_ integer(psb_epk_) :: dat_ integer(psb_mpk_) :: iam, np, info, icomm - integer(psb_ipk_) :: iinfo - #if !defined(SERIAL_MPI) call psb_info(ctxt,iam,np) diff --git a/base/modules/penv/psi_i2_collective_mod.F90 b/base/modules/penv/psi_i2_collective_mod.F90 index 64a49ae3..781653d4 100644 --- a/base/modules/penv/psi_i2_collective_mod.F90 +++ b/base/modules/penv/psi_i2_collective_mod.F90 @@ -102,8 +102,6 @@ contains integer(psb_mpk_) :: root_ integer(psb_i2pk_) :: dat_ integer(psb_mpk_) :: iam, np, info, icomm - integer(psb_ipk_) :: iinfo - #if !defined(SERIAL_MPI) call psb_info(ctxt,iam,np) diff --git a/base/modules/penv/psi_m_collective_mod.F90 b/base/modules/penv/psi_m_collective_mod.F90 index 462a7221..8fdea824 100644 --- a/base/modules/penv/psi_m_collective_mod.F90 +++ b/base/modules/penv/psi_m_collective_mod.F90 @@ -102,8 +102,6 @@ contains integer(psb_mpk_) :: root_ integer(psb_mpk_) :: dat_ integer(psb_mpk_) :: iam, np, info, icomm - integer(psb_ipk_) :: iinfo - #if !defined(SERIAL_MPI) call psb_info(ctxt,iam,np) diff --git a/base/modules/penv/psi_p2p_mod.F90 b/base/modules/penv/psi_p2p_mod.F90 index 84438f96..f7262378 100644 --- a/base/modules/penv/psi_p2p_mod.F90 +++ b/base/modules/penv/psi_p2p_mod.F90 @@ -216,10 +216,11 @@ contains logical, intent(out) :: dat(:) integer(psb_mpk_), intent(in) :: src integer(psb_mpk_) :: info - integer(psb_mpk_) :: status(mpi_status_size) + integer(psb_mpk_) :: status(mpi_status_size), icomm #if defined(SERIAL_MPI) #else - call mpi_recv(dat,size(dat),mpi_logical,src,psb_logical_tag,ctxt,status,info) + icomm = psb_get_mpi_comm(ctxt) + call mpi_recv(dat,size(dat),mpi_logical,src,psb_logical_tag,icomm,status,info) call psb_test_nodes(psb_mesg_queue) #endif diff --git a/base/modules/penv/psi_s_collective_mod.F90 b/base/modules/penv/psi_s_collective_mod.F90 index 30a10524..82f96aac 100644 --- a/base/modules/penv/psi_s_collective_mod.F90 +++ b/base/modules/penv/psi_s_collective_mod.F90 @@ -105,8 +105,6 @@ contains integer(psb_mpk_) :: root_ real(psb_spk_) :: dat_ integer(psb_mpk_) :: iam, np, info, icomm - integer(psb_ipk_) :: iinfo - #if !defined(SERIAL_MPI) call psb_info(ctxt,iam,np) diff --git a/base/modules/psi_i_mod.F90 b/base/modules/psi_i_mod.F90 index 31e5d461..e8cf2706 100644 --- a/base/modules/psi_i_mod.F90 +++ b/base/modules/psi_i_mod.F90 @@ -110,13 +110,13 @@ module psi_i_mod end interface interface psi_bld_glb_dep_list - subroutine psi_i_bld_glb_csr_dep_list(ctxt,loc_dl,length_dl,c_dep_list,dl_ptr,info) + subroutine psi_i_bld_glb_dep_list(ctxt,loc_dl,length_dl,c_dep_list,dl_ptr,info) import type(psb_ctxt_type), intent(in) :: ctxt - integer(psb_ipk_), intent(in) :: loc_dl(:), length_dl(0:) + integer(psb_ipk_), intent(in) :: loc_dl(:), length_dl(0:) integer(psb_ipk_), allocatable, intent(out) :: c_dep_list(:), dl_ptr(:) - integer(psb_ipk_), intent(out) :: info - end subroutine psi_i_bld_glb_csr_dep_list + integer(psb_ipk_), intent(out) :: info + end subroutine psi_i_bld_glb_dep_list end interface interface psi_extract_loc_dl diff --git a/base/modules/serial/psb_c_base_vect_mod.f90 b/base/modules/serial/psb_c_base_vect_mod.F90 similarity index 97% rename from base/modules/serial/psb_c_base_vect_mod.f90 rename to base/modules/serial/psb_c_base_vect_mod.F90 index f59e238f..e68fef6c 100644 --- a/base/modules/serial/psb_c_base_vect_mod.f90 +++ b/base/modules/serial/psb_c_base_vect_mod.F90 @@ -266,14 +266,21 @@ contains complex(psb_spk_), intent(in) :: this(:) class(psb_c_base_vect_type), intent(inout) :: x integer(psb_ipk_) :: info - + integer(psb_ipk_) :: i + call psb_realloc(size(this),x%v,info) if (info /= 0) then call psb_errpush(psb_err_alloc_dealloc_,'base_vect_bld') return end if +#if defined (OPENMP) + !$omp parallel do private(i) + do i = 1, size(this) + x%v(i) = this(i) + end do +#else x%v(:) = this(:) - +#endif end subroutine c_base_bld_x ! @@ -403,7 +410,6 @@ contains case(psb_dupl_ovwrt_) do i = 1, n !loop over all val's rows - ! row actual block row if ((1 <= irl(i)).and.(irl(i) <= isz)) then ! this row belongs to me @@ -783,7 +789,7 @@ contains integer(psb_ipk_) :: info integer(psb_ipk_), optional :: n ! Local variables - integer(psb_ipk_) :: isz + integer(psb_ipk_) :: isz, i if (.not.allocated(x%v)) return if (.not.x%is_host()) call x%sync() @@ -794,7 +800,15 @@ contains call psb_errpush(psb_err_alloc_dealloc_,'base_get_vect') return end if - res(1:isz) = x%v(1:isz) + if (.false.) then + res(1:isz) = x%v(1:isz) + else + !$omp parallel do private(i) + do i=1, isz + res(i) = x%v(i) + end do + end if + end function c_base_get_vect ! @@ -812,7 +826,7 @@ contains complex(psb_spk_), intent(in) :: val integer(psb_ipk_), optional :: first, last - integer(psb_ipk_) :: first_, last_ + integer(psb_ipk_) :: first_, last_, i first_=1 last_=size(x%v) @@ -820,7 +834,14 @@ contains if (present(last)) last_ = min(last,last_) if (x%is_dev()) call x%sync() +#if defined(OPENMP) + !$omp parallel do private(i) + do i = first_, last_ + x%v(i) = val + end do +#else x%v(first_:last_) = val +#endif call x%set_host() end subroutine c_base_set_scal @@ -838,19 +859,27 @@ contains complex(psb_spk_), intent(in) :: val(:) integer(psb_ipk_), optional :: first, last - integer(psb_ipk_) :: first_, last_ + integer(psb_ipk_) :: first_, last_, i, info + if (.not.allocated(x%v)) then + call psb_realloc(size(val),x%v,info) + end if + first_ = 1 if (present(first)) first_ = max(1,first) last_ = min(psb_size(x%v),first_+size(val)-1) if (present(last)) last_ = min(last,last_) - if (allocated(x%v)) then - if (x%is_dev()) call x%sync() + if (x%is_dev()) call x%sync() + +#if defined(OPENMP) + !$omp parallel do private(i) + do i = first_, last_ + x%v(i) = val(i-first_+1) + end do +#else x%v(first_:last_) = val(1:last_-first_+1) - else - x%v = val - end if +#endif call x%set_host() end subroutine c_base_set_vect @@ -888,9 +917,18 @@ contains implicit none class(psb_c_base_vect_type), intent(inout) :: x + integer(psb_ipk_) :: i + if (allocated(x%v)) then if (x%is_dev()) call x%sync() +#if defined(OPENMP) + !$omp parallel do private(i) + do i=1, size(x%v) + x%v(i) = abs(x%v(i)) + end do +#else x%v = abs(x%v) +#endif call x%set_host() end if @@ -1132,6 +1170,7 @@ contains info = 0 if (y%is_dev()) call y%sync() n = min(size(y%v), size(x)) + !$omp parallel do private(i) do i=1, n y%v(i) = y%v(i)*x(i) end do @@ -1169,6 +1208,7 @@ contains if (beta == cone) then return else + !$omp parallel do private(i) do i=1, n z%v(i) = beta*z%v(i) end do @@ -1176,42 +1216,51 @@ contains else if (alpha == cone) then if (beta == czero) then + !$omp parallel do private(i) do i=1, n z%v(i) = y(i)*x(i) end do else if (beta == cone) then + !$omp parallel do private(i) do i=1, n z%v(i) = z%v(i) + y(i)*x(i) end do else + !$omp parallel do private(i) do i=1, n z%v(i) = beta*z%v(i) + y(i)*x(i) end do end if else if (alpha == -cone) then if (beta == czero) then + !$omp parallel do private(i) do i=1, n z%v(i) = -y(i)*x(i) end do else if (beta == cone) then + !$omp parallel do private(i) do i=1, n z%v(i) = z%v(i) - y(i)*x(i) end do else + !$omp parallel do private(i) do i=1, n z%v(i) = beta*z%v(i) - y(i)*x(i) end do end if else if (beta == czero) then + !$omp parallel do private(i) do i=1, n z%v(i) = alpha*y(i)*x(i) end do else if (beta == cone) then + !$omp parallel do private(i) do i=1, n z%v(i) = z%v(i) + alpha*y(i)*x(i) end do else + !$omp parallel do private(i) do i=1, n z%v(i) = beta*z%v(i) + alpha*y(i)*x(i) end do @@ -1314,7 +1363,6 @@ contains if (x%is_dev()) call x%sync() call x%div(x%v,y%v,info) - end subroutine c_base_div_v ! !> Function base_div_v2 @@ -1358,7 +1406,6 @@ contains if (x%is_dev()) call x%sync() call x%div(x%v,y%v,info,flag) - end subroutine c_base_div_v_check ! !> Function base_div_v2_check @@ -1381,7 +1428,6 @@ contains if (z%is_dev()) call z%sync() call z%div(x%v,y%v,info,flag) - end subroutine c_base_div_v2_check ! !> Function base_div_a2 @@ -1403,6 +1449,7 @@ contains if (z%is_dev()) call z%sync() n = min(size(y), size(x)) + !$omp parallel do private(i) do i=1, n z%v(i) = x(i)/y(i) end do @@ -1433,6 +1480,7 @@ contains if (z%is_dev()) call z%sync() n = min(size(y), size(x)) + ! $omp parallel do private(i) do i=1, n if (y(i) /= 0) then z%v(i) = x(i)/y(i) @@ -1443,7 +1491,6 @@ contains end do end if - end subroutine c_base_div_a2_check ! !> Function base_inv_v @@ -1487,7 +1534,6 @@ contains if (y%is_dev()) call y%sync() call y%inv(x%v,info,flag) - end subroutine c_base_inv_v_check ! !> Function base_inv_a2 @@ -1509,6 +1555,7 @@ contains if (y%is_dev()) call y%sync() n = size(x) + !$omp parallel do private(i) do i=1, n y%v(i) = 1_psb_spk_/x(i) end do @@ -1539,6 +1586,7 @@ contains if (y%is_dev()) call y%sync() n = size(x) + !$omp parallel do private(i) do i=1, n if (x(i) /= 0) then y%v(i) = 1_psb_spk_/x(i) @@ -1573,6 +1621,7 @@ contains if (z%is_dev()) call z%sync() n = size(x) + !$omp parallel do private(i) do i = 1, n, 1 if ( abs(x(i)).ge.c ) then z%v(i) = 1_psb_spk_ @@ -1618,14 +1667,21 @@ contains implicit none class(psb_c_base_vect_type), intent(inout) :: x complex(psb_spk_), intent (in) :: alpha + integer(psb_ipk_) :: i if (allocated(x%v)) then +#if defined(OPENMP) + !$omp parallel do private(i) + do i=1,size(x%v) + x%v(i) = alpha*x%v(i) + end do +#else x%v = alpha*x%v - call x%set_host() +#endif end if - + call x%set_host() end subroutine c_base_scal - + ! ! Norms 1, 2 and infinity ! @@ -1655,10 +1711,18 @@ contains class(psb_c_base_vect_type), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_spk_) :: res + integer(psb_ipk_) :: i if (x%is_dev()) call x%sync() +#if defined(OPENMP) + res = szero + !$omp parallel do private(i) reduction(max: res) + do i=1, n + res = max(res,abs(x%v(i))) + end do +#else res = maxval(abs(x%v(1:n))) - +#endif end function c_base_amax @@ -1672,10 +1736,18 @@ contains class(psb_c_base_vect_type), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_spk_) :: res - + integer(psb_ipk_) :: i + if (x%is_dev()) call x%sync() +#if defined(OPENMP) + res=szero + !$omp parallel do private(i) reduction(+: res) + do i= 1, size(x%v) + res = res + abs(x%v(i)) + end do +#else res = sum(abs(x%v(1:n))) - +#endif end function c_base_asum @@ -1882,13 +1954,17 @@ contains integer(psb_ipk_) :: i, n if (z%is_dev()) call z%sync() - +#if defined(OPENMP) n = size(x) - do i = 1, n, 1 - z%v(i) = x(i) + b + !$omp parallel do private(i) + do i = 1, n + z%v(i) = x(i) + b end do +#else + z%v = x + b +#endif info = 0 - + end subroutine c_base_addconst_a2 ! !> Function _base_addconst_v2 @@ -1914,9 +1990,6 @@ contains end module psb_c_base_vect_mod - - - module psb_c_base_multivect_mod use psb_const_mod diff --git a/base/modules/serial/psb_c_mat_mod.F90 b/base/modules/serial/psb_c_mat_mod.F90 index 76225758..5e889da2 100644 --- a/base/modules/serial/psb_c_mat_mod.F90 +++ b/base/modules/serial/psb_c_mat_mod.F90 @@ -128,6 +128,7 @@ module psb_c_mat_mod ! Memory/data management procedure, pass(a) :: csall => psb_c_csall + generic, public :: allocate => csall procedure, pass(a) :: free => psb_c_free procedure, pass(a) :: trim => psb_c_trim procedure, pass(a) :: csput_a => psb_c_csput_a @@ -326,6 +327,7 @@ module psb_c_mat_mod ! Memory/data management procedure, pass(a) :: csall => psb_lc_csall + generic, public :: allocate => csall procedure, pass(a) :: free => psb_lc_free procedure, pass(a) :: trim => psb_lc_trim procedure, pass(a) :: csput_a => psb_lc_csput_a @@ -604,12 +606,14 @@ module psb_c_mat_mod end interface interface - subroutine psb_c_csall(nr,nc,a,info,nz) - import :: psb_ipk_, psb_lpk_, psb_cspmat_type + subroutine psb_c_csall(nr,nc,a,info,nz,type,mold) + import :: psb_ipk_, psb_lpk_, psb_cspmat_type, psb_c_base_sparse_mat class(psb_cspmat_type), intent(inout) :: a integer(psb_ipk_), intent(in) :: nr,nc integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nz + character(len=*), intent(in), optional :: type + class(psb_c_base_sparse_mat), optional, intent(in) :: mold end subroutine psb_c_csall end interface @@ -1384,12 +1388,14 @@ module psb_c_mat_mod end interface interface - subroutine psb_lc_csall(nr,nc,a,info,nz) - import :: psb_ipk_, psb_lpk_, psb_lcspmat_type + subroutine psb_lc_csall(nr,nc,a,info,nz,type,mold) + import :: psb_ipk_, psb_lpk_, psb_lcspmat_type, psb_lc_base_sparse_mat class(psb_lcspmat_type), intent(inout) :: a integer(psb_lpk_), intent(in) :: nr,nc integer(psb_ipk_), intent(out) :: info integer(psb_lpk_), intent(in), optional :: nz + character(len=*), intent(in), optional :: type + class(psb_lc_base_sparse_mat), optional, intent(in) :: mold end subroutine psb_lc_csall end interface diff --git a/base/modules/serial/psb_d_base_vect_mod.f90 b/base/modules/serial/psb_d_base_vect_mod.F90 similarity index 97% rename from base/modules/serial/psb_d_base_vect_mod.f90 rename to base/modules/serial/psb_d_base_vect_mod.F90 index daf12cbf..09fd187b 100644 --- a/base/modules/serial/psb_d_base_vect_mod.f90 +++ b/base/modules/serial/psb_d_base_vect_mod.F90 @@ -273,14 +273,21 @@ contains real(psb_dpk_), intent(in) :: this(:) class(psb_d_base_vect_type), intent(inout) :: x integer(psb_ipk_) :: info - + integer(psb_ipk_) :: i + call psb_realloc(size(this),x%v,info) if (info /= 0) then call psb_errpush(psb_err_alloc_dealloc_,'base_vect_bld') return end if +#if defined (OPENMP) + !$omp parallel do private(i) + do i = 1, size(this) + x%v(i) = this(i) + end do +#else x%v(:) = this(:) - +#endif end subroutine d_base_bld_x ! @@ -410,7 +417,6 @@ contains case(psb_dupl_ovwrt_) do i = 1, n !loop over all val's rows - ! row actual block row if ((1 <= irl(i)).and.(irl(i) <= isz)) then ! this row belongs to me @@ -790,7 +796,7 @@ contains integer(psb_ipk_) :: info integer(psb_ipk_), optional :: n ! Local variables - integer(psb_ipk_) :: isz + integer(psb_ipk_) :: isz, i if (.not.allocated(x%v)) return if (.not.x%is_host()) call x%sync() @@ -801,7 +807,15 @@ contains call psb_errpush(psb_err_alloc_dealloc_,'base_get_vect') return end if - res(1:isz) = x%v(1:isz) + if (.false.) then + res(1:isz) = x%v(1:isz) + else + !$omp parallel do private(i) + do i=1, isz + res(i) = x%v(i) + end do + end if + end function d_base_get_vect ! @@ -819,7 +833,7 @@ contains real(psb_dpk_), intent(in) :: val integer(psb_ipk_), optional :: first, last - integer(psb_ipk_) :: first_, last_ + integer(psb_ipk_) :: first_, last_, i first_=1 last_=size(x%v) @@ -827,7 +841,14 @@ contains if (present(last)) last_ = min(last,last_) if (x%is_dev()) call x%sync() +#if defined(OPENMP) + !$omp parallel do private(i) + do i = first_, last_ + x%v(i) = val + end do +#else x%v(first_:last_) = val +#endif call x%set_host() end subroutine d_base_set_scal @@ -845,19 +866,27 @@ contains real(psb_dpk_), intent(in) :: val(:) integer(psb_ipk_), optional :: first, last - integer(psb_ipk_) :: first_, last_ + integer(psb_ipk_) :: first_, last_, i, info + if (.not.allocated(x%v)) then + call psb_realloc(size(val),x%v,info) + end if + first_ = 1 if (present(first)) first_ = max(1,first) last_ = min(psb_size(x%v),first_+size(val)-1) if (present(last)) last_ = min(last,last_) - if (allocated(x%v)) then - if (x%is_dev()) call x%sync() + if (x%is_dev()) call x%sync() + +#if defined(OPENMP) + !$omp parallel do private(i) + do i = first_, last_ + x%v(i) = val(i-first_+1) + end do +#else x%v(first_:last_) = val(1:last_-first_+1) - else - x%v = val - end if +#endif call x%set_host() end subroutine d_base_set_vect @@ -895,9 +924,18 @@ contains implicit none class(psb_d_base_vect_type), intent(inout) :: x + integer(psb_ipk_) :: i + if (allocated(x%v)) then if (x%is_dev()) call x%sync() +#if defined(OPENMP) + !$omp parallel do private(i) + do i=1, size(x%v) + x%v(i) = abs(x%v(i)) + end do +#else x%v = abs(x%v) +#endif call x%set_host() end if @@ -1139,6 +1177,7 @@ contains info = 0 if (y%is_dev()) call y%sync() n = min(size(y%v), size(x)) + !$omp parallel do private(i) do i=1, n y%v(i) = y%v(i)*x(i) end do @@ -1176,6 +1215,7 @@ contains if (beta == done) then return else + !$omp parallel do private(i) do i=1, n z%v(i) = beta*z%v(i) end do @@ -1183,42 +1223,51 @@ contains else if (alpha == done) then if (beta == dzero) then + !$omp parallel do private(i) do i=1, n z%v(i) = y(i)*x(i) end do else if (beta == done) then + !$omp parallel do private(i) do i=1, n z%v(i) = z%v(i) + y(i)*x(i) end do else + !$omp parallel do private(i) do i=1, n z%v(i) = beta*z%v(i) + y(i)*x(i) end do end if else if (alpha == -done) then if (beta == dzero) then + !$omp parallel do private(i) do i=1, n z%v(i) = -y(i)*x(i) end do else if (beta == done) then + !$omp parallel do private(i) do i=1, n z%v(i) = z%v(i) - y(i)*x(i) end do else + !$omp parallel do private(i) do i=1, n z%v(i) = beta*z%v(i) - y(i)*x(i) end do end if else if (beta == dzero) then + !$omp parallel do private(i) do i=1, n z%v(i) = alpha*y(i)*x(i) end do else if (beta == done) then + !$omp parallel do private(i) do i=1, n z%v(i) = z%v(i) + alpha*y(i)*x(i) end do else + !$omp parallel do private(i) do i=1, n z%v(i) = beta*z%v(i) + alpha*y(i)*x(i) end do @@ -1321,7 +1370,6 @@ contains if (x%is_dev()) call x%sync() call x%div(x%v,y%v,info) - end subroutine d_base_div_v ! !> Function base_div_v2 @@ -1365,7 +1413,6 @@ contains if (x%is_dev()) call x%sync() call x%div(x%v,y%v,info,flag) - end subroutine d_base_div_v_check ! !> Function base_div_v2_check @@ -1388,7 +1435,6 @@ contains if (z%is_dev()) call z%sync() call z%div(x%v,y%v,info,flag) - end subroutine d_base_div_v2_check ! !> Function base_div_a2 @@ -1410,6 +1456,7 @@ contains if (z%is_dev()) call z%sync() n = min(size(y), size(x)) + !$omp parallel do private(i) do i=1, n z%v(i) = x(i)/y(i) end do @@ -1440,6 +1487,7 @@ contains if (z%is_dev()) call z%sync() n = min(size(y), size(x)) + ! $omp parallel do private(i) do i=1, n if (y(i) /= 0) then z%v(i) = x(i)/y(i) @@ -1450,7 +1498,6 @@ contains end do end if - end subroutine d_base_div_a2_check ! !> Function base_inv_v @@ -1494,7 +1541,6 @@ contains if (y%is_dev()) call y%sync() call y%inv(x%v,info,flag) - end subroutine d_base_inv_v_check ! !> Function base_inv_a2 @@ -1516,6 +1562,7 @@ contains if (y%is_dev()) call y%sync() n = size(x) + !$omp parallel do private(i) do i=1, n y%v(i) = 1_psb_dpk_/x(i) end do @@ -1546,6 +1593,7 @@ contains if (y%is_dev()) call y%sync() n = size(x) + !$omp parallel do private(i) do i=1, n if (x(i) /= 0) then y%v(i) = 1_psb_dpk_/x(i) @@ -1580,6 +1628,7 @@ contains if (z%is_dev()) call z%sync() n = size(x) + !$omp parallel do private(i) do i = 1, n, 1 if ( abs(x(i)).ge.c ) then z%v(i) = 1_psb_dpk_ @@ -1625,14 +1674,21 @@ contains implicit none class(psb_d_base_vect_type), intent(inout) :: x real(psb_dpk_), intent (in) :: alpha + integer(psb_ipk_) :: i if (allocated(x%v)) then +#if defined(OPENMP) + !$omp parallel do private(i) + do i=1,size(x%v) + x%v(i) = alpha*x%v(i) + end do +#else x%v = alpha*x%v - call x%set_host() +#endif end if - + call x%set_host() end subroutine d_base_scal - + ! ! Norms 1, 2 and infinity ! @@ -1662,10 +1718,18 @@ contains class(psb_d_base_vect_type), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_dpk_) :: res + integer(psb_ipk_) :: i if (x%is_dev()) call x%sync() +#if defined(OPENMP) + res = dzero + !$omp parallel do private(i) reduction(max: res) + do i=1, n + res = max(res,abs(x%v(i))) + end do +#else res = maxval(abs(x%v(1:n))) - +#endif end function d_base_amax ! @@ -1678,10 +1742,18 @@ contains class(psb_d_base_vect_type), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_dpk_) :: res + integer(psb_ipk_) :: i if (x%is_dev()) call x%sync() +#if defined(OPENMP) + res = HUGE(done) + !$omp parallel do private(i) reduction(min: res) + do i=1, n + res = min(res,abs(x%v(i))) + end do +#else res = minval(x%v(1:n)) - +#endif end function d_base_min ! @@ -1730,10 +1802,11 @@ contains z = huge(z) n = min(size(y), size(x%v)) + !$omp parallel do private(i,temp) reduction(min: z) do i=1, n if ( y(i) /= dzero ) then temp = x%v(i)/y(i) - if (temp <= z) z = temp + z = min(z,temp) end if end do @@ -1750,10 +1823,18 @@ contains class(psb_d_base_vect_type), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_dpk_) :: res - + integer(psb_ipk_) :: i + if (x%is_dev()) call x%sync() +#if defined(OPENMP) + res=dzero + !$omp parallel do private(i) reduction(+: res) + do i= 1, size(x%v) + res = res + abs(x%v(i)) + end do +#else res = sum(abs(x%v(1:n))) - +#endif end function d_base_asum @@ -2052,13 +2133,17 @@ contains integer(psb_ipk_) :: i, n if (z%is_dev()) call z%sync() - +#if defined(OPENMP) n = size(x) - do i = 1, n, 1 - z%v(i) = x(i) + b + !$omp parallel do private(i) + do i = 1, n + z%v(i) = x(i) + b end do +#else + z%v = x + b +#endif info = 0 - + end subroutine d_base_addconst_a2 ! !> Function _base_addconst_v2 @@ -2084,9 +2169,6 @@ contains end module psb_d_base_vect_mod - - - module psb_d_base_multivect_mod use psb_const_mod diff --git a/base/modules/serial/psb_d_mat_mod.F90 b/base/modules/serial/psb_d_mat_mod.F90 index 878d099f..caf03994 100644 --- a/base/modules/serial/psb_d_mat_mod.F90 +++ b/base/modules/serial/psb_d_mat_mod.F90 @@ -128,6 +128,7 @@ module psb_d_mat_mod ! Memory/data management procedure, pass(a) :: csall => psb_d_csall + generic, public :: allocate => csall procedure, pass(a) :: free => psb_d_free procedure, pass(a) :: trim => psb_d_trim procedure, pass(a) :: csput_a => psb_d_csput_a @@ -326,6 +327,7 @@ module psb_d_mat_mod ! Memory/data management procedure, pass(a) :: csall => psb_ld_csall + generic, public :: allocate => csall procedure, pass(a) :: free => psb_ld_free procedure, pass(a) :: trim => psb_ld_trim procedure, pass(a) :: csput_a => psb_ld_csput_a @@ -604,12 +606,14 @@ module psb_d_mat_mod end interface interface - subroutine psb_d_csall(nr,nc,a,info,nz) - import :: psb_ipk_, psb_lpk_, psb_dspmat_type + subroutine psb_d_csall(nr,nc,a,info,nz,type,mold) + import :: psb_ipk_, psb_lpk_, psb_dspmat_type, psb_d_base_sparse_mat class(psb_dspmat_type), intent(inout) :: a integer(psb_ipk_), intent(in) :: nr,nc integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nz + character(len=*), intent(in), optional :: type + class(psb_d_base_sparse_mat), optional, intent(in) :: mold end subroutine psb_d_csall end interface @@ -1384,12 +1388,14 @@ module psb_d_mat_mod end interface interface - subroutine psb_ld_csall(nr,nc,a,info,nz) - import :: psb_ipk_, psb_lpk_, psb_ldspmat_type + subroutine psb_ld_csall(nr,nc,a,info,nz,type,mold) + import :: psb_ipk_, psb_lpk_, psb_ldspmat_type, psb_ld_base_sparse_mat class(psb_ldspmat_type), intent(inout) :: a integer(psb_lpk_), intent(in) :: nr,nc integer(psb_ipk_), intent(out) :: info integer(psb_lpk_), intent(in), optional :: nz + character(len=*), intent(in), optional :: type + class(psb_ld_base_sparse_mat), optional, intent(in) :: mold end subroutine psb_ld_csall end interface diff --git a/base/modules/serial/psb_i_base_vect_mod.f90 b/base/modules/serial/psb_i_base_vect_mod.F90 similarity index 98% rename from base/modules/serial/psb_i_base_vect_mod.f90 rename to base/modules/serial/psb_i_base_vect_mod.F90 index 55d7b47e..0289ecd0 100644 --- a/base/modules/serial/psb_i_base_vect_mod.f90 +++ b/base/modules/serial/psb_i_base_vect_mod.F90 @@ -202,14 +202,21 @@ contains integer(psb_ipk_), intent(in) :: this(:) class(psb_i_base_vect_type), intent(inout) :: x integer(psb_ipk_) :: info - + integer(psb_ipk_) :: i + call psb_realloc(size(this),x%v,info) if (info /= 0) then call psb_errpush(psb_err_alloc_dealloc_,'base_vect_bld') return end if +#if defined (OPENMP) + !$omp parallel do private(i) + do i = 1, size(this) + x%v(i) = this(i) + end do +#else x%v(:) = this(:) - +#endif end subroutine i_base_bld_x ! @@ -339,7 +346,6 @@ contains case(psb_dupl_ovwrt_) do i = 1, n !loop over all val's rows - ! row actual block row if ((1 <= irl(i)).and.(irl(i) <= isz)) then ! this row belongs to me @@ -719,7 +725,7 @@ contains integer(psb_ipk_) :: info integer(psb_ipk_), optional :: n ! Local variables - integer(psb_ipk_) :: isz + integer(psb_ipk_) :: isz, i if (.not.allocated(x%v)) return if (.not.x%is_host()) call x%sync() @@ -730,7 +736,15 @@ contains call psb_errpush(psb_err_alloc_dealloc_,'base_get_vect') return end if - res(1:isz) = x%v(1:isz) + if (.false.) then + res(1:isz) = x%v(1:isz) + else + !$omp parallel do private(i) + do i=1, isz + res(i) = x%v(i) + end do + end if + end function i_base_get_vect ! @@ -748,7 +762,7 @@ contains integer(psb_ipk_), intent(in) :: val integer(psb_ipk_), optional :: first, last - integer(psb_ipk_) :: first_, last_ + integer(psb_ipk_) :: first_, last_, i first_=1 last_=size(x%v) @@ -756,7 +770,14 @@ contains if (present(last)) last_ = min(last,last_) if (x%is_dev()) call x%sync() +#if defined(OPENMP) + !$omp parallel do private(i) + do i = first_, last_ + x%v(i) = val + end do +#else x%v(first_:last_) = val +#endif call x%set_host() end subroutine i_base_set_scal @@ -774,19 +795,27 @@ contains integer(psb_ipk_), intent(in) :: val(:) integer(psb_ipk_), optional :: first, last - integer(psb_ipk_) :: first_, last_ + integer(psb_ipk_) :: first_, last_, i, info + if (.not.allocated(x%v)) then + call psb_realloc(size(val),x%v,info) + end if + first_ = 1 if (present(first)) first_ = max(1,first) last_ = min(psb_size(x%v),first_+size(val)-1) if (present(last)) last_ = min(last,last_) - if (allocated(x%v)) then - if (x%is_dev()) call x%sync() + if (x%is_dev()) call x%sync() + +#if defined(OPENMP) + !$omp parallel do private(i) + do i = first_, last_ + x%v(i) = val(i-first_+1) + end do +#else x%v(first_:last_) = val(1:last_-first_+1) - else - x%v = val - end if +#endif call x%set_host() end subroutine i_base_set_vect @@ -980,9 +1009,6 @@ contains end module psb_i_base_vect_mod - - - module psb_i_base_multivect_mod use psb_const_mod diff --git a/base/modules/serial/psb_l_base_vect_mod.f90 b/base/modules/serial/psb_l_base_vect_mod.F90 similarity index 98% rename from base/modules/serial/psb_l_base_vect_mod.f90 rename to base/modules/serial/psb_l_base_vect_mod.F90 index 53b45f2a..d8654f63 100644 --- a/base/modules/serial/psb_l_base_vect_mod.f90 +++ b/base/modules/serial/psb_l_base_vect_mod.F90 @@ -203,14 +203,21 @@ contains integer(psb_lpk_), intent(in) :: this(:) class(psb_l_base_vect_type), intent(inout) :: x integer(psb_ipk_) :: info - + integer(psb_ipk_) :: i + call psb_realloc(size(this),x%v,info) if (info /= 0) then call psb_errpush(psb_err_alloc_dealloc_,'base_vect_bld') return end if +#if defined (OPENMP) + !$omp parallel do private(i) + do i = 1, size(this) + x%v(i) = this(i) + end do +#else x%v(:) = this(:) - +#endif end subroutine l_base_bld_x ! @@ -340,7 +347,6 @@ contains case(psb_dupl_ovwrt_) do i = 1, n !loop over all val's rows - ! row actual block row if ((1 <= irl(i)).and.(irl(i) <= isz)) then ! this row belongs to me @@ -720,7 +726,7 @@ contains integer(psb_ipk_) :: info integer(psb_ipk_), optional :: n ! Local variables - integer(psb_ipk_) :: isz + integer(psb_ipk_) :: isz, i if (.not.allocated(x%v)) return if (.not.x%is_host()) call x%sync() @@ -731,7 +737,15 @@ contains call psb_errpush(psb_err_alloc_dealloc_,'base_get_vect') return end if - res(1:isz) = x%v(1:isz) + if (.false.) then + res(1:isz) = x%v(1:isz) + else + !$omp parallel do private(i) + do i=1, isz + res(i) = x%v(i) + end do + end if + end function l_base_get_vect ! @@ -749,7 +763,7 @@ contains integer(psb_lpk_), intent(in) :: val integer(psb_ipk_), optional :: first, last - integer(psb_ipk_) :: first_, last_ + integer(psb_ipk_) :: first_, last_, i first_=1 last_=size(x%v) @@ -757,7 +771,14 @@ contains if (present(last)) last_ = min(last,last_) if (x%is_dev()) call x%sync() +#if defined(OPENMP) + !$omp parallel do private(i) + do i = first_, last_ + x%v(i) = val + end do +#else x%v(first_:last_) = val +#endif call x%set_host() end subroutine l_base_set_scal @@ -775,19 +796,27 @@ contains integer(psb_lpk_), intent(in) :: val(:) integer(psb_ipk_), optional :: first, last - integer(psb_ipk_) :: first_, last_ + integer(psb_ipk_) :: first_, last_, i, info + if (.not.allocated(x%v)) then + call psb_realloc(size(val),x%v,info) + end if + first_ = 1 if (present(first)) first_ = max(1,first) last_ = min(psb_size(x%v),first_+size(val)-1) if (present(last)) last_ = min(last,last_) - if (allocated(x%v)) then - if (x%is_dev()) call x%sync() + if (x%is_dev()) call x%sync() + +#if defined(OPENMP) + !$omp parallel do private(i) + do i = first_, last_ + x%v(i) = val(i-first_+1) + end do +#else x%v(first_:last_) = val(1:last_-first_+1) - else - x%v = val - end if +#endif call x%set_host() end subroutine l_base_set_vect @@ -981,9 +1010,6 @@ contains end module psb_l_base_vect_mod - - - module psb_l_base_multivect_mod use psb_const_mod diff --git a/base/modules/serial/psb_s_base_vect_mod.f90 b/base/modules/serial/psb_s_base_vect_mod.F90 similarity index 97% rename from base/modules/serial/psb_s_base_vect_mod.f90 rename to base/modules/serial/psb_s_base_vect_mod.F90 index c185e341..231b1dc7 100644 --- a/base/modules/serial/psb_s_base_vect_mod.f90 +++ b/base/modules/serial/psb_s_base_vect_mod.F90 @@ -273,14 +273,21 @@ contains real(psb_spk_), intent(in) :: this(:) class(psb_s_base_vect_type), intent(inout) :: x integer(psb_ipk_) :: info - + integer(psb_ipk_) :: i + call psb_realloc(size(this),x%v,info) if (info /= 0) then call psb_errpush(psb_err_alloc_dealloc_,'base_vect_bld') return end if +#if defined (OPENMP) + !$omp parallel do private(i) + do i = 1, size(this) + x%v(i) = this(i) + end do +#else x%v(:) = this(:) - +#endif end subroutine s_base_bld_x ! @@ -410,7 +417,6 @@ contains case(psb_dupl_ovwrt_) do i = 1, n !loop over all val's rows - ! row actual block row if ((1 <= irl(i)).and.(irl(i) <= isz)) then ! this row belongs to me @@ -790,7 +796,7 @@ contains integer(psb_ipk_) :: info integer(psb_ipk_), optional :: n ! Local variables - integer(psb_ipk_) :: isz + integer(psb_ipk_) :: isz, i if (.not.allocated(x%v)) return if (.not.x%is_host()) call x%sync() @@ -801,7 +807,15 @@ contains call psb_errpush(psb_err_alloc_dealloc_,'base_get_vect') return end if - res(1:isz) = x%v(1:isz) + if (.false.) then + res(1:isz) = x%v(1:isz) + else + !$omp parallel do private(i) + do i=1, isz + res(i) = x%v(i) + end do + end if + end function s_base_get_vect ! @@ -819,7 +833,7 @@ contains real(psb_spk_), intent(in) :: val integer(psb_ipk_), optional :: first, last - integer(psb_ipk_) :: first_, last_ + integer(psb_ipk_) :: first_, last_, i first_=1 last_=size(x%v) @@ -827,7 +841,14 @@ contains if (present(last)) last_ = min(last,last_) if (x%is_dev()) call x%sync() +#if defined(OPENMP) + !$omp parallel do private(i) + do i = first_, last_ + x%v(i) = val + end do +#else x%v(first_:last_) = val +#endif call x%set_host() end subroutine s_base_set_scal @@ -845,19 +866,27 @@ contains real(psb_spk_), intent(in) :: val(:) integer(psb_ipk_), optional :: first, last - integer(psb_ipk_) :: first_, last_ + integer(psb_ipk_) :: first_, last_, i, info + if (.not.allocated(x%v)) then + call psb_realloc(size(val),x%v,info) + end if + first_ = 1 if (present(first)) first_ = max(1,first) last_ = min(psb_size(x%v),first_+size(val)-1) if (present(last)) last_ = min(last,last_) - if (allocated(x%v)) then - if (x%is_dev()) call x%sync() + if (x%is_dev()) call x%sync() + +#if defined(OPENMP) + !$omp parallel do private(i) + do i = first_, last_ + x%v(i) = val(i-first_+1) + end do +#else x%v(first_:last_) = val(1:last_-first_+1) - else - x%v = val - end if +#endif call x%set_host() end subroutine s_base_set_vect @@ -895,9 +924,18 @@ contains implicit none class(psb_s_base_vect_type), intent(inout) :: x + integer(psb_ipk_) :: i + if (allocated(x%v)) then if (x%is_dev()) call x%sync() +#if defined(OPENMP) + !$omp parallel do private(i) + do i=1, size(x%v) + x%v(i) = abs(x%v(i)) + end do +#else x%v = abs(x%v) +#endif call x%set_host() end if @@ -1139,6 +1177,7 @@ contains info = 0 if (y%is_dev()) call y%sync() n = min(size(y%v), size(x)) + !$omp parallel do private(i) do i=1, n y%v(i) = y%v(i)*x(i) end do @@ -1176,6 +1215,7 @@ contains if (beta == sone) then return else + !$omp parallel do private(i) do i=1, n z%v(i) = beta*z%v(i) end do @@ -1183,42 +1223,51 @@ contains else if (alpha == sone) then if (beta == szero) then + !$omp parallel do private(i) do i=1, n z%v(i) = y(i)*x(i) end do else if (beta == sone) then + !$omp parallel do private(i) do i=1, n z%v(i) = z%v(i) + y(i)*x(i) end do else + !$omp parallel do private(i) do i=1, n z%v(i) = beta*z%v(i) + y(i)*x(i) end do end if else if (alpha == -sone) then if (beta == szero) then + !$omp parallel do private(i) do i=1, n z%v(i) = -y(i)*x(i) end do else if (beta == sone) then + !$omp parallel do private(i) do i=1, n z%v(i) = z%v(i) - y(i)*x(i) end do else + !$omp parallel do private(i) do i=1, n z%v(i) = beta*z%v(i) - y(i)*x(i) end do end if else if (beta == szero) then + !$omp parallel do private(i) do i=1, n z%v(i) = alpha*y(i)*x(i) end do else if (beta == sone) then + !$omp parallel do private(i) do i=1, n z%v(i) = z%v(i) + alpha*y(i)*x(i) end do else + !$omp parallel do private(i) do i=1, n z%v(i) = beta*z%v(i) + alpha*y(i)*x(i) end do @@ -1321,7 +1370,6 @@ contains if (x%is_dev()) call x%sync() call x%div(x%v,y%v,info) - end subroutine s_base_div_v ! !> Function base_div_v2 @@ -1365,7 +1413,6 @@ contains if (x%is_dev()) call x%sync() call x%div(x%v,y%v,info,flag) - end subroutine s_base_div_v_check ! !> Function base_div_v2_check @@ -1388,7 +1435,6 @@ contains if (z%is_dev()) call z%sync() call z%div(x%v,y%v,info,flag) - end subroutine s_base_div_v2_check ! !> Function base_div_a2 @@ -1410,6 +1456,7 @@ contains if (z%is_dev()) call z%sync() n = min(size(y), size(x)) + !$omp parallel do private(i) do i=1, n z%v(i) = x(i)/y(i) end do @@ -1440,6 +1487,7 @@ contains if (z%is_dev()) call z%sync() n = min(size(y), size(x)) + ! $omp parallel do private(i) do i=1, n if (y(i) /= 0) then z%v(i) = x(i)/y(i) @@ -1450,7 +1498,6 @@ contains end do end if - end subroutine s_base_div_a2_check ! !> Function base_inv_v @@ -1494,7 +1541,6 @@ contains if (y%is_dev()) call y%sync() call y%inv(x%v,info,flag) - end subroutine s_base_inv_v_check ! !> Function base_inv_a2 @@ -1516,6 +1562,7 @@ contains if (y%is_dev()) call y%sync() n = size(x) + !$omp parallel do private(i) do i=1, n y%v(i) = 1_psb_spk_/x(i) end do @@ -1546,6 +1593,7 @@ contains if (y%is_dev()) call y%sync() n = size(x) + !$omp parallel do private(i) do i=1, n if (x(i) /= 0) then y%v(i) = 1_psb_spk_/x(i) @@ -1580,6 +1628,7 @@ contains if (z%is_dev()) call z%sync() n = size(x) + !$omp parallel do private(i) do i = 1, n, 1 if ( abs(x(i)).ge.c ) then z%v(i) = 1_psb_spk_ @@ -1625,14 +1674,21 @@ contains implicit none class(psb_s_base_vect_type), intent(inout) :: x real(psb_spk_), intent (in) :: alpha + integer(psb_ipk_) :: i if (allocated(x%v)) then +#if defined(OPENMP) + !$omp parallel do private(i) + do i=1,size(x%v) + x%v(i) = alpha*x%v(i) + end do +#else x%v = alpha*x%v - call x%set_host() +#endif end if - + call x%set_host() end subroutine s_base_scal - + ! ! Norms 1, 2 and infinity ! @@ -1662,10 +1718,18 @@ contains class(psb_s_base_vect_type), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_spk_) :: res + integer(psb_ipk_) :: i if (x%is_dev()) call x%sync() +#if defined(OPENMP) + res = szero + !$omp parallel do private(i) reduction(max: res) + do i=1, n + res = max(res,abs(x%v(i))) + end do +#else res = maxval(abs(x%v(1:n))) - +#endif end function s_base_amax ! @@ -1678,10 +1742,18 @@ contains class(psb_s_base_vect_type), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_spk_) :: res + integer(psb_ipk_) :: i if (x%is_dev()) call x%sync() +#if defined(OPENMP) + res = HUGE(sone) + !$omp parallel do private(i) reduction(min: res) + do i=1, n + res = min(res,abs(x%v(i))) + end do +#else res = minval(x%v(1:n)) - +#endif end function s_base_min ! @@ -1730,10 +1802,11 @@ contains z = huge(z) n = min(size(y), size(x%v)) + !$omp parallel do private(i,temp) reduction(min: z) do i=1, n if ( y(i) /= szero ) then temp = x%v(i)/y(i) - if (temp <= z) z = temp + z = min(z,temp) end if end do @@ -1750,10 +1823,18 @@ contains class(psb_s_base_vect_type), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_spk_) :: res - + integer(psb_ipk_) :: i + if (x%is_dev()) call x%sync() +#if defined(OPENMP) + res=szero + !$omp parallel do private(i) reduction(+: res) + do i= 1, size(x%v) + res = res + abs(x%v(i)) + end do +#else res = sum(abs(x%v(1:n))) - +#endif end function s_base_asum @@ -2052,13 +2133,17 @@ contains integer(psb_ipk_) :: i, n if (z%is_dev()) call z%sync() - +#if defined(OPENMP) n = size(x) - do i = 1, n, 1 - z%v(i) = x(i) + b + !$omp parallel do private(i) + do i = 1, n + z%v(i) = x(i) + b end do +#else + z%v = x + b +#endif info = 0 - + end subroutine s_base_addconst_a2 ! !> Function _base_addconst_v2 @@ -2084,9 +2169,6 @@ contains end module psb_s_base_vect_mod - - - module psb_s_base_multivect_mod use psb_const_mod diff --git a/base/modules/serial/psb_s_mat_mod.F90 b/base/modules/serial/psb_s_mat_mod.F90 index 3553e96b..8e3934b8 100644 --- a/base/modules/serial/psb_s_mat_mod.F90 +++ b/base/modules/serial/psb_s_mat_mod.F90 @@ -128,6 +128,7 @@ module psb_s_mat_mod ! Memory/data management procedure, pass(a) :: csall => psb_s_csall + generic, public :: allocate => csall procedure, pass(a) :: free => psb_s_free procedure, pass(a) :: trim => psb_s_trim procedure, pass(a) :: csput_a => psb_s_csput_a @@ -326,6 +327,7 @@ module psb_s_mat_mod ! Memory/data management procedure, pass(a) :: csall => psb_ls_csall + generic, public :: allocate => csall procedure, pass(a) :: free => psb_ls_free procedure, pass(a) :: trim => psb_ls_trim procedure, pass(a) :: csput_a => psb_ls_csput_a @@ -604,12 +606,14 @@ module psb_s_mat_mod end interface interface - subroutine psb_s_csall(nr,nc,a,info,nz) - import :: psb_ipk_, psb_lpk_, psb_sspmat_type + subroutine psb_s_csall(nr,nc,a,info,nz,type,mold) + import :: psb_ipk_, psb_lpk_, psb_sspmat_type, psb_s_base_sparse_mat class(psb_sspmat_type), intent(inout) :: a integer(psb_ipk_), intent(in) :: nr,nc integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nz + character(len=*), intent(in), optional :: type + class(psb_s_base_sparse_mat), optional, intent(in) :: mold end subroutine psb_s_csall end interface @@ -1384,12 +1388,14 @@ module psb_s_mat_mod end interface interface - subroutine psb_ls_csall(nr,nc,a,info,nz) - import :: psb_ipk_, psb_lpk_, psb_lsspmat_type + subroutine psb_ls_csall(nr,nc,a,info,nz,type,mold) + import :: psb_ipk_, psb_lpk_, psb_lsspmat_type, psb_ls_base_sparse_mat class(psb_lsspmat_type), intent(inout) :: a integer(psb_lpk_), intent(in) :: nr,nc integer(psb_ipk_), intent(out) :: info integer(psb_lpk_), intent(in), optional :: nz + character(len=*), intent(in), optional :: type + class(psb_ls_base_sparse_mat), optional, intent(in) :: mold end subroutine psb_ls_csall end interface diff --git a/base/modules/serial/psb_serial_mod.f90 b/base/modules/serial/psb_serial_mod.f90 index 2f2154e0..627b318e 100644 --- a/base/modules/serial/psb_serial_mod.f90 +++ b/base/modules/serial/psb_serial_mod.f90 @@ -1,9 +1,9 @@ -! +! ! Parallel Sparse BLAS version 3.5 ! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! +! Salvatore Filippone +! Alfredo Buttari +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -15,7 +15,7 @@ ! 3. The name of the PSBLAS group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. -! +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -27,8 +27,8 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! -! +! +! module psb_serial_mod use psb_const_mod use psb_error_mod @@ -66,9 +66,42 @@ module psb_serial_mod real(psb_dpk_), intent (in) :: alpha, beta integer(psb_ipk_), intent(out) :: info end subroutine psb_d_nspaxpby + subroutine psb_s_nspaxpby(nz,iz,z,alpha, nx, ix, x, beta, ny,iy,y,info) + use psb_const_mod + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), intent(out) :: iz(:) + real(psb_spk_), intent (out) :: z(:) + integer(psb_ipk_), intent(in) :: nx, ny + integer(psb_ipk_), intent(in) :: ix(:), iy(:) + real(psb_spk_), intent (in) :: x(:), y(:) + real(psb_spk_), intent (in) :: alpha, beta + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_nspaxpby + subroutine psb_c_nspaxpby(nz,iz,z,alpha, nx, ix, x, beta, ny,iy,y,info) + use psb_const_mod + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), intent(out) :: iz(:) + complex(psb_spk_), intent (out) :: z(:) + integer(psb_ipk_), intent(in) :: nx, ny + integer(psb_ipk_), intent(in) :: ix(:), iy(:) + complex(psb_spk_), intent (in) :: x(:), y(:) + complex(psb_spk_), intent (in) :: alpha, beta + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_nspaxpby + subroutine psb_z_nspaxpby(nz,iz,z,alpha, nx, ix, x, beta, ny,iy,y,info) + use psb_const_mod + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), intent(out) :: iz(:) + complex(psb_dpk_), intent (out) :: z(:) + integer(psb_ipk_), intent(in) :: nx, ny + integer(psb_ipk_), intent(in) :: ix(:), iy(:) + complex(psb_dpk_), intent (in) :: x(:), y(:) + complex(psb_dpk_), intent (in) :: alpha, beta + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_nspaxpby end interface psb_nspaxpby - interface + interface subroutine symbmm (n, m, l, ia, ja, diaga, & & ib, jb, diagb, ic, jc, diagc, index) import :: psb_ipk_ @@ -84,7 +117,7 @@ module psb_serial_mod integer(psb_lpk_), allocatable :: ic(:),jc(:) end subroutine lsymbmm end interface - + contains @@ -103,13 +136,13 @@ contains elemental function psb_cnrm1(x) result(res) complex(psb_spk_), intent(in) :: x real(psb_spk_) :: res - res = abs( real( x ) ) + abs( aimag( x ) ) + res = abs( real( x ) ) + abs( aimag( x ) ) end function psb_cnrm1 elemental function psb_znrm1(x) result(res) complex(psb_dpk_), intent(in) :: x real(psb_dpk_) :: res - res = abs( real( x ) ) + abs( aimag( x ) ) + res = abs( real( x ) ) + abs( aimag( x ) ) end function psb_znrm1 elemental function psb_sminreal(x) result(res) @@ -127,13 +160,13 @@ contains elemental function psb_cminreal(x) result(res) complex(psb_spk_), intent(in) :: x real(psb_spk_) :: res - res = min( real( x ) , aimag( x ) ) + res = min( real( x ) , aimag( x ) ) end function psb_cminreal elemental function psb_zminreal(x) result(res) complex(psb_dpk_), intent(in) :: x real(psb_dpk_) :: res - res = min( real( x ) , aimag( x ) ) + res = min( real( x ) , aimag( x ) ) end function psb_zminreal @@ -197,7 +230,7 @@ contains ! .. executable statements .. ! if( n <= 0 ) return - if( incx == 1 .and. incy == 1 ) then + if( incx == 1 .and. incy == 1 ) then ! ! code for both increments equal to 1 ! @@ -232,7 +265,7 @@ contains real(psb_spk_) norm,scale complex(psb_spk_) alpha ! - if (cabs(ca) == 0.0) then + if (cabs(ca) == 0.0) then ! c = 0.0d0 s = (1.0,0.0) @@ -316,7 +349,7 @@ contains ! .. executable statements .. ! if( n <= 0 ) return - if( incx == 1 .and. incy == 1 ) then + if( incx == 1 .and. incy == 1 ) then ! ! code for both increments equal to 1 ! @@ -351,7 +384,7 @@ contains real(psb_dpk_) norm,scale complex(psb_dpk_) alpha ! - if (cdabs(ca) == 0.0d0) then + if (cdabs(ca) == 0.0d0) then ! c = 0.0d0 s = (1.0d0,0.0d0) @@ -374,4 +407,3 @@ contains end module psb_serial_mod - diff --git a/base/modules/serial/psb_z_base_vect_mod.f90 b/base/modules/serial/psb_z_base_vect_mod.F90 similarity index 97% rename from base/modules/serial/psb_z_base_vect_mod.f90 rename to base/modules/serial/psb_z_base_vect_mod.F90 index 1daed233..08cfb840 100644 --- a/base/modules/serial/psb_z_base_vect_mod.f90 +++ b/base/modules/serial/psb_z_base_vect_mod.F90 @@ -266,14 +266,21 @@ contains complex(psb_dpk_), intent(in) :: this(:) class(psb_z_base_vect_type), intent(inout) :: x integer(psb_ipk_) :: info - + integer(psb_ipk_) :: i + call psb_realloc(size(this),x%v,info) if (info /= 0) then call psb_errpush(psb_err_alloc_dealloc_,'base_vect_bld') return end if +#if defined (OPENMP) + !$omp parallel do private(i) + do i = 1, size(this) + x%v(i) = this(i) + end do +#else x%v(:) = this(:) - +#endif end subroutine z_base_bld_x ! @@ -403,7 +410,6 @@ contains case(psb_dupl_ovwrt_) do i = 1, n !loop over all val's rows - ! row actual block row if ((1 <= irl(i)).and.(irl(i) <= isz)) then ! this row belongs to me @@ -783,7 +789,7 @@ contains integer(psb_ipk_) :: info integer(psb_ipk_), optional :: n ! Local variables - integer(psb_ipk_) :: isz + integer(psb_ipk_) :: isz, i if (.not.allocated(x%v)) return if (.not.x%is_host()) call x%sync() @@ -794,7 +800,15 @@ contains call psb_errpush(psb_err_alloc_dealloc_,'base_get_vect') return end if - res(1:isz) = x%v(1:isz) + if (.false.) then + res(1:isz) = x%v(1:isz) + else + !$omp parallel do private(i) + do i=1, isz + res(i) = x%v(i) + end do + end if + end function z_base_get_vect ! @@ -812,7 +826,7 @@ contains complex(psb_dpk_), intent(in) :: val integer(psb_ipk_), optional :: first, last - integer(psb_ipk_) :: first_, last_ + integer(psb_ipk_) :: first_, last_, i first_=1 last_=size(x%v) @@ -820,7 +834,14 @@ contains if (present(last)) last_ = min(last,last_) if (x%is_dev()) call x%sync() +#if defined(OPENMP) + !$omp parallel do private(i) + do i = first_, last_ + x%v(i) = val + end do +#else x%v(first_:last_) = val +#endif call x%set_host() end subroutine z_base_set_scal @@ -838,19 +859,27 @@ contains complex(psb_dpk_), intent(in) :: val(:) integer(psb_ipk_), optional :: first, last - integer(psb_ipk_) :: first_, last_ + integer(psb_ipk_) :: first_, last_, i, info + if (.not.allocated(x%v)) then + call psb_realloc(size(val),x%v,info) + end if + first_ = 1 if (present(first)) first_ = max(1,first) last_ = min(psb_size(x%v),first_+size(val)-1) if (present(last)) last_ = min(last,last_) - if (allocated(x%v)) then - if (x%is_dev()) call x%sync() + if (x%is_dev()) call x%sync() + +#if defined(OPENMP) + !$omp parallel do private(i) + do i = first_, last_ + x%v(i) = val(i-first_+1) + end do +#else x%v(first_:last_) = val(1:last_-first_+1) - else - x%v = val - end if +#endif call x%set_host() end subroutine z_base_set_vect @@ -888,9 +917,18 @@ contains implicit none class(psb_z_base_vect_type), intent(inout) :: x + integer(psb_ipk_) :: i + if (allocated(x%v)) then if (x%is_dev()) call x%sync() +#if defined(OPENMP) + !$omp parallel do private(i) + do i=1, size(x%v) + x%v(i) = abs(x%v(i)) + end do +#else x%v = abs(x%v) +#endif call x%set_host() end if @@ -1132,6 +1170,7 @@ contains info = 0 if (y%is_dev()) call y%sync() n = min(size(y%v), size(x)) + !$omp parallel do private(i) do i=1, n y%v(i) = y%v(i)*x(i) end do @@ -1169,6 +1208,7 @@ contains if (beta == zone) then return else + !$omp parallel do private(i) do i=1, n z%v(i) = beta*z%v(i) end do @@ -1176,42 +1216,51 @@ contains else if (alpha == zone) then if (beta == zzero) then + !$omp parallel do private(i) do i=1, n z%v(i) = y(i)*x(i) end do else if (beta == zone) then + !$omp parallel do private(i) do i=1, n z%v(i) = z%v(i) + y(i)*x(i) end do else + !$omp parallel do private(i) do i=1, n z%v(i) = beta*z%v(i) + y(i)*x(i) end do end if else if (alpha == -zone) then if (beta == zzero) then + !$omp parallel do private(i) do i=1, n z%v(i) = -y(i)*x(i) end do else if (beta == zone) then + !$omp parallel do private(i) do i=1, n z%v(i) = z%v(i) - y(i)*x(i) end do else + !$omp parallel do private(i) do i=1, n z%v(i) = beta*z%v(i) - y(i)*x(i) end do end if else if (beta == zzero) then + !$omp parallel do private(i) do i=1, n z%v(i) = alpha*y(i)*x(i) end do else if (beta == zone) then + !$omp parallel do private(i) do i=1, n z%v(i) = z%v(i) + alpha*y(i)*x(i) end do else + !$omp parallel do private(i) do i=1, n z%v(i) = beta*z%v(i) + alpha*y(i)*x(i) end do @@ -1314,7 +1363,6 @@ contains if (x%is_dev()) call x%sync() call x%div(x%v,y%v,info) - end subroutine z_base_div_v ! !> Function base_div_v2 @@ -1358,7 +1406,6 @@ contains if (x%is_dev()) call x%sync() call x%div(x%v,y%v,info,flag) - end subroutine z_base_div_v_check ! !> Function base_div_v2_check @@ -1381,7 +1428,6 @@ contains if (z%is_dev()) call z%sync() call z%div(x%v,y%v,info,flag) - end subroutine z_base_div_v2_check ! !> Function base_div_a2 @@ -1403,6 +1449,7 @@ contains if (z%is_dev()) call z%sync() n = min(size(y), size(x)) + !$omp parallel do private(i) do i=1, n z%v(i) = x(i)/y(i) end do @@ -1433,6 +1480,7 @@ contains if (z%is_dev()) call z%sync() n = min(size(y), size(x)) + ! $omp parallel do private(i) do i=1, n if (y(i) /= 0) then z%v(i) = x(i)/y(i) @@ -1443,7 +1491,6 @@ contains end do end if - end subroutine z_base_div_a2_check ! !> Function base_inv_v @@ -1487,7 +1534,6 @@ contains if (y%is_dev()) call y%sync() call y%inv(x%v,info,flag) - end subroutine z_base_inv_v_check ! !> Function base_inv_a2 @@ -1509,6 +1555,7 @@ contains if (y%is_dev()) call y%sync() n = size(x) + !$omp parallel do private(i) do i=1, n y%v(i) = 1_psb_dpk_/x(i) end do @@ -1539,6 +1586,7 @@ contains if (y%is_dev()) call y%sync() n = size(x) + !$omp parallel do private(i) do i=1, n if (x(i) /= 0) then y%v(i) = 1_psb_dpk_/x(i) @@ -1573,6 +1621,7 @@ contains if (z%is_dev()) call z%sync() n = size(x) + !$omp parallel do private(i) do i = 1, n, 1 if ( abs(x(i)).ge.c ) then z%v(i) = 1_psb_dpk_ @@ -1618,14 +1667,21 @@ contains implicit none class(psb_z_base_vect_type), intent(inout) :: x complex(psb_dpk_), intent (in) :: alpha + integer(psb_ipk_) :: i if (allocated(x%v)) then +#if defined(OPENMP) + !$omp parallel do private(i) + do i=1,size(x%v) + x%v(i) = alpha*x%v(i) + end do +#else x%v = alpha*x%v - call x%set_host() +#endif end if - + call x%set_host() end subroutine z_base_scal - + ! ! Norms 1, 2 and infinity ! @@ -1655,10 +1711,18 @@ contains class(psb_z_base_vect_type), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_dpk_) :: res + integer(psb_ipk_) :: i if (x%is_dev()) call x%sync() +#if defined(OPENMP) + res = dzero + !$omp parallel do private(i) reduction(max: res) + do i=1, n + res = max(res,abs(x%v(i))) + end do +#else res = maxval(abs(x%v(1:n))) - +#endif end function z_base_amax @@ -1672,10 +1736,18 @@ contains class(psb_z_base_vect_type), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_dpk_) :: res - + integer(psb_ipk_) :: i + if (x%is_dev()) call x%sync() +#if defined(OPENMP) + res=dzero + !$omp parallel do private(i) reduction(+: res) + do i= 1, size(x%v) + res = res + abs(x%v(i)) + end do +#else res = sum(abs(x%v(1:n))) - +#endif end function z_base_asum @@ -1882,13 +1954,17 @@ contains integer(psb_ipk_) :: i, n if (z%is_dev()) call z%sync() - +#if defined(OPENMP) n = size(x) - do i = 1, n, 1 - z%v(i) = x(i) + b + !$omp parallel do private(i) + do i = 1, n + z%v(i) = x(i) + b end do +#else + z%v = x + b +#endif info = 0 - + end subroutine z_base_addconst_a2 ! !> Function _base_addconst_v2 @@ -1914,9 +1990,6 @@ contains end module psb_z_base_vect_mod - - - module psb_z_base_multivect_mod use psb_const_mod diff --git a/base/modules/serial/psb_z_mat_mod.F90 b/base/modules/serial/psb_z_mat_mod.F90 index 35586b3e..ed3338f9 100644 --- a/base/modules/serial/psb_z_mat_mod.F90 +++ b/base/modules/serial/psb_z_mat_mod.F90 @@ -128,6 +128,7 @@ module psb_z_mat_mod ! Memory/data management procedure, pass(a) :: csall => psb_z_csall + generic, public :: allocate => csall procedure, pass(a) :: free => psb_z_free procedure, pass(a) :: trim => psb_z_trim procedure, pass(a) :: csput_a => psb_z_csput_a @@ -326,6 +327,7 @@ module psb_z_mat_mod ! Memory/data management procedure, pass(a) :: csall => psb_lz_csall + generic, public :: allocate => csall procedure, pass(a) :: free => psb_lz_free procedure, pass(a) :: trim => psb_lz_trim procedure, pass(a) :: csput_a => psb_lz_csput_a @@ -604,12 +606,14 @@ module psb_z_mat_mod end interface interface - subroutine psb_z_csall(nr,nc,a,info,nz) - import :: psb_ipk_, psb_lpk_, psb_zspmat_type + subroutine psb_z_csall(nr,nc,a,info,nz,type,mold) + import :: psb_ipk_, psb_lpk_, psb_zspmat_type, psb_z_base_sparse_mat class(psb_zspmat_type), intent(inout) :: a integer(psb_ipk_), intent(in) :: nr,nc integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nz + character(len=*), intent(in), optional :: type + class(psb_z_base_sparse_mat), optional, intent(in) :: mold end subroutine psb_z_csall end interface @@ -1384,12 +1388,14 @@ module psb_z_mat_mod end interface interface - subroutine psb_lz_csall(nr,nc,a,info,nz) - import :: psb_ipk_, psb_lpk_, psb_lzspmat_type + subroutine psb_lz_csall(nr,nc,a,info,nz,type,mold) + import :: psb_ipk_, psb_lpk_, psb_lzspmat_type, psb_lz_base_sparse_mat class(psb_lzspmat_type), intent(inout) :: a integer(psb_lpk_), intent(in) :: nr,nc integer(psb_ipk_), intent(out) :: info integer(psb_lpk_), intent(in), optional :: nz + character(len=*), intent(in), optional :: type + class(psb_lz_base_sparse_mat), optional, intent(in) :: mold end subroutine psb_lz_csall end interface diff --git a/base/modules/tools/psb_c_tools_mod.F90 b/base/modules/tools/psb_c_tools_mod.F90 index 81e78d3a..378e146b 100644 --- a/base/modules/tools/psb_c_tools_mod.F90 +++ b/base/modules/tools/psb_c_tools_mod.F90 @@ -431,5 +431,21 @@ Module psb_c_tools_mod end function end interface + interface psb_remap + subroutine psb_c_remap(np_remap, desc_in, a_in, & + & ipd, isrc, nrsrc, naggr, desc_out, a_out, info) + import + implicit none + !....parameters... + integer(psb_ipk_), intent(in) :: np_remap + type(psb_desc_type), intent(inout) :: desc_in + type(psb_cspmat_type), intent(inout) :: a_in + type(psb_cspmat_type), intent(out) :: a_out + type(psb_desc_type), intent(out) :: desc_out + integer(psb_ipk_), intent(out) :: ipd + integer(psb_ipk_), allocatable, intent(out) :: isrc(:), nrsrc(:), naggr(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_remap + end interface psb_remap end module psb_c_tools_mod diff --git a/base/modules/tools/psb_d_tools_mod.F90 b/base/modules/tools/psb_d_tools_mod.F90 index 76a5bdf2..81c75ece 100644 --- a/base/modules/tools/psb_d_tools_mod.F90 +++ b/base/modules/tools/psb_d_tools_mod.F90 @@ -431,5 +431,21 @@ Module psb_d_tools_mod end function end interface + interface psb_remap + subroutine psb_d_remap(np_remap, desc_in, a_in, & + & ipd, isrc, nrsrc, naggr, desc_out, a_out, info) + import + implicit none + !....parameters... + integer(psb_ipk_), intent(in) :: np_remap + type(psb_desc_type), intent(inout) :: desc_in + type(psb_dspmat_type), intent(inout) :: a_in + type(psb_dspmat_type), intent(out) :: a_out + type(psb_desc_type), intent(out) :: desc_out + integer(psb_ipk_), intent(out) :: ipd + integer(psb_ipk_), allocatable, intent(out) :: isrc(:), nrsrc(:), naggr(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_remap + end interface psb_remap end module psb_d_tools_mod diff --git a/base/modules/tools/psb_i_tools_mod.F90 b/base/modules/tools/psb_i_tools_mod.F90 index def96326..5cc6e836 100644 --- a/base/modules/tools/psb_i_tools_mod.F90 +++ b/base/modules/tools/psb_i_tools_mod.F90 @@ -170,5 +170,4 @@ Module psb_i_tools_mod end subroutine psb_iins_multivect end interface - end module psb_i_tools_mod diff --git a/base/modules/tools/psb_l_tools_mod.F90 b/base/modules/tools/psb_l_tools_mod.F90 index b389ef85..56617798 100644 --- a/base/modules/tools/psb_l_tools_mod.F90 +++ b/base/modules/tools/psb_l_tools_mod.F90 @@ -170,5 +170,4 @@ Module psb_l_tools_mod end subroutine psb_lins_multivect end interface - end module psb_l_tools_mod diff --git a/base/modules/tools/psb_s_tools_mod.F90 b/base/modules/tools/psb_s_tools_mod.F90 index 2b6058da..fa82a53e 100644 --- a/base/modules/tools/psb_s_tools_mod.F90 +++ b/base/modules/tools/psb_s_tools_mod.F90 @@ -431,5 +431,21 @@ Module psb_s_tools_mod end function end interface + interface psb_remap + subroutine psb_s_remap(np_remap, desc_in, a_in, & + & ipd, isrc, nrsrc, naggr, desc_out, a_out, info) + import + implicit none + !....parameters... + integer(psb_ipk_), intent(in) :: np_remap + type(psb_desc_type), intent(inout) :: desc_in + type(psb_sspmat_type), intent(inout) :: a_in + type(psb_sspmat_type), intent(out) :: a_out + type(psb_desc_type), intent(out) :: desc_out + integer(psb_ipk_), intent(out) :: ipd + integer(psb_ipk_), allocatable, intent(out) :: isrc(:), nrsrc(:), naggr(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_remap + end interface psb_remap end module psb_s_tools_mod diff --git a/base/modules/tools/psb_z_tools_mod.F90 b/base/modules/tools/psb_z_tools_mod.F90 index 09997e94..233f2c20 100644 --- a/base/modules/tools/psb_z_tools_mod.F90 +++ b/base/modules/tools/psb_z_tools_mod.F90 @@ -431,5 +431,21 @@ Module psb_z_tools_mod end function end interface + interface psb_remap + subroutine psb_z_remap(np_remap, desc_in, a_in, & + & ipd, isrc, nrsrc, naggr, desc_out, a_out, info) + import + implicit none + !....parameters... + integer(psb_ipk_), intent(in) :: np_remap + type(psb_desc_type), intent(inout) :: desc_in + type(psb_zspmat_type), intent(inout) :: a_in + type(psb_zspmat_type), intent(out) :: a_out + type(psb_desc_type), intent(out) :: desc_out + integer(psb_ipk_), intent(out) :: ipd + integer(psb_ipk_), allocatable, intent(out) :: isrc(:), nrsrc(:), naggr(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_remap + end interface psb_remap end module psb_z_tools_mod diff --git a/base/psblas/psb_cgetmatinfo.f90 b/base/psblas/psb_cgetmatinfo.F90 similarity index 96% rename from base/psblas/psb_cgetmatinfo.f90 rename to base/psblas/psb_cgetmatinfo.F90 index f9c77166..fdfb0cba 100644 --- a/base/psblas/psb_cgetmatinfo.f90 +++ b/base/psblas/psb_cgetmatinfo.F90 @@ -37,9 +37,13 @@ function psb_cget_nnz(a,desc_a,info) result(res) use psb_base_mod, psb_protect_name => psb_cget_nnz use psi_mod - use mpi - - implicit none +#ifdef MPI_MOD + use mpi +#endif + implicit none +#ifdef MPI_H + include 'mpif.h' +#endif integer(psb_lpk_) :: res type(psb_cspmat_type), intent(in) :: a diff --git a/base/psblas/psb_dgetmatinfo.f90 b/base/psblas/psb_dgetmatinfo.F90 similarity index 96% rename from base/psblas/psb_dgetmatinfo.f90 rename to base/psblas/psb_dgetmatinfo.F90 index 51ef5ca8..16a1d3ca 100644 --- a/base/psblas/psb_dgetmatinfo.f90 +++ b/base/psblas/psb_dgetmatinfo.F90 @@ -37,9 +37,13 @@ function psb_dget_nnz(a,desc_a,info) result(res) use psb_base_mod, psb_protect_name => psb_dget_nnz use psi_mod - use mpi - - implicit none +#ifdef MPI_MOD + use mpi +#endif + implicit none +#ifdef MPI_H + include 'mpif.h' +#endif integer(psb_lpk_) :: res type(psb_dspmat_type), intent(in) :: a diff --git a/base/psblas/psb_sgetmatinfo.f90 b/base/psblas/psb_sgetmatinfo.F90 similarity index 96% rename from base/psblas/psb_sgetmatinfo.f90 rename to base/psblas/psb_sgetmatinfo.F90 index 2da00f27..abf1210c 100644 --- a/base/psblas/psb_sgetmatinfo.f90 +++ b/base/psblas/psb_sgetmatinfo.F90 @@ -37,9 +37,13 @@ function psb_sget_nnz(a,desc_a,info) result(res) use psb_base_mod, psb_protect_name => psb_sget_nnz use psi_mod - use mpi - - implicit none +#ifdef MPI_MOD + use mpi +#endif + implicit none +#ifdef MPI_H + include 'mpif.h' +#endif integer(psb_lpk_) :: res type(psb_sspmat_type), intent(in) :: a diff --git a/base/psblas/psb_zgetmatinfo.f90 b/base/psblas/psb_zgetmatinfo.F90 similarity index 96% rename from base/psblas/psb_zgetmatinfo.f90 rename to base/psblas/psb_zgetmatinfo.F90 index 08482963..fab395f2 100644 --- a/base/psblas/psb_zgetmatinfo.f90 +++ b/base/psblas/psb_zgetmatinfo.F90 @@ -37,9 +37,13 @@ function psb_zget_nnz(a,desc_a,info) result(res) use psb_base_mod, psb_protect_name => psb_zget_nnz use psi_mod - use mpi - - implicit none +#ifdef MPI_MOD + use mpi +#endif + implicit none +#ifdef MPI_H + include 'mpif.h' +#endif integer(psb_lpk_) :: res type(psb_zspmat_type), intent(in) :: a diff --git a/base/serial/Makefile b/base/serial/Makefile index 5bff0b64..0f17a0a4 100644 --- a/base/serial/Makefile +++ b/base/serial/Makefile @@ -11,10 +11,10 @@ FOBJS = psb_lsame.o psi_m_serial_impl.o psi_e_serial_impl.o \ smmp.o lsmmp.o \ psb_sgeprt.o psb_dgeprt.o psb_cgeprt.o psb_zgeprt.o\ psb_spdot_srtd.o psb_aspxpby.o psb_spge_dot.o\ - psb_sgelp.o psb_dgelp.o psb_cgelp.o psb_zgelp.o \ psb_samax_s.o psb_damax_s.o psb_camax_s.o psb_zamax_s.o \ psb_sasum_s.o psb_dasum_s.o psb_casum_s.o psb_zasum_s.o + LIBDIR=.. INCDIR=.. MODDIR=../modules diff --git a/base/serial/impl/psb_c_base_mat_impl.F90 b/base/serial/impl/psb_c_base_mat_impl.F90 index 6d7824be..17f2cdc8 100644 --- a/base/serial/impl/psb_c_base_mat_impl.F90 +++ b/base/serial/impl/psb_c_base_mat_impl.F90 @@ -250,7 +250,6 @@ subroutine psb_c_base_mv_from_coo(a,b,info) end subroutine psb_c_base_mv_from_coo - subroutine psb_c_base_mv_to_fmt(a,b,info) use psb_c_base_mat_mod, psb_protect_name => psb_c_base_mv_to_fmt use psb_error_mod @@ -698,6 +697,8 @@ subroutine psb_c_base_tril(a,l,info,& call psb_realloc(max(mb,nb),ia,info) call psb_realloc(max(mb,nb),ja,info) call psb_realloc(max(mb,nb),val,info) + ! Implementing this in OpenMP? + ! Tricky, to be seen do i=imin_,imax_, nbk ibk = min(nbk,imax_-i+1) call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,& @@ -849,6 +850,8 @@ subroutine psb_c_base_triu(a,u,info,& call psb_realloc(max(mb,nb),ia,info) call psb_realloc(max(mb,nb),ja,info) call psb_realloc(max(mb,nb),val,info) + ! Implementing this in OpenMP? + ! Tricky, to be seen do i=imin_,imax_, nbk ibk = min(nbk,imax_-i+1) call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,& @@ -914,8 +917,6 @@ subroutine psb_c_base_triu(a,u,info,& end subroutine psb_c_base_triu - - subroutine psb_c_base_clone(a,b,info) use psb_c_base_mat_mod, psb_protect_name => psb_c_base_clone use psb_error_mod @@ -960,6 +961,7 @@ subroutine psb_c_base_make_nonunit(a) mnm = min(m,n) nz = tmp%get_nzeros() call tmp%reallocate(nz+mnm) + !$omp parallel do private(i) shared(nz) do i=1, mnm tmp%val(nz+i) = cone tmp%ia(nz+i) = i @@ -1506,6 +1508,7 @@ contains complex(psb_spk_), intent(out) :: y(*) integer(psb_ipk_) :: i + !$omp parallel do private(i) do i=1,n y(i) = d(i)*x(i) end do @@ -1519,6 +1522,7 @@ contains complex(psb_spk_), intent(inout) :: x(*) integer(psb_ipk_) :: i + !$omp parallel do private(i) do i=1,n x(i) = d(i)*x(i) end do @@ -3182,6 +3186,8 @@ subroutine psb_lc_base_tril(a,l,info,& call psb_realloc(max(mb,nb),ia,info) call psb_realloc(max(mb,nb),ja,info) call psb_realloc(max(mb,nb),val,info) + ! Implementing this in OpenMP? + ! Tricky, to be seen do i=imin_,imax_, nbk ibk = min(nbk,imax_-i+1) call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,& @@ -3334,6 +3340,8 @@ subroutine psb_lc_base_triu(a,u,info,& call psb_realloc(max(mb,nb),ia,info) call psb_realloc(max(mb,nb),ja,info) call psb_realloc(max(mb,nb),val,info) + ! Implementing this in OpenMP? + ! Tricky, to be seen do i=imin_,imax_, nbk ibk = min(nbk,imax_-i+1) call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,& @@ -3446,6 +3454,7 @@ subroutine psb_lc_base_make_nonunit(a) mnm = min(m,n) nz = tmp%get_nzeros() call tmp%reallocate(nz+mnm) + !$omp parallel do private(i) shared(nz) do i=1, mnm tmp%val(nz+i) = cone tmp%ia(nz+i) = i diff --git a/base/serial/impl/psb_c_coo_impl.F90 b/base/serial/impl/psb_c_coo_impl.F90 index 88bdb66f..4eb2f26e 100644 --- a/base/serial/impl/psb_c_coo_impl.F90 +++ b/base/serial/impl/psb_c_coo_impl.F90 @@ -114,6 +114,7 @@ subroutine psb_c_coo_scal(d,a,info,side) goto 9999 end if + !$omp parallel do private(i,j) do i=1,a%get_nzeros() j = a%ia(i) a%val(i) = a%val(i) * d(j) @@ -126,6 +127,7 @@ subroutine psb_c_coo_scal(d,a,info,side) goto 9999 end if + !$omp parallel do private(i,j) do i=1,a%get_nzeros() j = a%ja(i) a%val(i) = a%val(i) * d(j) @@ -201,6 +203,7 @@ subroutine psb_c_coo_scalplusidentity(d,a,info) end if mnm = min(a%get_nrows(),a%get_ncols()) + !$omp parallel do private(i,j) do i=1,a%get_nzeros() a%val(i) = a%val(i) * d j=a%ia(i) @@ -253,12 +256,30 @@ subroutine psb_c_coo_spaxpby(alpha,a,beta,b,info) ! Allocate (temporary) space for the solution call tcoo%allocate(M,N,(nza+nzb)) ! Compute the sum +#if defined (OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, nza + tcoo%ia(i) = a%ia(i) + tcoo%ja(i) = a%ja(i) + tcoo%val(i) = alpha*a%val(i) + end do + !$omp parallel do private(i) + do i=1, nzb + tcoo%ia(nza+i) = bcoo%ia(i) + tcoo%ja(nza+i) = bcoo%ja(i) + tcoo%val(nza+i) = beta*bcoo%val(i) + end do + end block +#else tcoo%ia(1:nza) = a%ia(1:nza) tcoo%ja(1:nza) = a%ja(1:nza) tcoo%val(1:nza) = alpha*a%val(1:nza) tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb) tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb) tcoo%val(nza+1:nza+nzb) = beta*bcoo%val(1:nzb) +#endif ! Fix the indexes call tcoo%fix(info) ! Move to correct output format @@ -346,12 +367,30 @@ function psb_c_coo_cmpmat(a,b,tol,info) result(res) ! Allocate (temporary) space for the solution call tcoo%allocate(M,N,(nza+nzb)) ! Compute the sum +#if defined (OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, nza + tcoo%ia(i) = a%ia(i) + tcoo%ja(i) = a%ja(i) + tcoo%val(i) = alpha*a%val(i) + end do + !$omp parallel do private(i) + do i=1, nzb + tcoo%ia(nza+i) = bcoo%ia(i) + tcoo%ja(nza+i) = bcoo%ja(i) + tcoo%val(nza+i) = (-sone)*beta*bcoo%val(i) + end do + end block +#else tcoo%ia(1:nza) = a%ia(1:nza) tcoo%ja(1:nza) = a%ja(1:nza) tcoo%val(1:nza) = a%val(1:nza) tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb) tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb) tcoo%val(nza+1:nza+nzb) = (-sone)*bcoo%val(1:nzb) +#endif ! Fix the indexes call tcoo%fix(info) if (info /= psb_success_) then @@ -728,9 +767,6 @@ subroutine psb_c_coo_print(iout,a,iv,head,ivr,ivc) end subroutine psb_c_coo_print - - - function psb_c_coo_get_nz_row(idx,a) result(res) use psb_const_mod use psb_sort_mod @@ -1670,7 +1706,6 @@ subroutine psb_c_coo_csmv(alpha,a,x,beta,y,info,trans) end subroutine psb_c_coo_csmv - subroutine psb_c_coo_csmm(alpha,a,x,beta,y,info,trans) use psb_const_mod use psb_error_mod @@ -1709,11 +1744,9 @@ subroutine psb_c_coo_csmm(alpha,a,x,beta,y,info,trans) trans_ = 'N' end if - tra = (psb_toupper(trans_) == 'T') ctra = (psb_toupper(trans_) == 'C') - if (tra.or.ctra) then m = a%get_ncols() n = a%get_nrows() @@ -1895,7 +1928,15 @@ function psb_c_coo_maxval(a) result(res) nnz = a%get_nzeros() if (allocated(a%val)) then nnz = min(nnz,size(a%val)) +#if defined(OPENMP) + res = szero + !$omp parallel do private(i) reduction(max: res) + do i=1, nnz + res = max(res,abs(a%val(i))) + end do +#else res = maxval(abs(a%val(1:nnz))) +#endif end if end function psb_c_coo_maxval @@ -2275,11 +2316,13 @@ subroutine psb_c_coo_csgetptn(imin,imax,a,nz,ia,ja,info,& & iren) if (rscale_) then + !$omp parallel do private(i) do i=nzin_+1, nzin_+nz ia(i) = ia(i) - imin + 1 end do end if if (cscale_) then + !$omp parallel do private(i) do i=nzin_+1, nzin_+nz ja(i) = ja(i) - jmin_ + 1 end do @@ -2553,11 +2596,13 @@ subroutine psb_c_coo_csgetrow(imin,imax,a,nz,ia,ja,val,info,& & iren) if (rscale_) then + !$omp parallel do private(i) do i=nzin_+1, nzin_+nz ia(i) = ia(i) - imin + 1 end do end if if (cscale_) then + !$omp parallel do private(i) do i=nzin_+1, nzin_+nz ja(i) = ja(i) - jmin_ + 1 end do @@ -2768,7 +2813,6 @@ contains end subroutine psb_c_coo_csgetrow - subroutine psb_c_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_error_mod use psb_realloc_mod @@ -3021,7 +3065,6 @@ contains end subroutine psb_c_coo_csput_a - subroutine psb_c_cp_coo_to_coo(a,b,info) use psb_error_mod use psb_c_base_mat_mod, psb_protect_name => psb_c_cp_coo_to_coo @@ -3045,10 +3088,21 @@ subroutine psb_c_cp_coo_to_coo(a,b,info) call b%set_nzeros(nz) call b%reallocate(nz) +#if defined(OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, nz + b%ia(i) = a%ia(i) + b%ja(i) = a%ja(i) + b%val(i) = a%val(i) + end do + end block +#else b%ia(1:nz) = a%ia(1:nz) b%ja(1:nz) = a%ja(1:nz) b%val(1:nz) = a%val(1:nz) - +#endif call b%set_host() if (.not.b%is_by_rows()) call b%fix(info) @@ -3087,10 +3141,21 @@ subroutine psb_c_cp_coo_from_coo(a,b,info) call a%set_nzeros(nz) call a%reallocate(nz) +#if defined(OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, nz + a%ia(i) = b%ia(i) + a%ja(i) = b%ja(i) + a%val(i) = b%val(i) + end do + end block +#else a%ia(1:nz) = b%ia(1:nz) a%ja(1:nz) = b%ja(1:nz) a%val(1:nz) = b%val(1:nz) - +#endif call a%set_host() if (.not.a%is_by_rows()) call a%fix(info) @@ -3445,8 +3510,6 @@ subroutine psb_c_fix_coo(a,info,idir) end subroutine psb_c_fix_coo - - subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) use psb_const_mod use psb_error_mod @@ -4174,7 +4237,6 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) end subroutine psb_c_fix_coo_inner - subroutine psb_c_cp_coo_to_lcoo(a,b,info) use psb_error_mod use psb_c_base_mat_mod, psb_protect_name => psb_c_cp_coo_to_lcoo @@ -4199,10 +4261,21 @@ subroutine psb_c_cp_coo_to_lcoo(a,b,info) call b%set_nzeros(nz) call b%reallocate(nz) +#if defined(OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, nz + b%ia(i) = a%ia(i) + b%ja(i) = a%ja(i) + b%val(i) = a%val(i) + end do + end block +#else b%ia(1:nz) = a%ia(1:nz) b%ja(1:nz) = a%ja(1:nz) b%val(1:nz) = a%val(1:nz) - +#endif call b%set_host() if (.not.b%is_by_rows()) call b%fix(info) @@ -4240,10 +4313,21 @@ subroutine psb_c_cp_coo_from_lcoo(a,b,info) call a%set_nzeros(nz) call a%reallocate(nz) +#if defined(OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, nz + a%ia(i) = b%ia(i) + a%ja(i) = b%ja(i) + a%val(i) = b%val(i) + end do + end block +#else a%ia(1:nz) = b%ia(1:nz) a%ja(1:nz) = b%ja(1:nz) a%val(1:nz) = b%val(1:nz) - +#endif call a%set_host() if (.not.a%is_by_rows()) call a%fix(info) @@ -4442,7 +4526,17 @@ function psb_lc_coo_maxval(a) result(res) nnz = a%get_nzeros() if (allocated(a%val)) then nnz = min(nnz,size(a%val)) +#if defined(OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, nnz + res = max(res,abs(a%val(i))) + end do + end block +#else res = maxval(abs(a%val(1:nnz))) +#endif end if end function psb_lc_coo_maxval @@ -4499,7 +4593,17 @@ function psb_lc_coo_csnmi(a) result(res) i = a%ia(j) vt(i) = vt(i) + abs(a%val(j)) end do +#if defined(OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, m + res = max(res,abs(vt(i))) + end do + end block +#else res = maxval(vt(1:m)) +#endif deallocate(vt,stat=info) end if @@ -4539,7 +4643,17 @@ function psb_lc_coo_csnm1(a) result(res) i = a%ja(j) vt(i) = vt(i) + abs(a%val(j)) end do +#if defined(OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, n + res = max(res,abs(vt(i))) + end do + end block +#else res = maxval(vt(1:n)) +#endif deallocate(vt,stat=info) return @@ -4584,7 +4698,6 @@ subroutine psb_lc_coo_rowsum(d,a) d(i) = d(i) + a%val(j) end do - return call psb_erractionrestore(err_act) return @@ -4592,7 +4705,6 @@ subroutine psb_lc_coo_rowsum(d,a) 9999 call psb_error_handler(err_act) return - end subroutine psb_lc_coo_rowsum subroutine psb_lc_coo_arwsum(d,a) @@ -4761,6 +4873,7 @@ subroutine psb_lc_coo_scalplusidentity(d,a,info) end if mnm = min(a%get_nrows(),a%get_ncols()) + !$omp parallel do private(i,j) do i=1,a%get_nzeros() a%val(i) = a%val(i) * d j=a%ia(i) @@ -4813,12 +4926,30 @@ subroutine psb_lc_coo_spaxpby(alpha,a,beta,b,info) ! Allocate (temporary) space for the solution call tcoo%allocate(M,N,(nza+nzb)) ! Compute the sum +#if defined(OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, nza + tcoo%ia(i) = a%ia(i) + tcoo%ja(i) = a%ja(i) + tcoo%val(i) = alpha*a%val(i) + end do + !$omp parallel do private(i) + do i=1, nzb + tcoo%ia(nza+i) = bcoo%ia(i) + tcoo%ja(nza+i) = bcoo%ja(i) + tcoo%val(nza+i) = beta*bcoo%val(i) + end do + end block +#else tcoo%ia(1:nza) = a%ia(1:nza) tcoo%ja(1:nza) = a%ja(1:nza) tcoo%val(1:nza) = alpha*a%val(1:nza) tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb) tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb) tcoo%val(nza+1:nza+nzb) = beta*bcoo%val(1:nzb) +#endif ! Fix the indexes call tcoo%fix(info) ! Move to correct output format @@ -4906,12 +5037,30 @@ function psb_lc_coo_cmpmat(a,b,tol,info) result(res) ! Allocate (temporary) space for the solution call tcoo%allocate(M,N,(nza+nzb)) ! Compute the sum +#if defined(OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, nza + tcoo%ia(i) = a%ia(i) + tcoo%ja(i) = a%ja(i) + tcoo%val(i) = a%val(i) + end do + !$omp parallel do private(i) + do i=1, nzb + tcoo%ia(nza+i) = bcoo%ia(i) + tcoo%ja(nza+i) = bcoo%ja(i) + tcoo%val(nza+i) = (-1_psb_spk_)*bcoo%val(i) + end do + end block +#else tcoo%ia(1:nza) = a%ia(1:nza) tcoo%ja(1:nza) = a%ja(1:nza) tcoo%val(1:nza) = a%val(1:nza) tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb) tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb) tcoo%val(nza+1:nza+nzb) = (-1_psb_spk_)*bcoo%val(1:nzb) +#endif ! Fix the indexes call tcoo%fix(info) if (info /= psb_success_) then @@ -5950,7 +6099,7 @@ subroutine psb_lc_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) if (nz < 0) then info = psb_err_iarg_neg_ - call psb_errpush(info,name,i_err=(/1_psb_ipk_/)) +3 call psb_errpush(info,name,i_err=(/1_psb_ipk_/)) goto 9999 end if if (size(ia) < nz) then diff --git a/base/serial/impl/psb_c_csr_impl.f90 b/base/serial/impl/psb_c_csr_impl.f90 index 1f4242fd..25d0a086 100644 --- a/base/serial/impl/psb_c_csr_impl.f90 +++ b/base/serial/impl/psb_c_csr_impl.f90 @@ -130,10 +130,12 @@ contains if (alpha == czero) then if (beta == czero) then + !$omp parallel do private(i) do i = 1, m y(i) = czero enddo else + !$omp parallel do private(i) do i = 1, m y(i) = beta*y(i) end do @@ -147,6 +149,7 @@ contains if (beta == czero) then if (alpha == cone) then + !$omp parallel do private(i,j, acc) schedule(static) do i=1,m acc = czero do j=irp(i), irp(i+1)-1 @@ -157,6 +160,7 @@ contains else if (alpha == -cone) then + !$omp parallel do private(i,j, acc) do i=1,m acc = czero do j=irp(i), irp(i+1)-1 @@ -167,6 +171,7 @@ contains else + !$omp parallel do private(i,j,acc) do i=1,m acc = czero do j=irp(i), irp(i+1)-1 @@ -181,6 +186,7 @@ contains else if (beta == cone) then if (alpha == cone) then + !$omp parallel do private(i,j,acc) do i=1,m acc = czero do j=irp(i), irp(i+1)-1 @@ -191,6 +197,7 @@ contains else if (alpha == -cone) then + !$omp parallel do private(i,j,acc) do i=1,m acc = czero do j=irp(i), irp(i+1)-1 @@ -201,6 +208,7 @@ contains else + !$omp parallel do private(i,j,acc) do i=1,m acc = czero do j=irp(i), irp(i+1)-1 @@ -214,6 +222,7 @@ contains else if (beta == -cone) then if (alpha == cone) then + !$omp parallel do private(i,j,acc) do i=1,m acc = czero do j=irp(i), irp(i+1)-1 @@ -224,6 +233,7 @@ contains else if (alpha == -cone) then + !$omp parallel do private(i,j,acc) do i=1,m acc = czero do j=irp(i), irp(i+1)-1 @@ -234,6 +244,7 @@ contains else + !$omp parallel do private(i,j,acc) do i=1,m acc = czero do j=irp(i), irp(i+1)-1 @@ -247,6 +258,7 @@ contains else if (alpha == cone) then + !$omp parallel do private(i,j,acc) do i=1,m acc = czero do j=irp(i), irp(i+1)-1 @@ -257,6 +269,7 @@ contains else if (alpha == -cone) then + !$omp parallel do private(i,j,acc) do i=1,m acc = czero do j=irp(i), irp(i+1)-1 @@ -267,6 +280,7 @@ contains else + !$omp parallel do private(i,j,acc) do i=1,m acc = czero do j=irp(i), irp(i+1)-1 @@ -282,16 +296,19 @@ contains else if (tra) then if (beta == czero) then + !$omp parallel do private(i) do i=1, m y(i) = czero end do else if (beta == cone) then ! Do nothing else if (beta == -cone) then + !$omp parallel do private(i) do i=1, m y(i) = -y(i) end do else + !$omp parallel do private(i) do i=1, m y(i) = beta*y(i) end do @@ -476,16 +493,18 @@ contains complex(psb_spk_), intent(inout) :: y(ldy,*) logical, intent(in) :: is_triangle,is_unit,tra,ctra - complex(psb_spk_), intent(inout) :: acc(*) + complex(psb_spk_), intent(inout) :: acc(:) integer(psb_ipk_) :: i,j, ir if (alpha == czero) then if (beta == czero) then + !$omp parallel do private(i) do i = 1, m y(i,1:nc) = czero enddo else + !$omp parallel do private(i) do i = 1, m y(i,1:nc) = beta*y(i,1:nc) end do @@ -497,6 +516,7 @@ contains if (beta == czero) then if (alpha == cone) then + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = czero do j=irp(i), irp(i+1)-1 @@ -507,6 +527,7 @@ contains else if (alpha == -cone) then + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = czero do j=irp(i), irp(i+1)-1 @@ -517,6 +538,7 @@ contains else + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = czero do j=irp(i), irp(i+1)-1 @@ -531,6 +553,7 @@ contains else if (beta == cone) then if (alpha == cone) then + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = czero do j=irp(i), irp(i+1)-1 @@ -541,6 +564,7 @@ contains else if (alpha == -cone) then + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = czero do j=irp(i), irp(i+1)-1 @@ -551,6 +575,7 @@ contains else + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = czero do j=irp(i), irp(i+1)-1 @@ -564,6 +589,7 @@ contains else if (beta == -cone) then if (alpha == cone) then + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = czero do j=irp(i), irp(i+1)-1 @@ -574,6 +600,7 @@ contains else if (alpha == -cone) then + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = czero do j=irp(i), irp(i+1)-1 @@ -584,6 +611,7 @@ contains else + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = czero do j=irp(i), irp(i+1)-1 @@ -597,6 +625,7 @@ contains else if (alpha == cone) then + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = czero do j=irp(i), irp(i+1)-1 @@ -607,6 +636,7 @@ contains else if (alpha == -cone) then + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = czero do j=irp(i), irp(i+1)-1 @@ -617,6 +647,7 @@ contains else + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = czero do j=irp(i), irp(i+1)-1 diff --git a/base/serial/impl/psb_c_mat_impl.F90 b/base/serial/impl/psb_c_mat_impl.F90 index 69c67d02..cc112015 100644 --- a/base/serial/impl/psb_c_mat_impl.F90 +++ b/base/serial/impl/psb_c_mat_impl.F90 @@ -582,7 +582,7 @@ end subroutine psb_c_get_neigh -subroutine psb_c_csall(nr,nc,a,info,nz) +subroutine psb_c_csall(nr,nc,a,info,nz,type,mold) use psb_c_mat_mod, psb_protect_name => psb_c_csall use psb_c_base_mat_mod use psb_error_mod @@ -591,6 +591,8 @@ subroutine psb_c_csall(nr,nc,a,info,nz) integer(psb_ipk_), intent(in) :: nr,nc integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nz + character(len=*), intent(in), optional :: type + class(psb_c_base_sparse_mat), optional, intent(in) :: mold integer(psb_ipk_) :: err_act character(len=20) :: name='csall' @@ -601,7 +603,23 @@ subroutine psb_c_csall(nr,nc,a,info,nz) call a%free() info = psb_success_ - allocate(psb_c_coo_sparse_mat :: a%a, stat=info) + if (present(mold)) then + allocate(a%a, stat=info, mold=mold) + else if (present(type)) then + select case (type) + case('CSR') + allocate(psb_c_csr_sparse_mat :: a%a, stat=info) + case('COO') + allocate(psb_c_coo_sparse_mat :: a%a, stat=info) + case('CSC') + allocate(psb_c_csc_sparse_mat :: a%a, stat=info) + case default + allocate(psb_c_coo_sparse_mat :: a%a, stat=info) + end select + else + allocate(psb_c_coo_sparse_mat :: a%a, stat=info) + end if + if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info, name) @@ -3381,7 +3399,7 @@ end subroutine psb_lc_get_neigh -subroutine psb_lc_csall(nr,nc,a,info,nz) +subroutine psb_lc_csall(nr,nc,a,info,nz,type,mold) use psb_c_mat_mod, psb_protect_name => psb_lc_csall use psb_c_base_mat_mod use psb_error_mod @@ -3390,6 +3408,8 @@ subroutine psb_lc_csall(nr,nc,a,info,nz) integer(psb_lpk_), intent(in) :: nr,nc integer(psb_ipk_), intent(out) :: info integer(psb_lpk_), intent(in), optional :: nz + character(len=*), intent(in), optional :: type + class(psb_lc_base_sparse_mat), optional, intent(in) :: mold integer(psb_ipk_) :: err_act character(len=20) :: name='csall' @@ -3400,7 +3420,22 @@ subroutine psb_lc_csall(nr,nc,a,info,nz) call a%free() info = psb_success_ - allocate(psb_lc_coo_sparse_mat :: a%a, stat=info) + if (present(mold)) then + allocate(a%a, stat=info, mold=mold) + else if (present(type)) then + select case (type) + case('CSR') + allocate(psb_lc_csr_sparse_mat :: a%a, stat=info) + case('COO') + allocate(psb_lc_coo_sparse_mat :: a%a, stat=info) + case('CSC') + allocate(psb_lc_csc_sparse_mat :: a%a, stat=info) + case default + allocate(psb_lc_coo_sparse_mat :: a%a, stat=info) + end select + else + allocate(psb_lc_coo_sparse_mat :: a%a, stat=info) + end if if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info, name) diff --git a/base/serial/impl/psb_d_base_mat_impl.F90 b/base/serial/impl/psb_d_base_mat_impl.F90 index 30cb4d1e..69112529 100644 --- a/base/serial/impl/psb_d_base_mat_impl.F90 +++ b/base/serial/impl/psb_d_base_mat_impl.F90 @@ -250,7 +250,6 @@ subroutine psb_d_base_mv_from_coo(a,b,info) end subroutine psb_d_base_mv_from_coo - subroutine psb_d_base_mv_to_fmt(a,b,info) use psb_d_base_mat_mod, psb_protect_name => psb_d_base_mv_to_fmt use psb_error_mod @@ -698,6 +697,8 @@ subroutine psb_d_base_tril(a,l,info,& call psb_realloc(max(mb,nb),ia,info) call psb_realloc(max(mb,nb),ja,info) call psb_realloc(max(mb,nb),val,info) + ! Implementing this in OpenMP? + ! Tricky, to be seen do i=imin_,imax_, nbk ibk = min(nbk,imax_-i+1) call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,& @@ -849,6 +850,8 @@ subroutine psb_d_base_triu(a,u,info,& call psb_realloc(max(mb,nb),ia,info) call psb_realloc(max(mb,nb),ja,info) call psb_realloc(max(mb,nb),val,info) + ! Implementing this in OpenMP? + ! Tricky, to be seen do i=imin_,imax_, nbk ibk = min(nbk,imax_-i+1) call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,& @@ -914,8 +917,6 @@ subroutine psb_d_base_triu(a,u,info,& end subroutine psb_d_base_triu - - subroutine psb_d_base_clone(a,b,info) use psb_d_base_mat_mod, psb_protect_name => psb_d_base_clone use psb_error_mod @@ -960,6 +961,7 @@ subroutine psb_d_base_make_nonunit(a) mnm = min(m,n) nz = tmp%get_nzeros() call tmp%reallocate(nz+mnm) + !$omp parallel do private(i) shared(nz) do i=1, mnm tmp%val(nz+i) = done tmp%ia(nz+i) = i @@ -1506,6 +1508,7 @@ contains real(psb_dpk_), intent(out) :: y(*) integer(psb_ipk_) :: i + !$omp parallel do private(i) do i=1,n y(i) = d(i)*x(i) end do @@ -1519,6 +1522,7 @@ contains real(psb_dpk_), intent(inout) :: x(*) integer(psb_ipk_) :: i + !$omp parallel do private(i) do i=1,n x(i) = d(i)*x(i) end do @@ -3182,6 +3186,8 @@ subroutine psb_ld_base_tril(a,l,info,& call psb_realloc(max(mb,nb),ia,info) call psb_realloc(max(mb,nb),ja,info) call psb_realloc(max(mb,nb),val,info) + ! Implementing this in OpenMP? + ! Tricky, to be seen do i=imin_,imax_, nbk ibk = min(nbk,imax_-i+1) call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,& @@ -3334,6 +3340,8 @@ subroutine psb_ld_base_triu(a,u,info,& call psb_realloc(max(mb,nb),ia,info) call psb_realloc(max(mb,nb),ja,info) call psb_realloc(max(mb,nb),val,info) + ! Implementing this in OpenMP? + ! Tricky, to be seen do i=imin_,imax_, nbk ibk = min(nbk,imax_-i+1) call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,& @@ -3446,6 +3454,7 @@ subroutine psb_ld_base_make_nonunit(a) mnm = min(m,n) nz = tmp%get_nzeros() call tmp%reallocate(nz+mnm) + !$omp parallel do private(i) shared(nz) do i=1, mnm tmp%val(nz+i) = done tmp%ia(nz+i) = i diff --git a/base/serial/impl/psb_d_coo_impl.F90 b/base/serial/impl/psb_d_coo_impl.F90 index cd5ea5a8..6b3aafc8 100644 --- a/base/serial/impl/psb_d_coo_impl.F90 +++ b/base/serial/impl/psb_d_coo_impl.F90 @@ -114,6 +114,7 @@ subroutine psb_d_coo_scal(d,a,info,side) goto 9999 end if + !$omp parallel do private(i,j) do i=1,a%get_nzeros() j = a%ia(i) a%val(i) = a%val(i) * d(j) @@ -126,6 +127,7 @@ subroutine psb_d_coo_scal(d,a,info,side) goto 9999 end if + !$omp parallel do private(i,j) do i=1,a%get_nzeros() j = a%ja(i) a%val(i) = a%val(i) * d(j) @@ -201,6 +203,7 @@ subroutine psb_d_coo_scalplusidentity(d,a,info) end if mnm = min(a%get_nrows(),a%get_ncols()) + !$omp parallel do private(i,j) do i=1,a%get_nzeros() a%val(i) = a%val(i) * d j=a%ia(i) @@ -253,12 +256,30 @@ subroutine psb_d_coo_spaxpby(alpha,a,beta,b,info) ! Allocate (temporary) space for the solution call tcoo%allocate(M,N,(nza+nzb)) ! Compute the sum +#if defined (OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, nza + tcoo%ia(i) = a%ia(i) + tcoo%ja(i) = a%ja(i) + tcoo%val(i) = alpha*a%val(i) + end do + !$omp parallel do private(i) + do i=1, nzb + tcoo%ia(nza+i) = bcoo%ia(i) + tcoo%ja(nza+i) = bcoo%ja(i) + tcoo%val(nza+i) = beta*bcoo%val(i) + end do + end block +#else tcoo%ia(1:nza) = a%ia(1:nza) tcoo%ja(1:nza) = a%ja(1:nza) tcoo%val(1:nza) = alpha*a%val(1:nza) tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb) tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb) tcoo%val(nza+1:nza+nzb) = beta*bcoo%val(1:nzb) +#endif ! Fix the indexes call tcoo%fix(info) ! Move to correct output format @@ -346,12 +367,30 @@ function psb_d_coo_cmpmat(a,b,tol,info) result(res) ! Allocate (temporary) space for the solution call tcoo%allocate(M,N,(nza+nzb)) ! Compute the sum +#if defined (OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, nza + tcoo%ia(i) = a%ia(i) + tcoo%ja(i) = a%ja(i) + tcoo%val(i) = alpha*a%val(i) + end do + !$omp parallel do private(i) + do i=1, nzb + tcoo%ia(nza+i) = bcoo%ia(i) + tcoo%ja(nza+i) = bcoo%ja(i) + tcoo%val(nza+i) = (-done)*beta*bcoo%val(i) + end do + end block +#else tcoo%ia(1:nza) = a%ia(1:nza) tcoo%ja(1:nza) = a%ja(1:nza) tcoo%val(1:nza) = a%val(1:nza) tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb) tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb) tcoo%val(nza+1:nza+nzb) = (-done)*bcoo%val(1:nzb) +#endif ! Fix the indexes call tcoo%fix(info) if (info /= psb_success_) then @@ -728,9 +767,6 @@ subroutine psb_d_coo_print(iout,a,iv,head,ivr,ivc) end subroutine psb_d_coo_print - - - function psb_d_coo_get_nz_row(idx,a) result(res) use psb_const_mod use psb_sort_mod @@ -1670,7 +1706,6 @@ subroutine psb_d_coo_csmv(alpha,a,x,beta,y,info,trans) end subroutine psb_d_coo_csmv - subroutine psb_d_coo_csmm(alpha,a,x,beta,y,info,trans) use psb_const_mod use psb_error_mod @@ -1709,11 +1744,9 @@ subroutine psb_d_coo_csmm(alpha,a,x,beta,y,info,trans) trans_ = 'N' end if - tra = (psb_toupper(trans_) == 'T') ctra = (psb_toupper(trans_) == 'C') - if (tra.or.ctra) then m = a%get_ncols() n = a%get_nrows() @@ -1895,7 +1928,15 @@ function psb_d_coo_maxval(a) result(res) nnz = a%get_nzeros() if (allocated(a%val)) then nnz = min(nnz,size(a%val)) +#if defined(OPENMP) + res = dzero + !$omp parallel do private(i) reduction(max: res) + do i=1, nnz + res = max(res,abs(a%val(i))) + end do +#else res = maxval(abs(a%val(1:nnz))) +#endif end if end function psb_d_coo_maxval @@ -2275,11 +2316,13 @@ subroutine psb_d_coo_csgetptn(imin,imax,a,nz,ia,ja,info,& & iren) if (rscale_) then + !$omp parallel do private(i) do i=nzin_+1, nzin_+nz ia(i) = ia(i) - imin + 1 end do end if if (cscale_) then + !$omp parallel do private(i) do i=nzin_+1, nzin_+nz ja(i) = ja(i) - jmin_ + 1 end do @@ -2553,11 +2596,13 @@ subroutine psb_d_coo_csgetrow(imin,imax,a,nz,ia,ja,val,info,& & iren) if (rscale_) then + !$omp parallel do private(i) do i=nzin_+1, nzin_+nz ia(i) = ia(i) - imin + 1 end do end if if (cscale_) then + !$omp parallel do private(i) do i=nzin_+1, nzin_+nz ja(i) = ja(i) - jmin_ + 1 end do @@ -2768,7 +2813,6 @@ contains end subroutine psb_d_coo_csgetrow - subroutine psb_d_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_error_mod use psb_realloc_mod @@ -3021,7 +3065,6 @@ contains end subroutine psb_d_coo_csput_a - subroutine psb_d_cp_coo_to_coo(a,b,info) use psb_error_mod use psb_d_base_mat_mod, psb_protect_name => psb_d_cp_coo_to_coo @@ -3045,10 +3088,21 @@ subroutine psb_d_cp_coo_to_coo(a,b,info) call b%set_nzeros(nz) call b%reallocate(nz) +#if defined(OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, nz + b%ia(i) = a%ia(i) + b%ja(i) = a%ja(i) + b%val(i) = a%val(i) + end do + end block +#else b%ia(1:nz) = a%ia(1:nz) b%ja(1:nz) = a%ja(1:nz) b%val(1:nz) = a%val(1:nz) - +#endif call b%set_host() if (.not.b%is_by_rows()) call b%fix(info) @@ -3087,10 +3141,21 @@ subroutine psb_d_cp_coo_from_coo(a,b,info) call a%set_nzeros(nz) call a%reallocate(nz) +#if defined(OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, nz + a%ia(i) = b%ia(i) + a%ja(i) = b%ja(i) + a%val(i) = b%val(i) + end do + end block +#else a%ia(1:nz) = b%ia(1:nz) a%ja(1:nz) = b%ja(1:nz) a%val(1:nz) = b%val(1:nz) - +#endif call a%set_host() if (.not.a%is_by_rows()) call a%fix(info) @@ -3445,8 +3510,6 @@ subroutine psb_d_fix_coo(a,info,idir) end subroutine psb_d_fix_coo - - subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) use psb_const_mod use psb_error_mod @@ -4174,7 +4237,6 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) end subroutine psb_d_fix_coo_inner - subroutine psb_d_cp_coo_to_lcoo(a,b,info) use psb_error_mod use psb_d_base_mat_mod, psb_protect_name => psb_d_cp_coo_to_lcoo @@ -4199,10 +4261,21 @@ subroutine psb_d_cp_coo_to_lcoo(a,b,info) call b%set_nzeros(nz) call b%reallocate(nz) +#if defined(OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, nz + b%ia(i) = a%ia(i) + b%ja(i) = a%ja(i) + b%val(i) = a%val(i) + end do + end block +#else b%ia(1:nz) = a%ia(1:nz) b%ja(1:nz) = a%ja(1:nz) b%val(1:nz) = a%val(1:nz) - +#endif call b%set_host() if (.not.b%is_by_rows()) call b%fix(info) @@ -4240,10 +4313,21 @@ subroutine psb_d_cp_coo_from_lcoo(a,b,info) call a%set_nzeros(nz) call a%reallocate(nz) +#if defined(OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, nz + a%ia(i) = b%ia(i) + a%ja(i) = b%ja(i) + a%val(i) = b%val(i) + end do + end block +#else a%ia(1:nz) = b%ia(1:nz) a%ja(1:nz) = b%ja(1:nz) a%val(1:nz) = b%val(1:nz) - +#endif call a%set_host() if (.not.a%is_by_rows()) call a%fix(info) @@ -4442,7 +4526,17 @@ function psb_ld_coo_maxval(a) result(res) nnz = a%get_nzeros() if (allocated(a%val)) then nnz = min(nnz,size(a%val)) +#if defined(OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, nnz + res = max(res,abs(a%val(i))) + end do + end block +#else res = maxval(abs(a%val(1:nnz))) +#endif end if end function psb_ld_coo_maxval @@ -4499,7 +4593,17 @@ function psb_ld_coo_csnmi(a) result(res) i = a%ia(j) vt(i) = vt(i) + abs(a%val(j)) end do +#if defined(OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, m + res = max(res,abs(vt(i))) + end do + end block +#else res = maxval(vt(1:m)) +#endif deallocate(vt,stat=info) end if @@ -4539,7 +4643,17 @@ function psb_ld_coo_csnm1(a) result(res) i = a%ja(j) vt(i) = vt(i) + abs(a%val(j)) end do +#if defined(OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, n + res = max(res,abs(vt(i))) + end do + end block +#else res = maxval(vt(1:n)) +#endif deallocate(vt,stat=info) return @@ -4584,7 +4698,6 @@ subroutine psb_ld_coo_rowsum(d,a) d(i) = d(i) + a%val(j) end do - return call psb_erractionrestore(err_act) return @@ -4592,7 +4705,6 @@ subroutine psb_ld_coo_rowsum(d,a) 9999 call psb_error_handler(err_act) return - end subroutine psb_ld_coo_rowsum subroutine psb_ld_coo_arwsum(d,a) @@ -4761,6 +4873,7 @@ subroutine psb_ld_coo_scalplusidentity(d,a,info) end if mnm = min(a%get_nrows(),a%get_ncols()) + !$omp parallel do private(i,j) do i=1,a%get_nzeros() a%val(i) = a%val(i) * d j=a%ia(i) @@ -4813,12 +4926,30 @@ subroutine psb_ld_coo_spaxpby(alpha,a,beta,b,info) ! Allocate (temporary) space for the solution call tcoo%allocate(M,N,(nza+nzb)) ! Compute the sum +#if defined(OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, nza + tcoo%ia(i) = a%ia(i) + tcoo%ja(i) = a%ja(i) + tcoo%val(i) = alpha*a%val(i) + end do + !$omp parallel do private(i) + do i=1, nzb + tcoo%ia(nza+i) = bcoo%ia(i) + tcoo%ja(nza+i) = bcoo%ja(i) + tcoo%val(nza+i) = beta*bcoo%val(i) + end do + end block +#else tcoo%ia(1:nza) = a%ia(1:nza) tcoo%ja(1:nza) = a%ja(1:nza) tcoo%val(1:nza) = alpha*a%val(1:nza) tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb) tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb) tcoo%val(nza+1:nza+nzb) = beta*bcoo%val(1:nzb) +#endif ! Fix the indexes call tcoo%fix(info) ! Move to correct output format @@ -4906,12 +5037,30 @@ function psb_ld_coo_cmpmat(a,b,tol,info) result(res) ! Allocate (temporary) space for the solution call tcoo%allocate(M,N,(nza+nzb)) ! Compute the sum +#if defined(OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, nza + tcoo%ia(i) = a%ia(i) + tcoo%ja(i) = a%ja(i) + tcoo%val(i) = a%val(i) + end do + !$omp parallel do private(i) + do i=1, nzb + tcoo%ia(nza+i) = bcoo%ia(i) + tcoo%ja(nza+i) = bcoo%ja(i) + tcoo%val(nza+i) = (-1_psb_dpk_)*bcoo%val(i) + end do + end block +#else tcoo%ia(1:nza) = a%ia(1:nza) tcoo%ja(1:nza) = a%ja(1:nza) tcoo%val(1:nza) = a%val(1:nza) tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb) tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb) tcoo%val(nza+1:nza+nzb) = (-1_psb_dpk_)*bcoo%val(1:nzb) +#endif ! Fix the indexes call tcoo%fix(info) if (info /= psb_success_) then @@ -5950,7 +6099,7 @@ subroutine psb_ld_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) if (nz < 0) then info = psb_err_iarg_neg_ - call psb_errpush(info,name,i_err=(/1_psb_ipk_/)) +3 call psb_errpush(info,name,i_err=(/1_psb_ipk_/)) goto 9999 end if if (size(ia) < nz) then diff --git a/base/serial/impl/psb_d_csr_impl.f90 b/base/serial/impl/psb_d_csr_impl.f90 index 0c572ac3..853476d6 100644 --- a/base/serial/impl/psb_d_csr_impl.f90 +++ b/base/serial/impl/psb_d_csr_impl.f90 @@ -130,10 +130,12 @@ contains if (alpha == dzero) then if (beta == dzero) then + !$omp parallel do private(i) do i = 1, m y(i) = dzero enddo else + !$omp parallel do private(i) do i = 1, m y(i) = beta*y(i) end do @@ -147,6 +149,7 @@ contains if (beta == dzero) then if (alpha == done) then + !$omp parallel do private(i,j, acc) schedule(static) do i=1,m acc = dzero do j=irp(i), irp(i+1)-1 @@ -157,6 +160,7 @@ contains else if (alpha == -done) then + !$omp parallel do private(i,j, acc) do i=1,m acc = dzero do j=irp(i), irp(i+1)-1 @@ -167,6 +171,7 @@ contains else + !$omp parallel do private(i,j,acc) do i=1,m acc = dzero do j=irp(i), irp(i+1)-1 @@ -181,6 +186,7 @@ contains else if (beta == done) then if (alpha == done) then + !$omp parallel do private(i,j,acc) do i=1,m acc = dzero do j=irp(i), irp(i+1)-1 @@ -191,6 +197,7 @@ contains else if (alpha == -done) then + !$omp parallel do private(i,j,acc) do i=1,m acc = dzero do j=irp(i), irp(i+1)-1 @@ -201,6 +208,7 @@ contains else + !$omp parallel do private(i,j,acc) do i=1,m acc = dzero do j=irp(i), irp(i+1)-1 @@ -214,6 +222,7 @@ contains else if (beta == -done) then if (alpha == done) then + !$omp parallel do private(i,j,acc) do i=1,m acc = dzero do j=irp(i), irp(i+1)-1 @@ -224,6 +233,7 @@ contains else if (alpha == -done) then + !$omp parallel do private(i,j,acc) do i=1,m acc = dzero do j=irp(i), irp(i+1)-1 @@ -234,6 +244,7 @@ contains else + !$omp parallel do private(i,j,acc) do i=1,m acc = dzero do j=irp(i), irp(i+1)-1 @@ -247,6 +258,7 @@ contains else if (alpha == done) then + !$omp parallel do private(i,j,acc) do i=1,m acc = dzero do j=irp(i), irp(i+1)-1 @@ -257,6 +269,7 @@ contains else if (alpha == -done) then + !$omp parallel do private(i,j,acc) do i=1,m acc = dzero do j=irp(i), irp(i+1)-1 @@ -267,6 +280,7 @@ contains else + !$omp parallel do private(i,j,acc) do i=1,m acc = dzero do j=irp(i), irp(i+1)-1 @@ -282,16 +296,19 @@ contains else if (tra) then if (beta == dzero) then + !$omp parallel do private(i) do i=1, m y(i) = dzero end do else if (beta == done) then ! Do nothing else if (beta == -done) then + !$omp parallel do private(i) do i=1, m y(i) = -y(i) end do else + !$omp parallel do private(i) do i=1, m y(i) = beta*y(i) end do @@ -476,16 +493,18 @@ contains real(psb_dpk_), intent(inout) :: y(ldy,*) logical, intent(in) :: is_triangle,is_unit,tra,ctra - real(psb_dpk_), intent(inout) :: acc(*) + real(psb_dpk_), intent(inout) :: acc(:) integer(psb_ipk_) :: i,j, ir if (alpha == dzero) then if (beta == dzero) then + !$omp parallel do private(i) do i = 1, m y(i,1:nc) = dzero enddo else + !$omp parallel do private(i) do i = 1, m y(i,1:nc) = beta*y(i,1:nc) end do @@ -497,6 +516,7 @@ contains if (beta == dzero) then if (alpha == done) then + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = dzero do j=irp(i), irp(i+1)-1 @@ -507,6 +527,7 @@ contains else if (alpha == -done) then + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = dzero do j=irp(i), irp(i+1)-1 @@ -517,6 +538,7 @@ contains else + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = dzero do j=irp(i), irp(i+1)-1 @@ -531,6 +553,7 @@ contains else if (beta == done) then if (alpha == done) then + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = dzero do j=irp(i), irp(i+1)-1 @@ -541,6 +564,7 @@ contains else if (alpha == -done) then + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = dzero do j=irp(i), irp(i+1)-1 @@ -551,6 +575,7 @@ contains else + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = dzero do j=irp(i), irp(i+1)-1 @@ -564,6 +589,7 @@ contains else if (beta == -done) then if (alpha == done) then + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = dzero do j=irp(i), irp(i+1)-1 @@ -574,6 +600,7 @@ contains else if (alpha == -done) then + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = dzero do j=irp(i), irp(i+1)-1 @@ -584,6 +611,7 @@ contains else + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = dzero do j=irp(i), irp(i+1)-1 @@ -597,6 +625,7 @@ contains else if (alpha == done) then + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = dzero do j=irp(i), irp(i+1)-1 @@ -607,6 +636,7 @@ contains else if (alpha == -done) then + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = dzero do j=irp(i), irp(i+1)-1 @@ -617,6 +647,7 @@ contains else + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = dzero do j=irp(i), irp(i+1)-1 diff --git a/base/serial/impl/psb_d_mat_impl.F90 b/base/serial/impl/psb_d_mat_impl.F90 index 86de5536..7f4ac0c1 100644 --- a/base/serial/impl/psb_d_mat_impl.F90 +++ b/base/serial/impl/psb_d_mat_impl.F90 @@ -582,7 +582,7 @@ end subroutine psb_d_get_neigh -subroutine psb_d_csall(nr,nc,a,info,nz) +subroutine psb_d_csall(nr,nc,a,info,nz,type,mold) use psb_d_mat_mod, psb_protect_name => psb_d_csall use psb_d_base_mat_mod use psb_error_mod @@ -591,6 +591,8 @@ subroutine psb_d_csall(nr,nc,a,info,nz) integer(psb_ipk_), intent(in) :: nr,nc integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nz + character(len=*), intent(in), optional :: type + class(psb_d_base_sparse_mat), optional, intent(in) :: mold integer(psb_ipk_) :: err_act character(len=20) :: name='csall' @@ -601,7 +603,23 @@ subroutine psb_d_csall(nr,nc,a,info,nz) call a%free() info = psb_success_ - allocate(psb_d_coo_sparse_mat :: a%a, stat=info) + if (present(mold)) then + allocate(a%a, stat=info, mold=mold) + else if (present(type)) then + select case (type) + case('CSR') + allocate(psb_d_csr_sparse_mat :: a%a, stat=info) + case('COO') + allocate(psb_d_coo_sparse_mat :: a%a, stat=info) + case('CSC') + allocate(psb_d_csc_sparse_mat :: a%a, stat=info) + case default + allocate(psb_d_coo_sparse_mat :: a%a, stat=info) + end select + else + allocate(psb_d_coo_sparse_mat :: a%a, stat=info) + end if + if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info, name) @@ -3381,7 +3399,7 @@ end subroutine psb_ld_get_neigh -subroutine psb_ld_csall(nr,nc,a,info,nz) +subroutine psb_ld_csall(nr,nc,a,info,nz,type,mold) use psb_d_mat_mod, psb_protect_name => psb_ld_csall use psb_d_base_mat_mod use psb_error_mod @@ -3390,6 +3408,8 @@ subroutine psb_ld_csall(nr,nc,a,info,nz) integer(psb_lpk_), intent(in) :: nr,nc integer(psb_ipk_), intent(out) :: info integer(psb_lpk_), intent(in), optional :: nz + character(len=*), intent(in), optional :: type + class(psb_ld_base_sparse_mat), optional, intent(in) :: mold integer(psb_ipk_) :: err_act character(len=20) :: name='csall' @@ -3400,7 +3420,22 @@ subroutine psb_ld_csall(nr,nc,a,info,nz) call a%free() info = psb_success_ - allocate(psb_ld_coo_sparse_mat :: a%a, stat=info) + if (present(mold)) then + allocate(a%a, stat=info, mold=mold) + else if (present(type)) then + select case (type) + case('CSR') + allocate(psb_ld_csr_sparse_mat :: a%a, stat=info) + case('COO') + allocate(psb_ld_coo_sparse_mat :: a%a, stat=info) + case('CSC') + allocate(psb_ld_csc_sparse_mat :: a%a, stat=info) + case default + allocate(psb_ld_coo_sparse_mat :: a%a, stat=info) + end select + else + allocate(psb_ld_coo_sparse_mat :: a%a, stat=info) + end if if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info, name) diff --git a/base/serial/impl/psb_s_base_mat_impl.F90 b/base/serial/impl/psb_s_base_mat_impl.F90 index 7a3f647d..4a99a684 100644 --- a/base/serial/impl/psb_s_base_mat_impl.F90 +++ b/base/serial/impl/psb_s_base_mat_impl.F90 @@ -250,7 +250,6 @@ subroutine psb_s_base_mv_from_coo(a,b,info) end subroutine psb_s_base_mv_from_coo - subroutine psb_s_base_mv_to_fmt(a,b,info) use psb_s_base_mat_mod, psb_protect_name => psb_s_base_mv_to_fmt use psb_error_mod @@ -698,6 +697,8 @@ subroutine psb_s_base_tril(a,l,info,& call psb_realloc(max(mb,nb),ia,info) call psb_realloc(max(mb,nb),ja,info) call psb_realloc(max(mb,nb),val,info) + ! Implementing this in OpenMP? + ! Tricky, to be seen do i=imin_,imax_, nbk ibk = min(nbk,imax_-i+1) call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,& @@ -849,6 +850,8 @@ subroutine psb_s_base_triu(a,u,info,& call psb_realloc(max(mb,nb),ia,info) call psb_realloc(max(mb,nb),ja,info) call psb_realloc(max(mb,nb),val,info) + ! Implementing this in OpenMP? + ! Tricky, to be seen do i=imin_,imax_, nbk ibk = min(nbk,imax_-i+1) call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,& @@ -914,8 +917,6 @@ subroutine psb_s_base_triu(a,u,info,& end subroutine psb_s_base_triu - - subroutine psb_s_base_clone(a,b,info) use psb_s_base_mat_mod, psb_protect_name => psb_s_base_clone use psb_error_mod @@ -960,6 +961,7 @@ subroutine psb_s_base_make_nonunit(a) mnm = min(m,n) nz = tmp%get_nzeros() call tmp%reallocate(nz+mnm) + !$omp parallel do private(i) shared(nz) do i=1, mnm tmp%val(nz+i) = sone tmp%ia(nz+i) = i @@ -1506,6 +1508,7 @@ contains real(psb_spk_), intent(out) :: y(*) integer(psb_ipk_) :: i + !$omp parallel do private(i) do i=1,n y(i) = d(i)*x(i) end do @@ -1519,6 +1522,7 @@ contains real(psb_spk_), intent(inout) :: x(*) integer(psb_ipk_) :: i + !$omp parallel do private(i) do i=1,n x(i) = d(i)*x(i) end do @@ -3182,6 +3186,8 @@ subroutine psb_ls_base_tril(a,l,info,& call psb_realloc(max(mb,nb),ia,info) call psb_realloc(max(mb,nb),ja,info) call psb_realloc(max(mb,nb),val,info) + ! Implementing this in OpenMP? + ! Tricky, to be seen do i=imin_,imax_, nbk ibk = min(nbk,imax_-i+1) call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,& @@ -3334,6 +3340,8 @@ subroutine psb_ls_base_triu(a,u,info,& call psb_realloc(max(mb,nb),ia,info) call psb_realloc(max(mb,nb),ja,info) call psb_realloc(max(mb,nb),val,info) + ! Implementing this in OpenMP? + ! Tricky, to be seen do i=imin_,imax_, nbk ibk = min(nbk,imax_-i+1) call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,& @@ -3446,6 +3454,7 @@ subroutine psb_ls_base_make_nonunit(a) mnm = min(m,n) nz = tmp%get_nzeros() call tmp%reallocate(nz+mnm) + !$omp parallel do private(i) shared(nz) do i=1, mnm tmp%val(nz+i) = sone tmp%ia(nz+i) = i diff --git a/base/serial/impl/psb_s_coo_impl.F90 b/base/serial/impl/psb_s_coo_impl.F90 index 061fb904..d214b2d5 100644 --- a/base/serial/impl/psb_s_coo_impl.F90 +++ b/base/serial/impl/psb_s_coo_impl.F90 @@ -114,6 +114,7 @@ subroutine psb_s_coo_scal(d,a,info,side) goto 9999 end if + !$omp parallel do private(i,j) do i=1,a%get_nzeros() j = a%ia(i) a%val(i) = a%val(i) * d(j) @@ -126,6 +127,7 @@ subroutine psb_s_coo_scal(d,a,info,side) goto 9999 end if + !$omp parallel do private(i,j) do i=1,a%get_nzeros() j = a%ja(i) a%val(i) = a%val(i) * d(j) @@ -201,6 +203,7 @@ subroutine psb_s_coo_scalplusidentity(d,a,info) end if mnm = min(a%get_nrows(),a%get_ncols()) + !$omp parallel do private(i,j) do i=1,a%get_nzeros() a%val(i) = a%val(i) * d j=a%ia(i) @@ -253,12 +256,30 @@ subroutine psb_s_coo_spaxpby(alpha,a,beta,b,info) ! Allocate (temporary) space for the solution call tcoo%allocate(M,N,(nza+nzb)) ! Compute the sum +#if defined (OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, nza + tcoo%ia(i) = a%ia(i) + tcoo%ja(i) = a%ja(i) + tcoo%val(i) = alpha*a%val(i) + end do + !$omp parallel do private(i) + do i=1, nzb + tcoo%ia(nza+i) = bcoo%ia(i) + tcoo%ja(nza+i) = bcoo%ja(i) + tcoo%val(nza+i) = beta*bcoo%val(i) + end do + end block +#else tcoo%ia(1:nza) = a%ia(1:nza) tcoo%ja(1:nza) = a%ja(1:nza) tcoo%val(1:nza) = alpha*a%val(1:nza) tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb) tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb) tcoo%val(nza+1:nza+nzb) = beta*bcoo%val(1:nzb) +#endif ! Fix the indexes call tcoo%fix(info) ! Move to correct output format @@ -346,12 +367,30 @@ function psb_s_coo_cmpmat(a,b,tol,info) result(res) ! Allocate (temporary) space for the solution call tcoo%allocate(M,N,(nza+nzb)) ! Compute the sum +#if defined (OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, nza + tcoo%ia(i) = a%ia(i) + tcoo%ja(i) = a%ja(i) + tcoo%val(i) = alpha*a%val(i) + end do + !$omp parallel do private(i) + do i=1, nzb + tcoo%ia(nza+i) = bcoo%ia(i) + tcoo%ja(nza+i) = bcoo%ja(i) + tcoo%val(nza+i) = (-sone)*beta*bcoo%val(i) + end do + end block +#else tcoo%ia(1:nza) = a%ia(1:nza) tcoo%ja(1:nza) = a%ja(1:nza) tcoo%val(1:nza) = a%val(1:nza) tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb) tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb) tcoo%val(nza+1:nza+nzb) = (-sone)*bcoo%val(1:nzb) +#endif ! Fix the indexes call tcoo%fix(info) if (info /= psb_success_) then @@ -728,9 +767,6 @@ subroutine psb_s_coo_print(iout,a,iv,head,ivr,ivc) end subroutine psb_s_coo_print - - - function psb_s_coo_get_nz_row(idx,a) result(res) use psb_const_mod use psb_sort_mod @@ -1670,7 +1706,6 @@ subroutine psb_s_coo_csmv(alpha,a,x,beta,y,info,trans) end subroutine psb_s_coo_csmv - subroutine psb_s_coo_csmm(alpha,a,x,beta,y,info,trans) use psb_const_mod use psb_error_mod @@ -1709,11 +1744,9 @@ subroutine psb_s_coo_csmm(alpha,a,x,beta,y,info,trans) trans_ = 'N' end if - tra = (psb_toupper(trans_) == 'T') ctra = (psb_toupper(trans_) == 'C') - if (tra.or.ctra) then m = a%get_ncols() n = a%get_nrows() @@ -1895,7 +1928,15 @@ function psb_s_coo_maxval(a) result(res) nnz = a%get_nzeros() if (allocated(a%val)) then nnz = min(nnz,size(a%val)) +#if defined(OPENMP) + res = szero + !$omp parallel do private(i) reduction(max: res) + do i=1, nnz + res = max(res,abs(a%val(i))) + end do +#else res = maxval(abs(a%val(1:nnz))) +#endif end if end function psb_s_coo_maxval @@ -2275,11 +2316,13 @@ subroutine psb_s_coo_csgetptn(imin,imax,a,nz,ia,ja,info,& & iren) if (rscale_) then + !$omp parallel do private(i) do i=nzin_+1, nzin_+nz ia(i) = ia(i) - imin + 1 end do end if if (cscale_) then + !$omp parallel do private(i) do i=nzin_+1, nzin_+nz ja(i) = ja(i) - jmin_ + 1 end do @@ -2553,11 +2596,13 @@ subroutine psb_s_coo_csgetrow(imin,imax,a,nz,ia,ja,val,info,& & iren) if (rscale_) then + !$omp parallel do private(i) do i=nzin_+1, nzin_+nz ia(i) = ia(i) - imin + 1 end do end if if (cscale_) then + !$omp parallel do private(i) do i=nzin_+1, nzin_+nz ja(i) = ja(i) - jmin_ + 1 end do @@ -2768,7 +2813,6 @@ contains end subroutine psb_s_coo_csgetrow - subroutine psb_s_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_error_mod use psb_realloc_mod @@ -3021,7 +3065,6 @@ contains end subroutine psb_s_coo_csput_a - subroutine psb_s_cp_coo_to_coo(a,b,info) use psb_error_mod use psb_s_base_mat_mod, psb_protect_name => psb_s_cp_coo_to_coo @@ -3045,10 +3088,21 @@ subroutine psb_s_cp_coo_to_coo(a,b,info) call b%set_nzeros(nz) call b%reallocate(nz) +#if defined(OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, nz + b%ia(i) = a%ia(i) + b%ja(i) = a%ja(i) + b%val(i) = a%val(i) + end do + end block +#else b%ia(1:nz) = a%ia(1:nz) b%ja(1:nz) = a%ja(1:nz) b%val(1:nz) = a%val(1:nz) - +#endif call b%set_host() if (.not.b%is_by_rows()) call b%fix(info) @@ -3087,10 +3141,21 @@ subroutine psb_s_cp_coo_from_coo(a,b,info) call a%set_nzeros(nz) call a%reallocate(nz) +#if defined(OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, nz + a%ia(i) = b%ia(i) + a%ja(i) = b%ja(i) + a%val(i) = b%val(i) + end do + end block +#else a%ia(1:nz) = b%ia(1:nz) a%ja(1:nz) = b%ja(1:nz) a%val(1:nz) = b%val(1:nz) - +#endif call a%set_host() if (.not.a%is_by_rows()) call a%fix(info) @@ -3445,8 +3510,6 @@ subroutine psb_s_fix_coo(a,info,idir) end subroutine psb_s_fix_coo - - subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) use psb_const_mod use psb_error_mod @@ -4174,7 +4237,6 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) end subroutine psb_s_fix_coo_inner - subroutine psb_s_cp_coo_to_lcoo(a,b,info) use psb_error_mod use psb_s_base_mat_mod, psb_protect_name => psb_s_cp_coo_to_lcoo @@ -4199,10 +4261,21 @@ subroutine psb_s_cp_coo_to_lcoo(a,b,info) call b%set_nzeros(nz) call b%reallocate(nz) +#if defined(OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, nz + b%ia(i) = a%ia(i) + b%ja(i) = a%ja(i) + b%val(i) = a%val(i) + end do + end block +#else b%ia(1:nz) = a%ia(1:nz) b%ja(1:nz) = a%ja(1:nz) b%val(1:nz) = a%val(1:nz) - +#endif call b%set_host() if (.not.b%is_by_rows()) call b%fix(info) @@ -4240,10 +4313,21 @@ subroutine psb_s_cp_coo_from_lcoo(a,b,info) call a%set_nzeros(nz) call a%reallocate(nz) +#if defined(OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, nz + a%ia(i) = b%ia(i) + a%ja(i) = b%ja(i) + a%val(i) = b%val(i) + end do + end block +#else a%ia(1:nz) = b%ia(1:nz) a%ja(1:nz) = b%ja(1:nz) a%val(1:nz) = b%val(1:nz) - +#endif call a%set_host() if (.not.a%is_by_rows()) call a%fix(info) @@ -4442,7 +4526,17 @@ function psb_ls_coo_maxval(a) result(res) nnz = a%get_nzeros() if (allocated(a%val)) then nnz = min(nnz,size(a%val)) +#if defined(OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, nnz + res = max(res,abs(a%val(i))) + end do + end block +#else res = maxval(abs(a%val(1:nnz))) +#endif end if end function psb_ls_coo_maxval @@ -4499,7 +4593,17 @@ function psb_ls_coo_csnmi(a) result(res) i = a%ia(j) vt(i) = vt(i) + abs(a%val(j)) end do +#if defined(OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, m + res = max(res,abs(vt(i))) + end do + end block +#else res = maxval(vt(1:m)) +#endif deallocate(vt,stat=info) end if @@ -4539,7 +4643,17 @@ function psb_ls_coo_csnm1(a) result(res) i = a%ja(j) vt(i) = vt(i) + abs(a%val(j)) end do +#if defined(OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, n + res = max(res,abs(vt(i))) + end do + end block +#else res = maxval(vt(1:n)) +#endif deallocate(vt,stat=info) return @@ -4584,7 +4698,6 @@ subroutine psb_ls_coo_rowsum(d,a) d(i) = d(i) + a%val(j) end do - return call psb_erractionrestore(err_act) return @@ -4592,7 +4705,6 @@ subroutine psb_ls_coo_rowsum(d,a) 9999 call psb_error_handler(err_act) return - end subroutine psb_ls_coo_rowsum subroutine psb_ls_coo_arwsum(d,a) @@ -4761,6 +4873,7 @@ subroutine psb_ls_coo_scalplusidentity(d,a,info) end if mnm = min(a%get_nrows(),a%get_ncols()) + !$omp parallel do private(i,j) do i=1,a%get_nzeros() a%val(i) = a%val(i) * d j=a%ia(i) @@ -4813,12 +4926,30 @@ subroutine psb_ls_coo_spaxpby(alpha,a,beta,b,info) ! Allocate (temporary) space for the solution call tcoo%allocate(M,N,(nza+nzb)) ! Compute the sum +#if defined(OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, nza + tcoo%ia(i) = a%ia(i) + tcoo%ja(i) = a%ja(i) + tcoo%val(i) = alpha*a%val(i) + end do + !$omp parallel do private(i) + do i=1, nzb + tcoo%ia(nza+i) = bcoo%ia(i) + tcoo%ja(nza+i) = bcoo%ja(i) + tcoo%val(nza+i) = beta*bcoo%val(i) + end do + end block +#else tcoo%ia(1:nza) = a%ia(1:nza) tcoo%ja(1:nza) = a%ja(1:nza) tcoo%val(1:nza) = alpha*a%val(1:nza) tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb) tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb) tcoo%val(nza+1:nza+nzb) = beta*bcoo%val(1:nzb) +#endif ! Fix the indexes call tcoo%fix(info) ! Move to correct output format @@ -4906,12 +5037,30 @@ function psb_ls_coo_cmpmat(a,b,tol,info) result(res) ! Allocate (temporary) space for the solution call tcoo%allocate(M,N,(nza+nzb)) ! Compute the sum +#if defined(OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, nza + tcoo%ia(i) = a%ia(i) + tcoo%ja(i) = a%ja(i) + tcoo%val(i) = a%val(i) + end do + !$omp parallel do private(i) + do i=1, nzb + tcoo%ia(nza+i) = bcoo%ia(i) + tcoo%ja(nza+i) = bcoo%ja(i) + tcoo%val(nza+i) = (-1_psb_spk_)*bcoo%val(i) + end do + end block +#else tcoo%ia(1:nza) = a%ia(1:nza) tcoo%ja(1:nza) = a%ja(1:nza) tcoo%val(1:nza) = a%val(1:nza) tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb) tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb) tcoo%val(nza+1:nza+nzb) = (-1_psb_spk_)*bcoo%val(1:nzb) +#endif ! Fix the indexes call tcoo%fix(info) if (info /= psb_success_) then @@ -5950,7 +6099,7 @@ subroutine psb_ls_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) if (nz < 0) then info = psb_err_iarg_neg_ - call psb_errpush(info,name,i_err=(/1_psb_ipk_/)) +3 call psb_errpush(info,name,i_err=(/1_psb_ipk_/)) goto 9999 end if if (size(ia) < nz) then diff --git a/base/serial/impl/psb_s_csr_impl.f90 b/base/serial/impl/psb_s_csr_impl.f90 index a0c56d35..76140d07 100644 --- a/base/serial/impl/psb_s_csr_impl.f90 +++ b/base/serial/impl/psb_s_csr_impl.f90 @@ -130,10 +130,12 @@ contains if (alpha == szero) then if (beta == szero) then + !$omp parallel do private(i) do i = 1, m y(i) = szero enddo else + !$omp parallel do private(i) do i = 1, m y(i) = beta*y(i) end do @@ -147,6 +149,7 @@ contains if (beta == szero) then if (alpha == sone) then + !$omp parallel do private(i,j, acc) schedule(static) do i=1,m acc = szero do j=irp(i), irp(i+1)-1 @@ -157,6 +160,7 @@ contains else if (alpha == -sone) then + !$omp parallel do private(i,j, acc) do i=1,m acc = szero do j=irp(i), irp(i+1)-1 @@ -167,6 +171,7 @@ contains else + !$omp parallel do private(i,j,acc) do i=1,m acc = szero do j=irp(i), irp(i+1)-1 @@ -181,6 +186,7 @@ contains else if (beta == sone) then if (alpha == sone) then + !$omp parallel do private(i,j,acc) do i=1,m acc = szero do j=irp(i), irp(i+1)-1 @@ -191,6 +197,7 @@ contains else if (alpha == -sone) then + !$omp parallel do private(i,j,acc) do i=1,m acc = szero do j=irp(i), irp(i+1)-1 @@ -201,6 +208,7 @@ contains else + !$omp parallel do private(i,j,acc) do i=1,m acc = szero do j=irp(i), irp(i+1)-1 @@ -214,6 +222,7 @@ contains else if (beta == -sone) then if (alpha == sone) then + !$omp parallel do private(i,j,acc) do i=1,m acc = szero do j=irp(i), irp(i+1)-1 @@ -224,6 +233,7 @@ contains else if (alpha == -sone) then + !$omp parallel do private(i,j,acc) do i=1,m acc = szero do j=irp(i), irp(i+1)-1 @@ -234,6 +244,7 @@ contains else + !$omp parallel do private(i,j,acc) do i=1,m acc = szero do j=irp(i), irp(i+1)-1 @@ -247,6 +258,7 @@ contains else if (alpha == sone) then + !$omp parallel do private(i,j,acc) do i=1,m acc = szero do j=irp(i), irp(i+1)-1 @@ -257,6 +269,7 @@ contains else if (alpha == -sone) then + !$omp parallel do private(i,j,acc) do i=1,m acc = szero do j=irp(i), irp(i+1)-1 @@ -267,6 +280,7 @@ contains else + !$omp parallel do private(i,j,acc) do i=1,m acc = szero do j=irp(i), irp(i+1)-1 @@ -282,16 +296,19 @@ contains else if (tra) then if (beta == szero) then + !$omp parallel do private(i) do i=1, m y(i) = szero end do else if (beta == sone) then ! Do nothing else if (beta == -sone) then + !$omp parallel do private(i) do i=1, m y(i) = -y(i) end do else + !$omp parallel do private(i) do i=1, m y(i) = beta*y(i) end do @@ -476,16 +493,18 @@ contains real(psb_spk_), intent(inout) :: y(ldy,*) logical, intent(in) :: is_triangle,is_unit,tra,ctra - real(psb_spk_), intent(inout) :: acc(*) + real(psb_spk_), intent(inout) :: acc(:) integer(psb_ipk_) :: i,j, ir if (alpha == szero) then if (beta == szero) then + !$omp parallel do private(i) do i = 1, m y(i,1:nc) = szero enddo else + !$omp parallel do private(i) do i = 1, m y(i,1:nc) = beta*y(i,1:nc) end do @@ -497,6 +516,7 @@ contains if (beta == szero) then if (alpha == sone) then + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = szero do j=irp(i), irp(i+1)-1 @@ -507,6 +527,7 @@ contains else if (alpha == -sone) then + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = szero do j=irp(i), irp(i+1)-1 @@ -517,6 +538,7 @@ contains else + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = szero do j=irp(i), irp(i+1)-1 @@ -531,6 +553,7 @@ contains else if (beta == sone) then if (alpha == sone) then + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = szero do j=irp(i), irp(i+1)-1 @@ -541,6 +564,7 @@ contains else if (alpha == -sone) then + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = szero do j=irp(i), irp(i+1)-1 @@ -551,6 +575,7 @@ contains else + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = szero do j=irp(i), irp(i+1)-1 @@ -564,6 +589,7 @@ contains else if (beta == -sone) then if (alpha == sone) then + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = szero do j=irp(i), irp(i+1)-1 @@ -574,6 +600,7 @@ contains else if (alpha == -sone) then + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = szero do j=irp(i), irp(i+1)-1 @@ -584,6 +611,7 @@ contains else + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = szero do j=irp(i), irp(i+1)-1 @@ -597,6 +625,7 @@ contains else if (alpha == sone) then + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = szero do j=irp(i), irp(i+1)-1 @@ -607,6 +636,7 @@ contains else if (alpha == -sone) then + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = szero do j=irp(i), irp(i+1)-1 @@ -617,6 +647,7 @@ contains else + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = szero do j=irp(i), irp(i+1)-1 diff --git a/base/serial/impl/psb_s_mat_impl.F90 b/base/serial/impl/psb_s_mat_impl.F90 index 867f9fa4..806a08e3 100644 --- a/base/serial/impl/psb_s_mat_impl.F90 +++ b/base/serial/impl/psb_s_mat_impl.F90 @@ -582,7 +582,7 @@ end subroutine psb_s_get_neigh -subroutine psb_s_csall(nr,nc,a,info,nz) +subroutine psb_s_csall(nr,nc,a,info,nz,type,mold) use psb_s_mat_mod, psb_protect_name => psb_s_csall use psb_s_base_mat_mod use psb_error_mod @@ -591,6 +591,8 @@ subroutine psb_s_csall(nr,nc,a,info,nz) integer(psb_ipk_), intent(in) :: nr,nc integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nz + character(len=*), intent(in), optional :: type + class(psb_s_base_sparse_mat), optional, intent(in) :: mold integer(psb_ipk_) :: err_act character(len=20) :: name='csall' @@ -601,7 +603,23 @@ subroutine psb_s_csall(nr,nc,a,info,nz) call a%free() info = psb_success_ - allocate(psb_s_coo_sparse_mat :: a%a, stat=info) + if (present(mold)) then + allocate(a%a, stat=info, mold=mold) + else if (present(type)) then + select case (type) + case('CSR') + allocate(psb_s_csr_sparse_mat :: a%a, stat=info) + case('COO') + allocate(psb_s_coo_sparse_mat :: a%a, stat=info) + case('CSC') + allocate(psb_s_csc_sparse_mat :: a%a, stat=info) + case default + allocate(psb_s_coo_sparse_mat :: a%a, stat=info) + end select + else + allocate(psb_s_coo_sparse_mat :: a%a, stat=info) + end if + if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info, name) @@ -3381,7 +3399,7 @@ end subroutine psb_ls_get_neigh -subroutine psb_ls_csall(nr,nc,a,info,nz) +subroutine psb_ls_csall(nr,nc,a,info,nz,type,mold) use psb_s_mat_mod, psb_protect_name => psb_ls_csall use psb_s_base_mat_mod use psb_error_mod @@ -3390,6 +3408,8 @@ subroutine psb_ls_csall(nr,nc,a,info,nz) integer(psb_lpk_), intent(in) :: nr,nc integer(psb_ipk_), intent(out) :: info integer(psb_lpk_), intent(in), optional :: nz + character(len=*), intent(in), optional :: type + class(psb_ls_base_sparse_mat), optional, intent(in) :: mold integer(psb_ipk_) :: err_act character(len=20) :: name='csall' @@ -3400,7 +3420,22 @@ subroutine psb_ls_csall(nr,nc,a,info,nz) call a%free() info = psb_success_ - allocate(psb_ls_coo_sparse_mat :: a%a, stat=info) + if (present(mold)) then + allocate(a%a, stat=info, mold=mold) + else if (present(type)) then + select case (type) + case('CSR') + allocate(psb_ls_csr_sparse_mat :: a%a, stat=info) + case('COO') + allocate(psb_ls_coo_sparse_mat :: a%a, stat=info) + case('CSC') + allocate(psb_ls_csc_sparse_mat :: a%a, stat=info) + case default + allocate(psb_ls_coo_sparse_mat :: a%a, stat=info) + end select + else + allocate(psb_ls_coo_sparse_mat :: a%a, stat=info) + end if if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info, name) diff --git a/base/serial/impl/psb_z_base_mat_impl.F90 b/base/serial/impl/psb_z_base_mat_impl.F90 index fbbbd83d..404027c5 100644 --- a/base/serial/impl/psb_z_base_mat_impl.F90 +++ b/base/serial/impl/psb_z_base_mat_impl.F90 @@ -250,7 +250,6 @@ subroutine psb_z_base_mv_from_coo(a,b,info) end subroutine psb_z_base_mv_from_coo - subroutine psb_z_base_mv_to_fmt(a,b,info) use psb_z_base_mat_mod, psb_protect_name => psb_z_base_mv_to_fmt use psb_error_mod @@ -698,6 +697,8 @@ subroutine psb_z_base_tril(a,l,info,& call psb_realloc(max(mb,nb),ia,info) call psb_realloc(max(mb,nb),ja,info) call psb_realloc(max(mb,nb),val,info) + ! Implementing this in OpenMP? + ! Tricky, to be seen do i=imin_,imax_, nbk ibk = min(nbk,imax_-i+1) call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,& @@ -849,6 +850,8 @@ subroutine psb_z_base_triu(a,u,info,& call psb_realloc(max(mb,nb),ia,info) call psb_realloc(max(mb,nb),ja,info) call psb_realloc(max(mb,nb),val,info) + ! Implementing this in OpenMP? + ! Tricky, to be seen do i=imin_,imax_, nbk ibk = min(nbk,imax_-i+1) call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,& @@ -914,8 +917,6 @@ subroutine psb_z_base_triu(a,u,info,& end subroutine psb_z_base_triu - - subroutine psb_z_base_clone(a,b,info) use psb_z_base_mat_mod, psb_protect_name => psb_z_base_clone use psb_error_mod @@ -960,6 +961,7 @@ subroutine psb_z_base_make_nonunit(a) mnm = min(m,n) nz = tmp%get_nzeros() call tmp%reallocate(nz+mnm) + !$omp parallel do private(i) shared(nz) do i=1, mnm tmp%val(nz+i) = zone tmp%ia(nz+i) = i @@ -1506,6 +1508,7 @@ contains complex(psb_dpk_), intent(out) :: y(*) integer(psb_ipk_) :: i + !$omp parallel do private(i) do i=1,n y(i) = d(i)*x(i) end do @@ -1519,6 +1522,7 @@ contains complex(psb_dpk_), intent(inout) :: x(*) integer(psb_ipk_) :: i + !$omp parallel do private(i) do i=1,n x(i) = d(i)*x(i) end do @@ -3182,6 +3186,8 @@ subroutine psb_lz_base_tril(a,l,info,& call psb_realloc(max(mb,nb),ia,info) call psb_realloc(max(mb,nb),ja,info) call psb_realloc(max(mb,nb),val,info) + ! Implementing this in OpenMP? + ! Tricky, to be seen do i=imin_,imax_, nbk ibk = min(nbk,imax_-i+1) call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,& @@ -3334,6 +3340,8 @@ subroutine psb_lz_base_triu(a,u,info,& call psb_realloc(max(mb,nb),ia,info) call psb_realloc(max(mb,nb),ja,info) call psb_realloc(max(mb,nb),val,info) + ! Implementing this in OpenMP? + ! Tricky, to be seen do i=imin_,imax_, nbk ibk = min(nbk,imax_-i+1) call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,& @@ -3446,6 +3454,7 @@ subroutine psb_lz_base_make_nonunit(a) mnm = min(m,n) nz = tmp%get_nzeros() call tmp%reallocate(nz+mnm) + !$omp parallel do private(i) shared(nz) do i=1, mnm tmp%val(nz+i) = zone tmp%ia(nz+i) = i diff --git a/base/serial/impl/psb_z_coo_impl.F90 b/base/serial/impl/psb_z_coo_impl.F90 index 2da38296..7850aeec 100644 --- a/base/serial/impl/psb_z_coo_impl.F90 +++ b/base/serial/impl/psb_z_coo_impl.F90 @@ -114,6 +114,7 @@ subroutine psb_z_coo_scal(d,a,info,side) goto 9999 end if + !$omp parallel do private(i,j) do i=1,a%get_nzeros() j = a%ia(i) a%val(i) = a%val(i) * d(j) @@ -126,6 +127,7 @@ subroutine psb_z_coo_scal(d,a,info,side) goto 9999 end if + !$omp parallel do private(i,j) do i=1,a%get_nzeros() j = a%ja(i) a%val(i) = a%val(i) * d(j) @@ -201,6 +203,7 @@ subroutine psb_z_coo_scalplusidentity(d,a,info) end if mnm = min(a%get_nrows(),a%get_ncols()) + !$omp parallel do private(i,j) do i=1,a%get_nzeros() a%val(i) = a%val(i) * d j=a%ia(i) @@ -253,12 +256,30 @@ subroutine psb_z_coo_spaxpby(alpha,a,beta,b,info) ! Allocate (temporary) space for the solution call tcoo%allocate(M,N,(nza+nzb)) ! Compute the sum +#if defined (OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, nza + tcoo%ia(i) = a%ia(i) + tcoo%ja(i) = a%ja(i) + tcoo%val(i) = alpha*a%val(i) + end do + !$omp parallel do private(i) + do i=1, nzb + tcoo%ia(nza+i) = bcoo%ia(i) + tcoo%ja(nza+i) = bcoo%ja(i) + tcoo%val(nza+i) = beta*bcoo%val(i) + end do + end block +#else tcoo%ia(1:nza) = a%ia(1:nza) tcoo%ja(1:nza) = a%ja(1:nza) tcoo%val(1:nza) = alpha*a%val(1:nza) tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb) tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb) tcoo%val(nza+1:nza+nzb) = beta*bcoo%val(1:nzb) +#endif ! Fix the indexes call tcoo%fix(info) ! Move to correct output format @@ -346,12 +367,30 @@ function psb_z_coo_cmpmat(a,b,tol,info) result(res) ! Allocate (temporary) space for the solution call tcoo%allocate(M,N,(nza+nzb)) ! Compute the sum +#if defined (OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, nza + tcoo%ia(i) = a%ia(i) + tcoo%ja(i) = a%ja(i) + tcoo%val(i) = alpha*a%val(i) + end do + !$omp parallel do private(i) + do i=1, nzb + tcoo%ia(nza+i) = bcoo%ia(i) + tcoo%ja(nza+i) = bcoo%ja(i) + tcoo%val(nza+i) = (-done)*beta*bcoo%val(i) + end do + end block +#else tcoo%ia(1:nza) = a%ia(1:nza) tcoo%ja(1:nza) = a%ja(1:nza) tcoo%val(1:nza) = a%val(1:nza) tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb) tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb) tcoo%val(nza+1:nza+nzb) = (-done)*bcoo%val(1:nzb) +#endif ! Fix the indexes call tcoo%fix(info) if (info /= psb_success_) then @@ -728,9 +767,6 @@ subroutine psb_z_coo_print(iout,a,iv,head,ivr,ivc) end subroutine psb_z_coo_print - - - function psb_z_coo_get_nz_row(idx,a) result(res) use psb_const_mod use psb_sort_mod @@ -1670,7 +1706,6 @@ subroutine psb_z_coo_csmv(alpha,a,x,beta,y,info,trans) end subroutine psb_z_coo_csmv - subroutine psb_z_coo_csmm(alpha,a,x,beta,y,info,trans) use psb_const_mod use psb_error_mod @@ -1709,11 +1744,9 @@ subroutine psb_z_coo_csmm(alpha,a,x,beta,y,info,trans) trans_ = 'N' end if - tra = (psb_toupper(trans_) == 'T') ctra = (psb_toupper(trans_) == 'C') - if (tra.or.ctra) then m = a%get_ncols() n = a%get_nrows() @@ -1895,7 +1928,15 @@ function psb_z_coo_maxval(a) result(res) nnz = a%get_nzeros() if (allocated(a%val)) then nnz = min(nnz,size(a%val)) +#if defined(OPENMP) + res = dzero + !$omp parallel do private(i) reduction(max: res) + do i=1, nnz + res = max(res,abs(a%val(i))) + end do +#else res = maxval(abs(a%val(1:nnz))) +#endif end if end function psb_z_coo_maxval @@ -2275,11 +2316,13 @@ subroutine psb_z_coo_csgetptn(imin,imax,a,nz,ia,ja,info,& & iren) if (rscale_) then + !$omp parallel do private(i) do i=nzin_+1, nzin_+nz ia(i) = ia(i) - imin + 1 end do end if if (cscale_) then + !$omp parallel do private(i) do i=nzin_+1, nzin_+nz ja(i) = ja(i) - jmin_ + 1 end do @@ -2553,11 +2596,13 @@ subroutine psb_z_coo_csgetrow(imin,imax,a,nz,ia,ja,val,info,& & iren) if (rscale_) then + !$omp parallel do private(i) do i=nzin_+1, nzin_+nz ia(i) = ia(i) - imin + 1 end do end if if (cscale_) then + !$omp parallel do private(i) do i=nzin_+1, nzin_+nz ja(i) = ja(i) - jmin_ + 1 end do @@ -2768,7 +2813,6 @@ contains end subroutine psb_z_coo_csgetrow - subroutine psb_z_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_error_mod use psb_realloc_mod @@ -3021,7 +3065,6 @@ contains end subroutine psb_z_coo_csput_a - subroutine psb_z_cp_coo_to_coo(a,b,info) use psb_error_mod use psb_z_base_mat_mod, psb_protect_name => psb_z_cp_coo_to_coo @@ -3045,10 +3088,21 @@ subroutine psb_z_cp_coo_to_coo(a,b,info) call b%set_nzeros(nz) call b%reallocate(nz) +#if defined(OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, nz + b%ia(i) = a%ia(i) + b%ja(i) = a%ja(i) + b%val(i) = a%val(i) + end do + end block +#else b%ia(1:nz) = a%ia(1:nz) b%ja(1:nz) = a%ja(1:nz) b%val(1:nz) = a%val(1:nz) - +#endif call b%set_host() if (.not.b%is_by_rows()) call b%fix(info) @@ -3087,10 +3141,21 @@ subroutine psb_z_cp_coo_from_coo(a,b,info) call a%set_nzeros(nz) call a%reallocate(nz) +#if defined(OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, nz + a%ia(i) = b%ia(i) + a%ja(i) = b%ja(i) + a%val(i) = b%val(i) + end do + end block +#else a%ia(1:nz) = b%ia(1:nz) a%ja(1:nz) = b%ja(1:nz) a%val(1:nz) = b%val(1:nz) - +#endif call a%set_host() if (.not.a%is_by_rows()) call a%fix(info) @@ -3445,8 +3510,6 @@ subroutine psb_z_fix_coo(a,info,idir) end subroutine psb_z_fix_coo - - subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) use psb_const_mod use psb_error_mod @@ -4174,7 +4237,6 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) end subroutine psb_z_fix_coo_inner - subroutine psb_z_cp_coo_to_lcoo(a,b,info) use psb_error_mod use psb_z_base_mat_mod, psb_protect_name => psb_z_cp_coo_to_lcoo @@ -4199,10 +4261,21 @@ subroutine psb_z_cp_coo_to_lcoo(a,b,info) call b%set_nzeros(nz) call b%reallocate(nz) +#if defined(OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, nz + b%ia(i) = a%ia(i) + b%ja(i) = a%ja(i) + b%val(i) = a%val(i) + end do + end block +#else b%ia(1:nz) = a%ia(1:nz) b%ja(1:nz) = a%ja(1:nz) b%val(1:nz) = a%val(1:nz) - +#endif call b%set_host() if (.not.b%is_by_rows()) call b%fix(info) @@ -4240,10 +4313,21 @@ subroutine psb_z_cp_coo_from_lcoo(a,b,info) call a%set_nzeros(nz) call a%reallocate(nz) +#if defined(OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, nz + a%ia(i) = b%ia(i) + a%ja(i) = b%ja(i) + a%val(i) = b%val(i) + end do + end block +#else a%ia(1:nz) = b%ia(1:nz) a%ja(1:nz) = b%ja(1:nz) a%val(1:nz) = b%val(1:nz) - +#endif call a%set_host() if (.not.a%is_by_rows()) call a%fix(info) @@ -4442,7 +4526,17 @@ function psb_lz_coo_maxval(a) result(res) nnz = a%get_nzeros() if (allocated(a%val)) then nnz = min(nnz,size(a%val)) +#if defined(OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, nnz + res = max(res,abs(a%val(i))) + end do + end block +#else res = maxval(abs(a%val(1:nnz))) +#endif end if end function psb_lz_coo_maxval @@ -4499,7 +4593,17 @@ function psb_lz_coo_csnmi(a) result(res) i = a%ia(j) vt(i) = vt(i) + abs(a%val(j)) end do +#if defined(OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, m + res = max(res,abs(vt(i))) + end do + end block +#else res = maxval(vt(1:m)) +#endif deallocate(vt,stat=info) end if @@ -4539,7 +4643,17 @@ function psb_lz_coo_csnm1(a) result(res) i = a%ja(j) vt(i) = vt(i) + abs(a%val(j)) end do +#if defined(OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, n + res = max(res,abs(vt(i))) + end do + end block +#else res = maxval(vt(1:n)) +#endif deallocate(vt,stat=info) return @@ -4584,7 +4698,6 @@ subroutine psb_lz_coo_rowsum(d,a) d(i) = d(i) + a%val(j) end do - return call psb_erractionrestore(err_act) return @@ -4592,7 +4705,6 @@ subroutine psb_lz_coo_rowsum(d,a) 9999 call psb_error_handler(err_act) return - end subroutine psb_lz_coo_rowsum subroutine psb_lz_coo_arwsum(d,a) @@ -4761,6 +4873,7 @@ subroutine psb_lz_coo_scalplusidentity(d,a,info) end if mnm = min(a%get_nrows(),a%get_ncols()) + !$omp parallel do private(i,j) do i=1,a%get_nzeros() a%val(i) = a%val(i) * d j=a%ia(i) @@ -4813,12 +4926,30 @@ subroutine psb_lz_coo_spaxpby(alpha,a,beta,b,info) ! Allocate (temporary) space for the solution call tcoo%allocate(M,N,(nza+nzb)) ! Compute the sum +#if defined(OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, nza + tcoo%ia(i) = a%ia(i) + tcoo%ja(i) = a%ja(i) + tcoo%val(i) = alpha*a%val(i) + end do + !$omp parallel do private(i) + do i=1, nzb + tcoo%ia(nza+i) = bcoo%ia(i) + tcoo%ja(nza+i) = bcoo%ja(i) + tcoo%val(nza+i) = beta*bcoo%val(i) + end do + end block +#else tcoo%ia(1:nza) = a%ia(1:nza) tcoo%ja(1:nza) = a%ja(1:nza) tcoo%val(1:nza) = alpha*a%val(1:nza) tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb) tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb) tcoo%val(nza+1:nza+nzb) = beta*bcoo%val(1:nzb) +#endif ! Fix the indexes call tcoo%fix(info) ! Move to correct output format @@ -4906,12 +5037,30 @@ function psb_lz_coo_cmpmat(a,b,tol,info) result(res) ! Allocate (temporary) space for the solution call tcoo%allocate(M,N,(nza+nzb)) ! Compute the sum +#if defined(OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, nza + tcoo%ia(i) = a%ia(i) + tcoo%ja(i) = a%ja(i) + tcoo%val(i) = a%val(i) + end do + !$omp parallel do private(i) + do i=1, nzb + tcoo%ia(nza+i) = bcoo%ia(i) + tcoo%ja(nza+i) = bcoo%ja(i) + tcoo%val(nza+i) = (-1_psb_dpk_)*bcoo%val(i) + end do + end block +#else tcoo%ia(1:nza) = a%ia(1:nza) tcoo%ja(1:nza) = a%ja(1:nza) tcoo%val(1:nza) = a%val(1:nza) tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb) tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb) tcoo%val(nza+1:nza+nzb) = (-1_psb_dpk_)*bcoo%val(1:nzb) +#endif ! Fix the indexes call tcoo%fix(info) if (info /= psb_success_) then @@ -5950,7 +6099,7 @@ subroutine psb_lz_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) if (nz < 0) then info = psb_err_iarg_neg_ - call psb_errpush(info,name,i_err=(/1_psb_ipk_/)) +3 call psb_errpush(info,name,i_err=(/1_psb_ipk_/)) goto 9999 end if if (size(ia) < nz) then diff --git a/base/serial/impl/psb_z_csr_impl.f90 b/base/serial/impl/psb_z_csr_impl.f90 index 498f2b28..d16e39eb 100644 --- a/base/serial/impl/psb_z_csr_impl.f90 +++ b/base/serial/impl/psb_z_csr_impl.f90 @@ -130,10 +130,12 @@ contains if (alpha == zzero) then if (beta == zzero) then + !$omp parallel do private(i) do i = 1, m y(i) = zzero enddo else + !$omp parallel do private(i) do i = 1, m y(i) = beta*y(i) end do @@ -147,6 +149,7 @@ contains if (beta == zzero) then if (alpha == zone) then + !$omp parallel do private(i,j, acc) schedule(static) do i=1,m acc = zzero do j=irp(i), irp(i+1)-1 @@ -157,6 +160,7 @@ contains else if (alpha == -zone) then + !$omp parallel do private(i,j, acc) do i=1,m acc = zzero do j=irp(i), irp(i+1)-1 @@ -167,6 +171,7 @@ contains else + !$omp parallel do private(i,j,acc) do i=1,m acc = zzero do j=irp(i), irp(i+1)-1 @@ -181,6 +186,7 @@ contains else if (beta == zone) then if (alpha == zone) then + !$omp parallel do private(i,j,acc) do i=1,m acc = zzero do j=irp(i), irp(i+1)-1 @@ -191,6 +197,7 @@ contains else if (alpha == -zone) then + !$omp parallel do private(i,j,acc) do i=1,m acc = zzero do j=irp(i), irp(i+1)-1 @@ -201,6 +208,7 @@ contains else + !$omp parallel do private(i,j,acc) do i=1,m acc = zzero do j=irp(i), irp(i+1)-1 @@ -214,6 +222,7 @@ contains else if (beta == -zone) then if (alpha == zone) then + !$omp parallel do private(i,j,acc) do i=1,m acc = zzero do j=irp(i), irp(i+1)-1 @@ -224,6 +233,7 @@ contains else if (alpha == -zone) then + !$omp parallel do private(i,j,acc) do i=1,m acc = zzero do j=irp(i), irp(i+1)-1 @@ -234,6 +244,7 @@ contains else + !$omp parallel do private(i,j,acc) do i=1,m acc = zzero do j=irp(i), irp(i+1)-1 @@ -247,6 +258,7 @@ contains else if (alpha == zone) then + !$omp parallel do private(i,j,acc) do i=1,m acc = zzero do j=irp(i), irp(i+1)-1 @@ -257,6 +269,7 @@ contains else if (alpha == -zone) then + !$omp parallel do private(i,j,acc) do i=1,m acc = zzero do j=irp(i), irp(i+1)-1 @@ -267,6 +280,7 @@ contains else + !$omp parallel do private(i,j,acc) do i=1,m acc = zzero do j=irp(i), irp(i+1)-1 @@ -282,16 +296,19 @@ contains else if (tra) then if (beta == zzero) then + !$omp parallel do private(i) do i=1, m y(i) = zzero end do else if (beta == zone) then ! Do nothing else if (beta == -zone) then + !$omp parallel do private(i) do i=1, m y(i) = -y(i) end do else + !$omp parallel do private(i) do i=1, m y(i) = beta*y(i) end do @@ -476,16 +493,18 @@ contains complex(psb_dpk_), intent(inout) :: y(ldy,*) logical, intent(in) :: is_triangle,is_unit,tra,ctra - complex(psb_dpk_), intent(inout) :: acc(*) + complex(psb_dpk_), intent(inout) :: acc(:) integer(psb_ipk_) :: i,j, ir if (alpha == zzero) then if (beta == zzero) then + !$omp parallel do private(i) do i = 1, m y(i,1:nc) = zzero enddo else + !$omp parallel do private(i) do i = 1, m y(i,1:nc) = beta*y(i,1:nc) end do @@ -497,6 +516,7 @@ contains if (beta == zzero) then if (alpha == zone) then + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = zzero do j=irp(i), irp(i+1)-1 @@ -507,6 +527,7 @@ contains else if (alpha == -zone) then + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = zzero do j=irp(i), irp(i+1)-1 @@ -517,6 +538,7 @@ contains else + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = zzero do j=irp(i), irp(i+1)-1 @@ -531,6 +553,7 @@ contains else if (beta == zone) then if (alpha == zone) then + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = zzero do j=irp(i), irp(i+1)-1 @@ -541,6 +564,7 @@ contains else if (alpha == -zone) then + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = zzero do j=irp(i), irp(i+1)-1 @@ -551,6 +575,7 @@ contains else + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = zzero do j=irp(i), irp(i+1)-1 @@ -564,6 +589,7 @@ contains else if (beta == -zone) then if (alpha == zone) then + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = zzero do j=irp(i), irp(i+1)-1 @@ -574,6 +600,7 @@ contains else if (alpha == -zone) then + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = zzero do j=irp(i), irp(i+1)-1 @@ -584,6 +611,7 @@ contains else + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = zzero do j=irp(i), irp(i+1)-1 @@ -597,6 +625,7 @@ contains else if (alpha == zone) then + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = zzero do j=irp(i), irp(i+1)-1 @@ -607,6 +636,7 @@ contains else if (alpha == -zone) then + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = zzero do j=irp(i), irp(i+1)-1 @@ -617,6 +647,7 @@ contains else + !$omp parallel do private(i,j,acc) do i=1,m acc(1:nc) = zzero do j=irp(i), irp(i+1)-1 diff --git a/base/serial/impl/psb_z_mat_impl.F90 b/base/serial/impl/psb_z_mat_impl.F90 index 07616c05..422a664d 100644 --- a/base/serial/impl/psb_z_mat_impl.F90 +++ b/base/serial/impl/psb_z_mat_impl.F90 @@ -582,7 +582,7 @@ end subroutine psb_z_get_neigh -subroutine psb_z_csall(nr,nc,a,info,nz) +subroutine psb_z_csall(nr,nc,a,info,nz,type,mold) use psb_z_mat_mod, psb_protect_name => psb_z_csall use psb_z_base_mat_mod use psb_error_mod @@ -591,6 +591,8 @@ subroutine psb_z_csall(nr,nc,a,info,nz) integer(psb_ipk_), intent(in) :: nr,nc integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nz + character(len=*), intent(in), optional :: type + class(psb_z_base_sparse_mat), optional, intent(in) :: mold integer(psb_ipk_) :: err_act character(len=20) :: name='csall' @@ -601,7 +603,23 @@ subroutine psb_z_csall(nr,nc,a,info,nz) call a%free() info = psb_success_ - allocate(psb_z_coo_sparse_mat :: a%a, stat=info) + if (present(mold)) then + allocate(a%a, stat=info, mold=mold) + else if (present(type)) then + select case (type) + case('CSR') + allocate(psb_z_csr_sparse_mat :: a%a, stat=info) + case('COO') + allocate(psb_z_coo_sparse_mat :: a%a, stat=info) + case('CSC') + allocate(psb_z_csc_sparse_mat :: a%a, stat=info) + case default + allocate(psb_z_coo_sparse_mat :: a%a, stat=info) + end select + else + allocate(psb_z_coo_sparse_mat :: a%a, stat=info) + end if + if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info, name) @@ -3381,7 +3399,7 @@ end subroutine psb_lz_get_neigh -subroutine psb_lz_csall(nr,nc,a,info,nz) +subroutine psb_lz_csall(nr,nc,a,info,nz,type,mold) use psb_z_mat_mod, psb_protect_name => psb_lz_csall use psb_z_base_mat_mod use psb_error_mod @@ -3390,6 +3408,8 @@ subroutine psb_lz_csall(nr,nc,a,info,nz) integer(psb_lpk_), intent(in) :: nr,nc integer(psb_ipk_), intent(out) :: info integer(psb_lpk_), intent(in), optional :: nz + character(len=*), intent(in), optional :: type + class(psb_lz_base_sparse_mat), optional, intent(in) :: mold integer(psb_ipk_) :: err_act character(len=20) :: name='csall' @@ -3400,7 +3420,22 @@ subroutine psb_lz_csall(nr,nc,a,info,nz) call a%free() info = psb_success_ - allocate(psb_lz_coo_sparse_mat :: a%a, stat=info) + if (present(mold)) then + allocate(a%a, stat=info, mold=mold) + else if (present(type)) then + select case (type) + case('CSR') + allocate(psb_lz_csr_sparse_mat :: a%a, stat=info) + case('COO') + allocate(psb_lz_coo_sparse_mat :: a%a, stat=info) + case('CSC') + allocate(psb_lz_csc_sparse_mat :: a%a, stat=info) + case default + allocate(psb_lz_coo_sparse_mat :: a%a, stat=info) + end select + else + allocate(psb_lz_coo_sparse_mat :: a%a, stat=info) + end if if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info, name) diff --git a/base/serial/psb_cgelp.f90 b/base/serial/psb_cgelp.f90 deleted file mode 100644 index 6b3b2def..00000000 --- a/base/serial/psb_cgelp.f90 +++ /dev/null @@ -1,246 +0,0 @@ -! -! Parallel Sparse BLAS version 3.5 -! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari University of Rome Tor Vergata -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! -! -! File: psb_cgelp.f90 -! -! -! Subroutine: psb_cgelp -! Apply a left permutation to a dense matrix -! -! Arguments: -! trans - character. -! iperm - integer. -! x - real, dimension(:,:). -! info - integer. Return code. -subroutine psb_cgelp(trans,iperm,x,info) - use psb_serial_mod, psb_protect_name => psb_cgelp - use psb_const_mod - use psb_error_mod - implicit none - - complex(psb_spk_), intent(inout) :: x(:,:) - integer(psb_ipk_), intent(in) :: iperm(:) - integer(psb_ipk_), intent(out) :: info - character, intent(in) :: trans - - ! local variables - complex(psb_spk_),allocatable :: temp(:) - integer(psb_ipk_) :: int_err(5), i1sz, i2sz, err_act,i,j - integer(psb_ipk_), allocatable :: itemp(:) - complex(psb_spk_),parameter :: one=1 - integer(psb_ipk_) :: debug_level, debug_unit - - character(len=20) :: name, ch_err - name = 'psb_cgelp' - - if(psb_get_errstatus() /= 0) return - info=psb_success_ - call psb_erractionsave(err_act) - debug_unit = psb_get_debug_unit() - debug_level = psb_get_debug_level() - - i1sz = size(x,dim=1) - i2sz = size(x,dim=2) - - if (debug_level >= psb_debug_serial_)& - & write(debug_unit,*) trim(name),': size',i1sz,i2sz - - allocate(temp(i1sz),itemp(size(iperm)),stat=info) - if (info /= psb_success_) then - info=2040 - call psb_errpush(info,name) - goto 9999 - end if - itemp(:) = iperm(:) - - if (.not.psb_isaperm(i1sz,itemp)) then - info=psb_err_iarg_invalid_value_ - int_err(1) = 1 - call psb_errpush(info,name,i_err=int_err) - goto 9999 - endif - select case( psb_toupper(trans)) - case('N') - do j=1,i2sz - do i=1,i1sz - temp(i) = x(itemp(i),j) - end do - do i=1,i1sz - x(i,j) = temp(i) - end do - end do - case('T') - do j=1,i2sz - do i=1,i1sz - temp(itemp(i)) = x(i,j) - end do - do i=1,i1sz - x(i,j) = temp(i) - end do - end do - case default - info=psb_err_from_subroutine_ - ch_err='dgelp' - call psb_errpush(info,name,a_err=ch_err) - end select - - deallocate(temp,itemp) - - call psb_erractionrestore(err_act) - return - -9999 call psb_error_handler(err_act) - - return - -end subroutine psb_cgelp - - - -!!$ -!!$ Parallel Sparse BLAS version 3.5 -!!$ (C) Copyright 2006-2018 -!!$ Salvatore Filippone University of Rome Tor Vergata -!!$ Alfredo Buttari University of Rome Tor Vergata -!!$ -!!$ Redistribution and use in source and binary forms, with or without -!!$ modification, are permitted provided that the following conditions -!!$ are met: -!!$ 1. Redistributions of source code must retain the above copyright -!!$ notice, this list of conditions and the following disclaimer. -!!$ 2. Redistributions in binary form must reproduce the above copyright -!!$ notice, this list of conditions, and the following disclaimer in the -!!$ documentation and/or other materials provided with the distribution. -!!$ 3. The name of the PSBLAS group or the names of its contributors may -!!$ not be used to endorse or promote products derived from this -!!$ software without specific written permission. -!!$ -!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -!!$ POSSIBILITY OF SUCH DAMAGE. -!!$ -!!$ -! -! -! Subroutine: psb_cgelpv -! Apply a left permutation to a dense matrix -! -! Arguments: -! trans - character. -! iperm - integer. -! x - real, dimension(:). -! info - integer. Return code. -subroutine psb_cgelpv(trans,iperm,x,info) - use psb_serial_mod, psb_protect_name => psb_cgelpv - use psb_const_mod - use psb_error_mod - implicit none - - complex(psb_spk_), intent(inout) :: x(:) - integer(psb_ipk_), intent(in) :: iperm(:) - integer(psb_ipk_), intent(out) :: info - character, intent(in) :: trans - - ! local variables - integer(psb_ipk_) :: int_err(5), i1sz, err_act, i - complex(psb_spk_),allocatable :: temp(:) - integer(psb_ipk_), allocatable :: itemp(:) - complex(psb_spk_),parameter :: one=1 - integer(psb_ipk_) :: debug_level, debug_unit - - character(len=20) :: name, ch_err - name = 'psb_cgelpv' - - if(psb_get_errstatus() /= 0) return - info=psb_success_ - call psb_erractionsave(err_act) - debug_unit = psb_get_debug_unit() - debug_level = psb_get_debug_level() - - i1sz = min(size(x),size(iperm)) - - if (debug_level >= psb_debug_serial_)& - & write(debug_unit,*) trim(name),': size',i1sz - allocate(temp(i1sz),itemp(size(iperm)),stat=info) - if (info /= psb_success_) then - info=2040 - call psb_errpush(info,name) - goto 9999 - end if - itemp(:) = iperm(:) - - if (.not.psb_isaperm(i1sz,itemp)) then - info=psb_err_iarg_invalid_value_ - int_err(1) = 1 - call psb_errpush(info,name,i_err=int_err) - goto 9999 - endif - - select case( psb_toupper(trans)) - case('N') - do i=1,i1sz - temp(i) = x(itemp(i)) - end do - do i=1,i1sz - x(i) = temp(i) - end do - case('T') - do i=1,i1sz - temp(itemp(i)) = x(i) - end do - do i=1,i1sz - x(i) = temp(i) - end do - case default - info=psb_err_from_subroutine_ - ch_err='dgelp' - call psb_errpush(info,name,a_err=ch_err) - end select - - deallocate(temp,itemp) - - call psb_erractionrestore(err_act) - return - -9999 call psb_error_handler(err_act) - - return - -end subroutine psb_cgelpv - diff --git a/base/serial/psb_dgelp.f90 b/base/serial/psb_dgelp.f90 deleted file mode 100644 index 7dc4b132..00000000 --- a/base/serial/psb_dgelp.f90 +++ /dev/null @@ -1,246 +0,0 @@ -! -! Parallel Sparse BLAS version 3.5 -! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari University of Rome Tor Vergata -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! -! -! File: psb_dgelp.f90 -! -! -! Subroutine: psb_dgelp -! Apply a left permutation to a dense matrix -! -! Arguments: -! trans - character. -! iperm - integer. -! x - real, dimension(:,:). -! info - integer. Return code. -subroutine psb_dgelp(trans,iperm,x,info) - use psb_serial_mod, psb_protect_name => psb_dgelp - use psb_const_mod - use psb_error_mod - implicit none - - real(psb_dpk_), intent(inout) :: x(:,:) - integer(psb_ipk_), intent(in) :: iperm(:) - integer(psb_ipk_), intent(out) :: info - character, intent(in) :: trans - - ! local variables - real(psb_dpk_),allocatable :: temp(:) - integer(psb_ipk_) :: int_err(5), i1sz, i2sz, err_act,i,j - integer(psb_ipk_), allocatable :: itemp(:) - real(psb_dpk_),parameter :: one=1 - integer(psb_ipk_) :: debug_level, debug_unit - - character(len=20) :: name, ch_err - name = 'psb_dgelp' - - if(psb_get_errstatus() /= 0) return - info=psb_success_ - call psb_erractionsave(err_act) - debug_unit = psb_get_debug_unit() - debug_level = psb_get_debug_level() - - i1sz = size(x,dim=1) - i2sz = size(x,dim=2) - - if (debug_level >= psb_debug_serial_)& - & write(debug_unit,*) trim(name),': size',i1sz,i2sz - - allocate(temp(i1sz),itemp(size(iperm)),stat=info) - if (info /= psb_success_) then - info=2040 - call psb_errpush(info,name) - goto 9999 - end if - itemp(:) = iperm(:) - - if (.not.psb_isaperm(i1sz,itemp)) then - info=psb_err_iarg_invalid_value_ - int_err(1) = 1 - call psb_errpush(info,name,i_err=int_err) - goto 9999 - endif - select case( psb_toupper(trans)) - case('N') - do j=1,i2sz - do i=1,i1sz - temp(i) = x(itemp(i),j) - end do - do i=1,i1sz - x(i,j) = temp(i) - end do - end do - case('T') - do j=1,i2sz - do i=1,i1sz - temp(itemp(i)) = x(i,j) - end do - do i=1,i1sz - x(i,j) = temp(i) - end do - end do - case default - info=psb_err_from_subroutine_ - ch_err='dgelp' - call psb_errpush(info,name,a_err=ch_err) - end select - - deallocate(temp,itemp) - - call psb_erractionrestore(err_act) - return - -9999 call psb_error_handler(err_act) - - return - -end subroutine psb_dgelp - - - -!!$ -!!$ Parallel Sparse BLAS version 3.5 -!!$ (C) Copyright 2006-2018 -!!$ Salvatore Filippone University of Rome Tor Vergata -!!$ Alfredo Buttari University of Rome Tor Vergata -!!$ -!!$ Redistribution and use in source and binary forms, with or without -!!$ modification, are permitted provided that the following conditions -!!$ are met: -!!$ 1. Redistributions of source code must retain the above copyright -!!$ notice, this list of conditions and the following disclaimer. -!!$ 2. Redistributions in binary form must reproduce the above copyright -!!$ notice, this list of conditions, and the following disclaimer in the -!!$ documentation and/or other materials provided with the distribution. -!!$ 3. The name of the PSBLAS group or the names of its contributors may -!!$ not be used to endorse or promote products derived from this -!!$ software without specific written permission. -!!$ -!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -!!$ POSSIBILITY OF SUCH DAMAGE. -!!$ -!!$ -! -! -! Subroutine: psb_dgelpv -! Apply a left permutation to a dense matrix -! -! Arguments: -! trans - character. -! iperm - integer. -! x - real, dimension(:). -! info - integer. Return code. -subroutine psb_dgelpv(trans,iperm,x,info) - use psb_serial_mod, psb_protect_name => psb_dgelpv - use psb_const_mod - use psb_error_mod - implicit none - - real(psb_dpk_), intent(inout) :: x(:) - integer(psb_ipk_), intent(in) :: iperm(:) - integer(psb_ipk_), intent(out) :: info - character, intent(in) :: trans - - ! local variables - integer(psb_ipk_) :: int_err(5), i1sz, err_act, i - real(psb_dpk_),allocatable :: temp(:) - integer(psb_ipk_), allocatable :: itemp(:) - real(psb_dpk_),parameter :: one=1 - integer(psb_ipk_) :: debug_level, debug_unit - - character(len=20) :: name, ch_err - name = 'psb_dgelpv' - - if(psb_get_errstatus() /= 0) return - info=psb_success_ - call psb_erractionsave(err_act) - debug_unit = psb_get_debug_unit() - debug_level = psb_get_debug_level() - - i1sz = min(size(x),size(iperm)) - - if (debug_level >= psb_debug_serial_)& - & write(debug_unit,*) trim(name),': size',i1sz - allocate(temp(i1sz),itemp(size(iperm)),stat=info) - if (info /= psb_success_) then - info=2040 - call psb_errpush(info,name) - goto 9999 - end if - itemp(:) = iperm(:) - - if (.not.psb_isaperm(i1sz,itemp)) then - info=psb_err_iarg_invalid_value_ - int_err(1) = 1 - call psb_errpush(info,name,i_err=int_err) - goto 9999 - endif - - select case( psb_toupper(trans)) - case('N') - do i=1,i1sz - temp(i) = x(itemp(i)) - end do - do i=1,i1sz - x(i) = temp(i) - end do - case('T') - do i=1,i1sz - temp(itemp(i)) = x(i) - end do - do i=1,i1sz - x(i) = temp(i) - end do - case default - info=psb_err_from_subroutine_ - ch_err='dgelp' - call psb_errpush(info,name,a_err=ch_err) - end select - - deallocate(temp,itemp) - - call psb_erractionrestore(err_act) - return - -9999 call psb_error_handler(err_act) - - return - -end subroutine psb_dgelpv - diff --git a/base/serial/psb_sgelp.f90 b/base/serial/psb_sgelp.f90 deleted file mode 100644 index 73e46c71..00000000 --- a/base/serial/psb_sgelp.f90 +++ /dev/null @@ -1,247 +0,0 @@ -! -! Parallel Sparse BLAS version 3.5 -! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari University of Rome Tor Vergata -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! -! -! File: psb_sgelp.f90 -! -! -! Subroutine: psb_sgelp -! Apply a left permutation to a dense matrix -! -! Arguments: -! trans - character. -! iperm - integer. -! x - real, dimension(:,:). -! info - integer. Return code. -subroutine psb_sgelp(trans,iperm,x,info) - use psb_serial_mod, psb_protect_name => psb_sgelp - use psb_const_mod - use psb_error_mod - implicit none - - real(psb_spk_), intent(inout) :: x(:,:) - integer(psb_ipk_), intent(in) :: iperm(:) - integer(psb_ipk_), intent(out) :: info - character, intent(in) :: trans - ! local variables - integer(psb_ipk_) :: ctxt - real(psb_spk_),allocatable :: temp(:) - integer(psb_ipk_) :: int_err(5), i1sz, i2sz, err_act,i,j - integer(psb_ipk_), allocatable :: itemp(:) - real(psb_spk_),parameter :: one=1 - integer(psb_ipk_) :: debug_level, debug_unit - - character(len=20) :: name, ch_err - name = 'psb_sgelp' - - if(psb_get_errstatus() /= 0) return - info=psb_success_ - call psb_erractionsave(err_act) - debug_unit = psb_get_debug_unit() - debug_level = psb_get_debug_level() - - i1sz = size(x,dim=1) - i2sz = size(x,dim=2) - - if (debug_level >= psb_debug_serial_)& - & write(debug_unit,*) trim(name),': size',i1sz,i2sz - - allocate(temp(i1sz),itemp(size(iperm)),stat=info) - if (info /= psb_success_) then - info=2040 - call psb_errpush(info,name) - goto 9999 - end if - itemp(:) = iperm(:) - - if (.not.psb_isaperm(i1sz,itemp)) then - info=psb_err_iarg_invalid_value_ - int_err(1) = 1 - call psb_errpush(info,name,i_err=int_err) - goto 9999 - endif - select case( psb_toupper(trans)) - case('N') - do j=1,i2sz - do i=1,i1sz - temp(i) = x(itemp(i),j) - end do - do i=1,i1sz - x(i,j) = temp(i) - end do - end do - case('T') - do j=1,i2sz - do i=1,i1sz - temp(itemp(i)) = x(i,j) - end do - do i=1,i1sz - x(i,j) = temp(i) - end do - end do - case default - info=psb_err_from_subroutine_ - ch_err='dgelp' - call psb_errpush(info,name,a_err=ch_err) - end select - - deallocate(temp,itemp) - - call psb_erractionrestore(err_act) - return - -9999 call psb_error_handler(err_act) - - return - -end subroutine psb_sgelp - - - -!!$ -!!$ Parallel Sparse BLAS version 3.5 -!!$ (C) Copyright 2006-2018 -!!$ Salvatore Filippone University of Rome Tor Vergata -!!$ Alfredo Buttari University of Rome Tor Vergata -!!$ -!!$ Redistribution and use in source and binary forms, with or without -!!$ modification, are permitted provided that the following conditions -!!$ are met: -!!$ 1. Redistributions of source code must retain the above copyright -!!$ notice, this list of conditions and the following disclaimer. -!!$ 2. Redistributions in binary form must reproduce the above copyright -!!$ notice, this list of conditions, and the following disclaimer in the -!!$ documentation and/or other materials provided with the distribution. -!!$ 3. The name of the PSBLAS group or the names of its contributors may -!!$ not be used to endorse or promote products derived from this -!!$ software without specific written permission. -!!$ -!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -!!$ POSSIBILITY OF SUCH DAMAGE. -!!$ -!!$ -! -! -! Subroutine: psb_sgelpv -! Apply a left permutation to a dense matrix -! -! Arguments: -! trans - character. -! iperm - integer. -! x - real, dimension(:). -! info - integer. Return code. -subroutine psb_sgelpv(trans,iperm,x,info) - use psb_serial_mod, psb_protect_name => psb_sgelpv - use psb_const_mod - use psb_error_mod - implicit none - - real(psb_spk_), intent(inout) :: x(:) - integer(psb_ipk_), intent(in) :: iperm(:) - integer(psb_ipk_), intent(out) :: info - character, intent(in) :: trans - - ! local variables - integer(psb_ipk_) :: ctxt - integer(psb_ipk_) :: int_err(5), i1sz, err_act, i - real(psb_spk_),allocatable :: temp(:) - integer(psb_ipk_), allocatable :: itemp(:) - real(psb_spk_),parameter :: one=1 - integer(psb_ipk_) :: debug_level, debug_unit - - character(len=20) :: name, ch_err - name = 'psb_sgelpv' - - if(psb_get_errstatus() /= 0) return - info=psb_success_ - call psb_erractionsave(err_act) - debug_unit = psb_get_debug_unit() - debug_level = psb_get_debug_level() - - i1sz = min(size(x),size(iperm)) - - if (debug_level >= psb_debug_serial_)& - & write(debug_unit,*) trim(name),': size',i1sz - allocate(temp(i1sz),itemp(size(iperm)),stat=info) - if (info /= psb_success_) then - info=2040 - call psb_errpush(info,name) - goto 9999 - end if - itemp(:) = iperm(:) - - if (.not.psb_isaperm(i1sz,itemp)) then - info=psb_err_iarg_invalid_value_ - int_err(1) = 1 - call psb_errpush(info,name,i_err=int_err) - goto 9999 - endif - - select case( psb_toupper(trans)) - case('N') - do i=1,i1sz - temp(i) = x(itemp(i)) - end do - do i=1,i1sz - x(i) = temp(i) - end do - case('T') - do i=1,i1sz - temp(itemp(i)) = x(i) - end do - do i=1,i1sz - x(i) = temp(i) - end do - case default - info=psb_err_from_subroutine_ - ch_err='dgelp' - call psb_errpush(info,name,a_err=ch_err) - end select - - deallocate(temp,itemp) - - call psb_erractionrestore(err_act) - return - -9999 call psb_error_handler(err_act) - - return - -end subroutine psb_sgelpv - diff --git a/base/serial/psb_zgelp.f90 b/base/serial/psb_zgelp.f90 deleted file mode 100644 index 609e9657..00000000 --- a/base/serial/psb_zgelp.f90 +++ /dev/null @@ -1,246 +0,0 @@ -! -! Parallel Sparse BLAS version 3.5 -! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari University of Rome Tor Vergata -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! -! -! File: psb_zgelp.f90 -! -! -! Subroutine: psb_zgelp -! Apply a left permutation to a dense matrix -! -! Arguments: -! trans - character. -! iperm - integer. -! x - real, dimension(:,:). -! info - integer. Return code. -subroutine psb_zgelp(trans,iperm,x,info) - use psb_serial_mod, psb_protect_name => psb_zgelp - use psb_const_mod - use psb_error_mod - implicit none - - complex(psb_dpk_), intent(inout) :: x(:,:) - integer(psb_ipk_), intent(in) :: iperm(:) - integer(psb_ipk_), intent(out) :: info - character, intent(in) :: trans - - ! local variables - complex(psb_dpk_),allocatable :: temp(:) - integer(psb_ipk_) :: int_err(5), i1sz, i2sz, err_act,i,j - integer(psb_ipk_), allocatable :: itemp(:) - complex(psb_dpk_),parameter :: one=1 - integer(psb_ipk_) :: debug_level, debug_unit - - character(len=20) :: name, ch_err - name = 'psb_zgelp' - - if(psb_get_errstatus() /= 0) return - info=psb_success_ - call psb_erractionsave(err_act) - debug_unit = psb_get_debug_unit() - debug_level = psb_get_debug_level() - - i1sz = size(x,dim=1) - i2sz = size(x,dim=2) - - if (debug_level >= psb_debug_serial_)& - & write(debug_unit,*) trim(name),': size',i1sz,i2sz - - allocate(temp(i1sz),itemp(size(iperm)),stat=info) - if (info /= psb_success_) then - info=2040 - call psb_errpush(info,name) - goto 9999 - end if - itemp(:) = iperm(:) - - if (.not.psb_isaperm(i1sz,itemp)) then - info=psb_err_iarg_invalid_value_ - int_err(1) = 1 - call psb_errpush(info,name,i_err=int_err) - goto 9999 - endif - select case( psb_toupper(trans)) - case('N') - do j=1,i2sz - do i=1,i1sz - temp(i) = x(itemp(i),j) - end do - do i=1,i1sz - x(i,j) = temp(i) - end do - end do - case('T') - do j=1,i2sz - do i=1,i1sz - temp(itemp(i)) = x(i,j) - end do - do i=1,i1sz - x(i,j) = temp(i) - end do - end do - case default - info=psb_err_from_subroutine_ - ch_err='dgelp' - call psb_errpush(info,name,a_err=ch_err) - end select - - deallocate(temp,itemp) - - call psb_erractionrestore(err_act) - return - -9999 call psb_error_handler(err_act) - - return - -end subroutine psb_zgelp - - - -!!$ -!!$ Parallel Sparse BLAS version 3.5 -!!$ (C) Copyright 2006-2018 -!!$ Salvatore Filippone University of Rome Tor Vergata -!!$ Alfredo Buttari University of Rome Tor Vergata -!!$ -!!$ Redistribution and use in source and binary forms, with or without -!!$ modification, are permitted provided that the following conditions -!!$ are met: -!!$ 1. Redistributions of source code must retain the above copyright -!!$ notice, this list of conditions and the following disclaimer. -!!$ 2. Redistributions in binary form must reproduce the above copyright -!!$ notice, this list of conditions, and the following disclaimer in the -!!$ documentation and/or other materials provided with the distribution. -!!$ 3. The name of the PSBLAS group or the names of its contributors may -!!$ not be used to endorse or promote products derived from this -!!$ software without specific written permission. -!!$ -!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -!!$ POSSIBILITY OF SUCH DAMAGE. -!!$ -!!$ -! -! -! Subroutine: psb_zgelpv -! Apply a left permutation to a dense matrix -! -! Arguments: -! trans - character. -! iperm - integer. -! x - real, dimension(:). -! info - integer. Return code. -subroutine psb_zgelpv(trans,iperm,x,info) - use psb_serial_mod, psb_protect_name => psb_zgelpv - use psb_const_mod - use psb_error_mod - implicit none - - complex(psb_dpk_), intent(inout) :: x(:) - integer(psb_ipk_), intent(in) :: iperm(:) - integer(psb_ipk_), intent(out) :: info - character, intent(in) :: trans - - ! local variables - integer(psb_ipk_) :: int_err(5), i1sz, err_act, i - complex(psb_dpk_),allocatable :: temp(:) - integer(psb_ipk_), allocatable :: itemp(:) - complex(psb_dpk_),parameter :: one=1 - integer(psb_ipk_) :: debug_level, debug_unit - - character(len=20) :: name, ch_err - name = 'psb_zgelpv' - - if(psb_get_errstatus() /= 0) return - info=psb_success_ - call psb_erractionsave(err_act) - debug_unit = psb_get_debug_unit() - debug_level = psb_get_debug_level() - - i1sz = min(size(x),size(iperm)) - - if (debug_level >= psb_debug_serial_)& - & write(debug_unit,*) trim(name),': size',i1sz - allocate(temp(i1sz),itemp(size(iperm)),stat=info) - if (info /= psb_success_) then - info=2040 - call psb_errpush(info,name) - goto 9999 - end if - itemp(:) = iperm(:) - - if (.not.psb_isaperm(i1sz,itemp)) then - info=psb_err_iarg_invalid_value_ - int_err(1) = 1 - call psb_errpush(info,name,i_err=int_err) - goto 9999 - endif - - select case( psb_toupper(trans)) - case('N') - do i=1,i1sz - temp(i) = x(itemp(i)) - end do - do i=1,i1sz - x(i) = temp(i) - end do - case('T') - do i=1,i1sz - temp(itemp(i)) = x(i) - end do - do i=1,i1sz - x(i) = temp(i) - end do - case default - info=psb_err_from_subroutine_ - ch_err='dgelp' - call psb_errpush(info,name,a_err=ch_err) - end select - - deallocate(temp,itemp) - - call psb_erractionrestore(err_act) - return - -9999 call psb_error_handler(err_act) - - return - -end subroutine psb_zgelpv - diff --git a/base/serial/psi_c_serial_impl.f90 b/base/serial/psi_c_serial_impl.F90 similarity index 50% rename from base/serial/psi_c_serial_impl.f90 rename to base/serial/psi_c_serial_impl.F90 index 2120683d..1da2ce6e 100644 --- a/base/serial/psi_c_serial_impl.f90 +++ b/base/serial/psi_c_serial_impl.F90 @@ -29,6 +29,407 @@ ! POSSIBILITY OF SUCH DAMAGE. ! ! +subroutine psb_m_cgelp(trans,iperm,x,info) + use psb_serial_mod, psb_protect_name => psb_m_cgelp + use psb_const_mod + use psb_error_mod + implicit none + + complex(psb_spk_), intent(inout) :: x(:,:) + integer(psb_mpk_), intent(in) :: iperm(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in) :: trans + + ! local variables + complex(psb_spk_),allocatable :: temp(:) + integer(psb_ipk_) :: int_err(5), i1sz, i2sz, err_act,i,j + integer(psb_ipk_), allocatable :: itemp(:) + complex(psb_spk_),parameter :: one=1 + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + name = 'psb_cgelp' + + if(psb_get_errstatus() /= 0) return + info=psb_success_ + call psb_erractionsave(err_act) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + i1sz = size(x,dim=1) + i2sz = size(x,dim=2) + + if (debug_level >= psb_debug_serial_)& + & write(debug_unit,*) trim(name),': size',i1sz,i2sz + + allocate(temp(i1sz),itemp(size(iperm)),stat=info) + if (info /= psb_success_) then + info=2040 + call psb_errpush(info,name) + goto 9999 + end if + itemp(:) = iperm(:) + + if (.not.psb_isaperm(i1sz,itemp)) then + info=psb_err_iarg_invalid_value_ + int_err(1) = 1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + select case( psb_toupper(trans)) + case('N') + do j=1,i2sz + do i=1,i1sz + temp(i) = x(itemp(i),j) + end do + do i=1,i1sz + x(i,j) = temp(i) + end do + end do + case('T') + do j=1,i2sz + do i=1,i1sz + temp(itemp(i)) = x(i,j) + end do + do i=1,i1sz + x(i,j) = temp(i) + end do + end do + case default + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='cgelp') + end select + + deallocate(temp,itemp) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_m_cgelp + + + +!!$ +!!$ Parallel Sparse BLAS version 3.5 +!!$ (C) Copyright 2006-2018 +!!$ Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the PSBLAS group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +! +! +! Subroutine: psb_cgelpv +! Apply a left permutation to a dense matrix +! +! Arguments: +! trans - character. +! iperm - integer. +! x - real, dimension(:). +! info - integer. Return code. +subroutine psb_m_cgelpv(trans,iperm,x,info) + use psb_serial_mod, psb_protect_name => psb_m_cgelpv + use psb_const_mod + use psb_error_mod + implicit none + + complex(psb_spk_), intent(inout) :: x(:) + integer(psb_mpk_), intent(in) :: iperm(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in) :: trans + + ! local variables + integer(psb_ipk_) :: int_err(5), i1sz, err_act, i + complex(psb_spk_),allocatable :: temp(:) + integer(psb_ipk_), allocatable :: itemp(:) + complex(psb_spk_),parameter :: one=1 + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + name = 'psb_cgelpv' + + if(psb_get_errstatus() /= 0) return + info=psb_success_ + call psb_erractionsave(err_act) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + i1sz = min(size(x),size(iperm)) + + if (debug_level >= psb_debug_serial_)& + & write(debug_unit,*) trim(name),': size',i1sz + allocate(temp(i1sz),itemp(size(iperm)),stat=info) + if (info /= psb_success_) then + info=2040 + call psb_errpush(info,name) + goto 9999 + end if + itemp(:) = iperm(:) + + if (.not.psb_isaperm(i1sz,itemp)) then + info=psb_err_iarg_invalid_value_ + int_err(1) = 1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + select case( psb_toupper(trans)) + case('N') + do i=1,i1sz + temp(i) = x(itemp(i)) + end do + do i=1,i1sz + x(i) = temp(i) + end do + case('T') + do i=1,i1sz + temp(itemp(i)) = x(i) + end do + do i=1,i1sz + x(i) = temp(i) + end do + case default + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='cgelp') + end select + + deallocate(temp,itemp) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_m_cgelpv + +subroutine psb_e_cgelp(trans,iperm,x,info) + use psb_serial_mod, psb_protect_name => psb_e_cgelp + use psb_const_mod + use psb_error_mod + implicit none + + complex(psb_spk_), intent(inout) :: x(:,:) + integer(psb_epk_), intent(in) :: iperm(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in) :: trans + + ! local variables + complex(psb_spk_),allocatable :: temp(:) + integer(psb_ipk_) :: int_err(5), err_act + integer(psb_epk_) :: i1sz, i2sz, i, j + integer(psb_epk_), allocatable :: itemp(:) + complex(psb_spk_),parameter :: one=1 + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + name = 'psb_cgelp' + + if(psb_get_errstatus() /= 0) return + info=psb_success_ + call psb_erractionsave(err_act) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + i1sz = size(x,dim=1) + i2sz = size(x,dim=2) + + if (debug_level >= psb_debug_serial_)& + & write(debug_unit,*) trim(name),': size',i1sz,i2sz + + allocate(temp(i1sz),itemp(size(iperm)),stat=info) + if (info /= psb_success_) then + info=2040 + call psb_errpush(info,name) + goto 9999 + end if + itemp(:) = iperm(:) + + if (.not.psb_isaperm(i1sz,itemp)) then + info=psb_err_iarg_invalid_value_ + int_err(1) = 1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + select case( psb_toupper(trans)) + case('N') + do j=1,i2sz + do i=1,i1sz + temp(i) = x(itemp(i),j) + end do + do i=1,i1sz + x(i,j) = temp(i) + end do + end do + case('T') + do j=1,i2sz + do i=1,i1sz + temp(itemp(i)) = x(i,j) + end do + do i=1,i1sz + x(i,j) = temp(i) + end do + end do + case default + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='cgelp') + end select + + deallocate(temp,itemp) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_e_cgelp + + + +!!$ +!!$ Parallel Sparse BLAS version 3.5 +!!$ (C) Copyright 2006-2018 +!!$ Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the PSBLAS group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +! +! +! Subroutine: psb_cgelpv +! Apply a left permutation to a dense matrix +! +! Arguments: +! trans - character. +! iperm - integer. +! x - real, dimension(:). +! info - integer. Return code. +subroutine psb_e_cgelpv(trans,iperm,x,info) + use psb_serial_mod, psb_protect_name => psb_e_cgelpv + use psb_const_mod + use psb_error_mod + implicit none + + complex(psb_spk_), intent(inout) :: x(:) + integer(psb_epk_), intent(in) :: iperm(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in) :: trans + + ! local variables + integer(psb_ipk_) :: int_err(5), err_act + complex(psb_spk_),allocatable :: temp(:) + integer(psb_epk_) :: i1sz, i + integer(psb_epk_), allocatable :: itemp(:) + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + name = 'psb_cgelp' + if(psb_get_errstatus() /= 0) return + info=psb_success_ + call psb_erractionsave(err_act) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + i1sz = min(size(x),size(iperm)) + + if (debug_level >= psb_debug_serial_)& + & write(debug_unit,*) trim(name),': size',i1sz + allocate(temp(i1sz),itemp(size(iperm)),stat=info) + if (info /= psb_success_) then + info=2040 + call psb_errpush(info,name) + goto 9999 + end if + itemp(:) = iperm(:) + + if (.not.psb_isaperm(i1sz,itemp)) then + info=psb_err_iarg_invalid_value_ + int_err(1) = 1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + select case( psb_toupper(trans)) + case('N') + do i=1,i1sz + temp(i) = x(itemp(i)) + end do + do i=1,i1sz + x(i) = temp(i) + end do + case('T') + do i=1,i1sz + temp(itemp(i)) = x(i) + end do + do i=1,i1sz + x(i) = temp(i) + end do + case default + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='cgelp') + end select + + deallocate(temp,itemp) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_e_cgelpv + subroutine psi_caxpby(m,n,alpha, x, beta, y, info) use psb_const_mod @@ -40,9 +441,9 @@ subroutine psi_caxpby(m,n,alpha, x, beta, y, info) complex(psb_spk_), intent (in) :: alpha, beta integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: err_act - integer(psb_ipk_) :: lx, ly + integer(psb_ipk_) :: lx, ly, i integer(psb_ipk_) :: ierr(5) - character(len=20) :: name, ch_err + character(len=20) :: name, ch_err name='psb_geaxpby' info=psb_success_ @@ -101,7 +502,8 @@ subroutine psi_caxpbyv(m,alpha, x, beta, y, info) integer(psb_ipk_) :: err_act integer(psb_ipk_) :: lx, ly integer(psb_ipk_) :: ierr(5) - character(len=20) :: name, ch_err + integer(psb_ipk_) :: i + character(len=20) :: name, ch_err name='psb_geaxpby' info=psb_success_ @@ -131,7 +533,106 @@ subroutine psi_caxpbyv(m,alpha, x, beta, y, info) goto 9999 end if - if (m>0) call caxpby(m,ione,alpha,x,lx,beta,y,ly,info) +! if (m>0) call caxpby(m,ione,alpha,x,lx,beta,y,ly,info) + + if (alpha.eq.czero) then + if (beta.eq.czero) then + !$omp parallel do private(i) + do i=1,m + y(i) = czero + enddo + else if (beta.eq.cone) then + ! + ! Do nothing! + ! + + else if (beta.eq.-cone) then + !$omp parallel do private(i) + do i=1,m + y(i) = - y(i) + enddo + else + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + enddo + endif + + else if (alpha.eq.cone) then + + if (beta.eq.czero) then + !$omp parallel do private(i) + do i=1,m + y(i) = x(i) + enddo + else if (beta.eq.cone) then + !$omp parallel do private(i) + do i=1,m + y(i) = x(i) + y(i) + enddo + + else if (beta.eq.-cone) then + !$omp parallel do private(i) + do i=1,m + y(i) = x(i) - y(i) + enddo + else + !$omp parallel do private(i) + do i=1,m + y(i) = x(i) + beta*y(i) + enddo + endif + + else if (alpha.eq.-cone) then + + if (beta.eq.czero) then + !$omp parallel do private(i) + do i=1,m + y(i) = -x(i) + enddo + else if (beta.eq.cone) then + !$omp parallel do private(i) + do i=1,m + y(i) = -x(i) + y(i) + enddo + else if (beta.eq.-cone) then + !$omp parallel do private(i) + do i=1,m + y(i) = -x(i) - y(i) + enddo + else + !$omp parallel do private(i) + do i=1,m + y(i) = -x(i) + beta*y(i) + enddo + endif + + else + + if (beta.eq.czero) then + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + enddo + else if (beta.eq.cone) then + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + y(i) + enddo + else if (beta.eq.-cone) then + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) - y(i) + enddo + else + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + beta*y(i) + enddo + endif + + endif + call psb_erractionrestore(err_act) return @@ -154,7 +655,7 @@ subroutine psi_caxpbyv2(m,alpha, x, beta, y, z, info) complex(psb_spk_), intent (in) :: alpha, beta integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: err_act - integer(psb_ipk_) :: lx, ly, lz + integer(psb_ipk_) :: lx, ly, lz, i integer(psb_ipk_) :: ierr(5) character(len=20) :: name, ch_err @@ -193,7 +694,105 @@ subroutine psi_caxpbyv2(m,alpha, x, beta, y, z, info) goto 9999 end if - if (m>0) call caxpbyv2(m,ione,alpha,x,lx,beta,y,ly,z,lz,info) + if (alpha.eq.czero) then + if (beta.eq.czero) then + !$omp parallel do private(i) + do i=1,m + Z(i) = czero + enddo + else if (beta.eq.cone) then + ! + ! Do nothing! + ! + + else if (beta.eq.-cone) then + !$omp parallel do private(i) + do i=1,m + Z(i) = - y(i) + enddo + else + !$omp parallel do private(i) + do i=1,m + Z(i) = beta*y(i) + enddo + endif + + else if (alpha.eq.cone) then + + if (beta.eq.czero) then + !$omp parallel do private(i) + do i=1,m + Z(i) = x(i) + enddo + else if (beta.eq.cone) then + !$omp parallel do private(i) + do i=1,m + Z(i) = x(i) + y(i) + enddo + + else if (beta.eq.-cone) then + !$omp parallel do private(i) + do i=1,m + Z(i) = x(i) - y(i) + enddo + else + !$omp parallel do private(i) + do i=1,m + Z(i) = x(i) + beta*y(i) + enddo + endif + + else if (alpha.eq.-cone) then + + if (beta.eq.czero) then + !$omp parallel do private(i) + do i=1,m + Z(i) = -x(i) + enddo + else if (beta.eq.cone) then + !$omp parallel do private(i) + do i=1,m + Z(i) = -x(i) + y(i) + enddo + + else if (beta.eq.-cone) then + !$omp parallel do private(i) + do i=1,m + Z(i) = -x(i) - y(i) + enddo + else + !$omp parallel do private(i) + do i=1,m + Z(i) = -x(i) + beta*y(i) + enddo + endif + + else + + if (beta.eq.czero) then + !$omp parallel do private(i) + do i=1,m + Z(i) = alpha*x(i) + enddo + else if (beta.eq.cone) then + !$omp parallel do private(i) + do i=1,m + Z(i) = alpha*x(i) + y(i) + enddo + + else if (beta.eq.-cone) then + !$omp parallel do private(i) + do i=1,m + Z(i) = alpha*x(i) - y(i) + enddo + else + !$omp parallel do private(i) + do i=1,m + Z(i) = alpha*x(i) + beta*y(i) + enddo + endif + + endif call psb_erractionrestore(err_act) return @@ -541,6 +1140,7 @@ subroutine caxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) if (alpha.eq.czero) then if (beta.eq.czero) then do j=1, n + !$omp parallel do private(i) do i=1,m y(i,j) = czero enddo @@ -552,12 +1152,14 @@ subroutine caxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) else if (beta.eq.-cone) then do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = beta*y(i,j) enddo @@ -568,12 +1170,14 @@ subroutine caxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) if (beta.eq.czero) then do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = x(i,j) enddo enddo else if (beta.eq.cone) then do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = x(i,j) + y(i,j) enddo @@ -581,12 +1185,14 @@ subroutine caxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) else if (beta.eq.-cone) then do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = x(i,j) - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = x(i,j) + beta*y(i,j) enddo @@ -597,12 +1203,14 @@ subroutine caxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) if (beta.eq.czero) then do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = -x(i,j) enddo enddo else if (beta.eq.cone) then do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = -x(i,j) + y(i,j) enddo @@ -610,12 +1218,14 @@ subroutine caxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) else if (beta.eq.-cone) then do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = -x(i,j) - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = -x(i,j) + beta*y(i,j) enddo @@ -626,12 +1236,14 @@ subroutine caxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) if (beta.eq.czero) then do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = alpha*x(i,j) enddo enddo else if (beta.eq.cone) then do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = alpha*x(i,j) + y(i,j) enddo @@ -639,12 +1251,14 @@ subroutine caxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) else if (beta.eq.-cone) then do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = alpha*x(i,j) - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = alpha*x(i,j) + beta*y(i,j) enddo @@ -730,12 +1344,14 @@ subroutine caxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) else if (beta.eq.-cone) then do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = beta*y(i,j) enddo @@ -746,12 +1362,14 @@ subroutine caxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) if (beta.eq.czero) then do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = x(i,j) enddo enddo else if (beta.eq.cone) then do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = x(i,j) + y(i,j) enddo @@ -759,12 +1377,14 @@ subroutine caxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) else if (beta.eq.-cone) then do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = x(i,j) - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = x(i,j) + beta*y(i,j) enddo @@ -775,12 +1395,14 @@ subroutine caxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) if (beta.eq.czero) then do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = -x(i,j) enddo enddo else if (beta.eq.cone) then do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = -x(i,j) + y(i,j) enddo @@ -788,12 +1410,14 @@ subroutine caxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) else if (beta.eq.-cone) then do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = -x(i,j) - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = -x(i,j) + beta*y(i,j) enddo @@ -804,12 +1428,14 @@ subroutine caxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) if (beta.eq.czero) then do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = alpha*x(i,j) enddo enddo else if (beta.eq.cone) then do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = alpha*x(i,j) + y(i,j) enddo @@ -817,12 +1443,14 @@ subroutine caxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) else if (beta.eq.-cone) then do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = alpha*x(i,j) - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = alpha*x(i,j) + beta*y(i,j) enddo diff --git a/base/serial/psi_d_serial_impl.f90 b/base/serial/psi_d_serial_impl.F90 similarity index 50% rename from base/serial/psi_d_serial_impl.f90 rename to base/serial/psi_d_serial_impl.F90 index 0d80f459..8c65b349 100644 --- a/base/serial/psi_d_serial_impl.f90 +++ b/base/serial/psi_d_serial_impl.F90 @@ -29,6 +29,407 @@ ! POSSIBILITY OF SUCH DAMAGE. ! ! +subroutine psb_m_dgelp(trans,iperm,x,info) + use psb_serial_mod, psb_protect_name => psb_m_dgelp + use psb_const_mod + use psb_error_mod + implicit none + + real(psb_dpk_), intent(inout) :: x(:,:) + integer(psb_mpk_), intent(in) :: iperm(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in) :: trans + + ! local variables + real(psb_dpk_),allocatable :: temp(:) + integer(psb_ipk_) :: int_err(5), i1sz, i2sz, err_act,i,j + integer(psb_ipk_), allocatable :: itemp(:) + real(psb_dpk_),parameter :: one=1 + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + name = 'psb_dgelp' + + if(psb_get_errstatus() /= 0) return + info=psb_success_ + call psb_erractionsave(err_act) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + i1sz = size(x,dim=1) + i2sz = size(x,dim=2) + + if (debug_level >= psb_debug_serial_)& + & write(debug_unit,*) trim(name),': size',i1sz,i2sz + + allocate(temp(i1sz),itemp(size(iperm)),stat=info) + if (info /= psb_success_) then + info=2040 + call psb_errpush(info,name) + goto 9999 + end if + itemp(:) = iperm(:) + + if (.not.psb_isaperm(i1sz,itemp)) then + info=psb_err_iarg_invalid_value_ + int_err(1) = 1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + select case( psb_toupper(trans)) + case('N') + do j=1,i2sz + do i=1,i1sz + temp(i) = x(itemp(i),j) + end do + do i=1,i1sz + x(i,j) = temp(i) + end do + end do + case('T') + do j=1,i2sz + do i=1,i1sz + temp(itemp(i)) = x(i,j) + end do + do i=1,i1sz + x(i,j) = temp(i) + end do + end do + case default + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='dgelp') + end select + + deallocate(temp,itemp) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_m_dgelp + + + +!!$ +!!$ Parallel Sparse BLAS version 3.5 +!!$ (C) Copyright 2006-2018 +!!$ Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the PSBLAS group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +! +! +! Subroutine: psb_dgelpv +! Apply a left permutation to a dense matrix +! +! Arguments: +! trans - character. +! iperm - integer. +! x - real, dimension(:). +! info - integer. Return code. +subroutine psb_m_dgelpv(trans,iperm,x,info) + use psb_serial_mod, psb_protect_name => psb_m_dgelpv + use psb_const_mod + use psb_error_mod + implicit none + + real(psb_dpk_), intent(inout) :: x(:) + integer(psb_mpk_), intent(in) :: iperm(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in) :: trans + + ! local variables + integer(psb_ipk_) :: int_err(5), i1sz, err_act, i + real(psb_dpk_),allocatable :: temp(:) + integer(psb_ipk_), allocatable :: itemp(:) + real(psb_dpk_),parameter :: one=1 + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + name = 'psb_dgelpv' + + if(psb_get_errstatus() /= 0) return + info=psb_success_ + call psb_erractionsave(err_act) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + i1sz = min(size(x),size(iperm)) + + if (debug_level >= psb_debug_serial_)& + & write(debug_unit,*) trim(name),': size',i1sz + allocate(temp(i1sz),itemp(size(iperm)),stat=info) + if (info /= psb_success_) then + info=2040 + call psb_errpush(info,name) + goto 9999 + end if + itemp(:) = iperm(:) + + if (.not.psb_isaperm(i1sz,itemp)) then + info=psb_err_iarg_invalid_value_ + int_err(1) = 1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + select case( psb_toupper(trans)) + case('N') + do i=1,i1sz + temp(i) = x(itemp(i)) + end do + do i=1,i1sz + x(i) = temp(i) + end do + case('T') + do i=1,i1sz + temp(itemp(i)) = x(i) + end do + do i=1,i1sz + x(i) = temp(i) + end do + case default + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='dgelp') + end select + + deallocate(temp,itemp) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_m_dgelpv + +subroutine psb_e_dgelp(trans,iperm,x,info) + use psb_serial_mod, psb_protect_name => psb_e_dgelp + use psb_const_mod + use psb_error_mod + implicit none + + real(psb_dpk_), intent(inout) :: x(:,:) + integer(psb_epk_), intent(in) :: iperm(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in) :: trans + + ! local variables + real(psb_dpk_),allocatable :: temp(:) + integer(psb_ipk_) :: int_err(5), err_act + integer(psb_epk_) :: i1sz, i2sz, i, j + integer(psb_epk_), allocatable :: itemp(:) + real(psb_dpk_),parameter :: one=1 + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + name = 'psb_dgelp' + + if(psb_get_errstatus() /= 0) return + info=psb_success_ + call psb_erractionsave(err_act) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + i1sz = size(x,dim=1) + i2sz = size(x,dim=2) + + if (debug_level >= psb_debug_serial_)& + & write(debug_unit,*) trim(name),': size',i1sz,i2sz + + allocate(temp(i1sz),itemp(size(iperm)),stat=info) + if (info /= psb_success_) then + info=2040 + call psb_errpush(info,name) + goto 9999 + end if + itemp(:) = iperm(:) + + if (.not.psb_isaperm(i1sz,itemp)) then + info=psb_err_iarg_invalid_value_ + int_err(1) = 1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + select case( psb_toupper(trans)) + case('N') + do j=1,i2sz + do i=1,i1sz + temp(i) = x(itemp(i),j) + end do + do i=1,i1sz + x(i,j) = temp(i) + end do + end do + case('T') + do j=1,i2sz + do i=1,i1sz + temp(itemp(i)) = x(i,j) + end do + do i=1,i1sz + x(i,j) = temp(i) + end do + end do + case default + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='dgelp') + end select + + deallocate(temp,itemp) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_e_dgelp + + + +!!$ +!!$ Parallel Sparse BLAS version 3.5 +!!$ (C) Copyright 2006-2018 +!!$ Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the PSBLAS group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +! +! +! Subroutine: psb_dgelpv +! Apply a left permutation to a dense matrix +! +! Arguments: +! trans - character. +! iperm - integer. +! x - real, dimension(:). +! info - integer. Return code. +subroutine psb_e_dgelpv(trans,iperm,x,info) + use psb_serial_mod, psb_protect_name => psb_e_dgelpv + use psb_const_mod + use psb_error_mod + implicit none + + real(psb_dpk_), intent(inout) :: x(:) + integer(psb_epk_), intent(in) :: iperm(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in) :: trans + + ! local variables + integer(psb_ipk_) :: int_err(5), err_act + real(psb_dpk_),allocatable :: temp(:) + integer(psb_epk_) :: i1sz, i + integer(psb_epk_), allocatable :: itemp(:) + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + name = 'psb_dgelp' + if(psb_get_errstatus() /= 0) return + info=psb_success_ + call psb_erractionsave(err_act) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + i1sz = min(size(x),size(iperm)) + + if (debug_level >= psb_debug_serial_)& + & write(debug_unit,*) trim(name),': size',i1sz + allocate(temp(i1sz),itemp(size(iperm)),stat=info) + if (info /= psb_success_) then + info=2040 + call psb_errpush(info,name) + goto 9999 + end if + itemp(:) = iperm(:) + + if (.not.psb_isaperm(i1sz,itemp)) then + info=psb_err_iarg_invalid_value_ + int_err(1) = 1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + select case( psb_toupper(trans)) + case('N') + do i=1,i1sz + temp(i) = x(itemp(i)) + end do + do i=1,i1sz + x(i) = temp(i) + end do + case('T') + do i=1,i1sz + temp(itemp(i)) = x(i) + end do + do i=1,i1sz + x(i) = temp(i) + end do + case default + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='dgelp') + end select + + deallocate(temp,itemp) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_e_dgelpv + subroutine psi_daxpby(m,n,alpha, x, beta, y, info) use psb_const_mod @@ -40,9 +441,9 @@ subroutine psi_daxpby(m,n,alpha, x, beta, y, info) real(psb_dpk_), intent (in) :: alpha, beta integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: err_act - integer(psb_ipk_) :: lx, ly + integer(psb_ipk_) :: lx, ly, i integer(psb_ipk_) :: ierr(5) - character(len=20) :: name, ch_err + character(len=20) :: name, ch_err name='psb_geaxpby' info=psb_success_ @@ -101,7 +502,8 @@ subroutine psi_daxpbyv(m,alpha, x, beta, y, info) integer(psb_ipk_) :: err_act integer(psb_ipk_) :: lx, ly integer(psb_ipk_) :: ierr(5) - character(len=20) :: name, ch_err + integer(psb_ipk_) :: i + character(len=20) :: name, ch_err name='psb_geaxpby' info=psb_success_ @@ -131,7 +533,106 @@ subroutine psi_daxpbyv(m,alpha, x, beta, y, info) goto 9999 end if - if (m>0) call daxpby(m,ione,alpha,x,lx,beta,y,ly,info) +! if (m>0) call daxpby(m,ione,alpha,x,lx,beta,y,ly,info) + + if (alpha.eq.dzero) then + if (beta.eq.dzero) then + !$omp parallel do private(i) + do i=1,m + y(i) = dzero + enddo + else if (beta.eq.done) then + ! + ! Do nothing! + ! + + else if (beta.eq.-done) then + !$omp parallel do private(i) + do i=1,m + y(i) = - y(i) + enddo + else + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + enddo + endif + + else if (alpha.eq.done) then + + if (beta.eq.dzero) then + !$omp parallel do private(i) + do i=1,m + y(i) = x(i) + enddo + else if (beta.eq.done) then + !$omp parallel do private(i) + do i=1,m + y(i) = x(i) + y(i) + enddo + + else if (beta.eq.-done) then + !$omp parallel do private(i) + do i=1,m + y(i) = x(i) - y(i) + enddo + else + !$omp parallel do private(i) + do i=1,m + y(i) = x(i) + beta*y(i) + enddo + endif + + else if (alpha.eq.-done) then + + if (beta.eq.dzero) then + !$omp parallel do private(i) + do i=1,m + y(i) = -x(i) + enddo + else if (beta.eq.done) then + !$omp parallel do private(i) + do i=1,m + y(i) = -x(i) + y(i) + enddo + else if (beta.eq.-done) then + !$omp parallel do private(i) + do i=1,m + y(i) = -x(i) - y(i) + enddo + else + !$omp parallel do private(i) + do i=1,m + y(i) = -x(i) + beta*y(i) + enddo + endif + + else + + if (beta.eq.dzero) then + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + enddo + else if (beta.eq.done) then + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + y(i) + enddo + else if (beta.eq.-done) then + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) - y(i) + enddo + else + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + beta*y(i) + enddo + endif + + endif + call psb_erractionrestore(err_act) return @@ -154,7 +655,7 @@ subroutine psi_daxpbyv2(m,alpha, x, beta, y, z, info) real(psb_dpk_), intent (in) :: alpha, beta integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: err_act - integer(psb_ipk_) :: lx, ly, lz + integer(psb_ipk_) :: lx, ly, lz, i integer(psb_ipk_) :: ierr(5) character(len=20) :: name, ch_err @@ -193,7 +694,105 @@ subroutine psi_daxpbyv2(m,alpha, x, beta, y, z, info) goto 9999 end if - if (m>0) call daxpbyv2(m,ione,alpha,x,lx,beta,y,ly,z,lz,info) + if (alpha.eq.dzero) then + if (beta.eq.dzero) then + !$omp parallel do private(i) + do i=1,m + Z(i) = dzero + enddo + else if (beta.eq.done) then + ! + ! Do nothing! + ! + + else if (beta.eq.-done) then + !$omp parallel do private(i) + do i=1,m + Z(i) = - y(i) + enddo + else + !$omp parallel do private(i) + do i=1,m + Z(i) = beta*y(i) + enddo + endif + + else if (alpha.eq.done) then + + if (beta.eq.dzero) then + !$omp parallel do private(i) + do i=1,m + Z(i) = x(i) + enddo + else if (beta.eq.done) then + !$omp parallel do private(i) + do i=1,m + Z(i) = x(i) + y(i) + enddo + + else if (beta.eq.-done) then + !$omp parallel do private(i) + do i=1,m + Z(i) = x(i) - y(i) + enddo + else + !$omp parallel do private(i) + do i=1,m + Z(i) = x(i) + beta*y(i) + enddo + endif + + else if (alpha.eq.-done) then + + if (beta.eq.dzero) then + !$omp parallel do private(i) + do i=1,m + Z(i) = -x(i) + enddo + else if (beta.eq.done) then + !$omp parallel do private(i) + do i=1,m + Z(i) = -x(i) + y(i) + enddo + + else if (beta.eq.-done) then + !$omp parallel do private(i) + do i=1,m + Z(i) = -x(i) - y(i) + enddo + else + !$omp parallel do private(i) + do i=1,m + Z(i) = -x(i) + beta*y(i) + enddo + endif + + else + + if (beta.eq.dzero) then + !$omp parallel do private(i) + do i=1,m + Z(i) = alpha*x(i) + enddo + else if (beta.eq.done) then + !$omp parallel do private(i) + do i=1,m + Z(i) = alpha*x(i) + y(i) + enddo + + else if (beta.eq.-done) then + !$omp parallel do private(i) + do i=1,m + Z(i) = alpha*x(i) - y(i) + enddo + else + !$omp parallel do private(i) + do i=1,m + Z(i) = alpha*x(i) + beta*y(i) + enddo + endif + + endif call psb_erractionrestore(err_act) return @@ -541,6 +1140,7 @@ subroutine daxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) if (alpha.eq.dzero) then if (beta.eq.dzero) then do j=1, n + !$omp parallel do private(i) do i=1,m y(i,j) = dzero enddo @@ -552,12 +1152,14 @@ subroutine daxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) else if (beta.eq.-done) then do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = beta*y(i,j) enddo @@ -568,12 +1170,14 @@ subroutine daxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) if (beta.eq.dzero) then do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = x(i,j) enddo enddo else if (beta.eq.done) then do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = x(i,j) + y(i,j) enddo @@ -581,12 +1185,14 @@ subroutine daxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) else if (beta.eq.-done) then do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = x(i,j) - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = x(i,j) + beta*y(i,j) enddo @@ -597,12 +1203,14 @@ subroutine daxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) if (beta.eq.dzero) then do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = -x(i,j) enddo enddo else if (beta.eq.done) then do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = -x(i,j) + y(i,j) enddo @@ -610,12 +1218,14 @@ subroutine daxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) else if (beta.eq.-done) then do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = -x(i,j) - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = -x(i,j) + beta*y(i,j) enddo @@ -626,12 +1236,14 @@ subroutine daxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) if (beta.eq.dzero) then do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = alpha*x(i,j) enddo enddo else if (beta.eq.done) then do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = alpha*x(i,j) + y(i,j) enddo @@ -639,12 +1251,14 @@ subroutine daxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) else if (beta.eq.-done) then do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = alpha*x(i,j) - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = alpha*x(i,j) + beta*y(i,j) enddo @@ -730,12 +1344,14 @@ subroutine daxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) else if (beta.eq.-done) then do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = beta*y(i,j) enddo @@ -746,12 +1362,14 @@ subroutine daxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) if (beta.eq.dzero) then do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = x(i,j) enddo enddo else if (beta.eq.done) then do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = x(i,j) + y(i,j) enddo @@ -759,12 +1377,14 @@ subroutine daxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) else if (beta.eq.-done) then do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = x(i,j) - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = x(i,j) + beta*y(i,j) enddo @@ -775,12 +1395,14 @@ subroutine daxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) if (beta.eq.dzero) then do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = -x(i,j) enddo enddo else if (beta.eq.done) then do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = -x(i,j) + y(i,j) enddo @@ -788,12 +1410,14 @@ subroutine daxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) else if (beta.eq.-done) then do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = -x(i,j) - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = -x(i,j) + beta*y(i,j) enddo @@ -804,12 +1428,14 @@ subroutine daxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) if (beta.eq.dzero) then do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = alpha*x(i,j) enddo enddo else if (beta.eq.done) then do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = alpha*x(i,j) + y(i,j) enddo @@ -817,12 +1443,14 @@ subroutine daxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) else if (beta.eq.-done) then do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = alpha*x(i,j) - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = alpha*x(i,j) + beta*y(i,j) enddo diff --git a/base/serial/psi_e_serial_impl.f90 b/base/serial/psi_e_serial_impl.F90 similarity index 60% rename from base/serial/psi_e_serial_impl.f90 rename to base/serial/psi_e_serial_impl.F90 index 0595d87e..988bad52 100644 --- a/base/serial/psi_e_serial_impl.f90 +++ b/base/serial/psi_e_serial_impl.F90 @@ -29,6 +29,407 @@ ! POSSIBILITY OF SUCH DAMAGE. ! ! +subroutine psb_m_egelp(trans,iperm,x,info) + use psb_serial_mod, psb_protect_name => psb_m_egelp + use psb_const_mod + use psb_error_mod + implicit none + + integer(psb_epk_), intent(inout) :: x(:,:) + integer(psb_mpk_), intent(in) :: iperm(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in) :: trans + + ! local variables + integer(psb_epk_),allocatable :: temp(:) + integer(psb_ipk_) :: int_err(5), i1sz, i2sz, err_act,i,j + integer(psb_ipk_), allocatable :: itemp(:) + integer(psb_epk_),parameter :: one=1 + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + name = 'psb_egelp' + + if(psb_get_errstatus() /= 0) return + info=psb_success_ + call psb_erractionsave(err_act) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + i1sz = size(x,dim=1) + i2sz = size(x,dim=2) + + if (debug_level >= psb_debug_serial_)& + & write(debug_unit,*) trim(name),': size',i1sz,i2sz + + allocate(temp(i1sz),itemp(size(iperm)),stat=info) + if (info /= psb_success_) then + info=2040 + call psb_errpush(info,name) + goto 9999 + end if + itemp(:) = iperm(:) + + if (.not.psb_isaperm(i1sz,itemp)) then + info=psb_err_iarg_invalid_value_ + int_err(1) = 1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + select case( psb_toupper(trans)) + case('N') + do j=1,i2sz + do i=1,i1sz + temp(i) = x(itemp(i),j) + end do + do i=1,i1sz + x(i,j) = temp(i) + end do + end do + case('T') + do j=1,i2sz + do i=1,i1sz + temp(itemp(i)) = x(i,j) + end do + do i=1,i1sz + x(i,j) = temp(i) + end do + end do + case default + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='egelp') + end select + + deallocate(temp,itemp) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_m_egelp + + + +!!$ +!!$ Parallel Sparse BLAS version 3.5 +!!$ (C) Copyright 2006-2018 +!!$ Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the PSBLAS group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +! +! +! Subroutine: psb_egelpv +! Apply a left permutation to a dense matrix +! +! Arguments: +! trans - character. +! iperm - integer. +! x - real, dimension(:). +! info - integer. Return code. +subroutine psb_m_egelpv(trans,iperm,x,info) + use psb_serial_mod, psb_protect_name => psb_m_egelpv + use psb_const_mod + use psb_error_mod + implicit none + + integer(psb_epk_), intent(inout) :: x(:) + integer(psb_mpk_), intent(in) :: iperm(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in) :: trans + + ! local variables + integer(psb_ipk_) :: int_err(5), i1sz, err_act, i + integer(psb_epk_),allocatable :: temp(:) + integer(psb_ipk_), allocatable :: itemp(:) + integer(psb_epk_),parameter :: one=1 + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + name = 'psb_egelpv' + + if(psb_get_errstatus() /= 0) return + info=psb_success_ + call psb_erractionsave(err_act) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + i1sz = min(size(x),size(iperm)) + + if (debug_level >= psb_debug_serial_)& + & write(debug_unit,*) trim(name),': size',i1sz + allocate(temp(i1sz),itemp(size(iperm)),stat=info) + if (info /= psb_success_) then + info=2040 + call psb_errpush(info,name) + goto 9999 + end if + itemp(:) = iperm(:) + + if (.not.psb_isaperm(i1sz,itemp)) then + info=psb_err_iarg_invalid_value_ + int_err(1) = 1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + select case( psb_toupper(trans)) + case('N') + do i=1,i1sz + temp(i) = x(itemp(i)) + end do + do i=1,i1sz + x(i) = temp(i) + end do + case('T') + do i=1,i1sz + temp(itemp(i)) = x(i) + end do + do i=1,i1sz + x(i) = temp(i) + end do + case default + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='egelp') + end select + + deallocate(temp,itemp) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_m_egelpv + +subroutine psb_e_egelp(trans,iperm,x,info) + use psb_serial_mod, psb_protect_name => psb_e_egelp + use psb_const_mod + use psb_error_mod + implicit none + + integer(psb_epk_), intent(inout) :: x(:,:) + integer(psb_epk_), intent(in) :: iperm(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in) :: trans + + ! local variables + integer(psb_epk_),allocatable :: temp(:) + integer(psb_ipk_) :: int_err(5), err_act + integer(psb_epk_) :: i1sz, i2sz, i, j + integer(psb_epk_), allocatable :: itemp(:) + integer(psb_epk_),parameter :: one=1 + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + name = 'psb_egelp' + + if(psb_get_errstatus() /= 0) return + info=psb_success_ + call psb_erractionsave(err_act) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + i1sz = size(x,dim=1) + i2sz = size(x,dim=2) + + if (debug_level >= psb_debug_serial_)& + & write(debug_unit,*) trim(name),': size',i1sz,i2sz + + allocate(temp(i1sz),itemp(size(iperm)),stat=info) + if (info /= psb_success_) then + info=2040 + call psb_errpush(info,name) + goto 9999 + end if + itemp(:) = iperm(:) + + if (.not.psb_isaperm(i1sz,itemp)) then + info=psb_err_iarg_invalid_value_ + int_err(1) = 1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + select case( psb_toupper(trans)) + case('N') + do j=1,i2sz + do i=1,i1sz + temp(i) = x(itemp(i),j) + end do + do i=1,i1sz + x(i,j) = temp(i) + end do + end do + case('T') + do j=1,i2sz + do i=1,i1sz + temp(itemp(i)) = x(i,j) + end do + do i=1,i1sz + x(i,j) = temp(i) + end do + end do + case default + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='egelp') + end select + + deallocate(temp,itemp) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_e_egelp + + + +!!$ +!!$ Parallel Sparse BLAS version 3.5 +!!$ (C) Copyright 2006-2018 +!!$ Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the PSBLAS group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +! +! +! Subroutine: psb_egelpv +! Apply a left permutation to a dense matrix +! +! Arguments: +! trans - character. +! iperm - integer. +! x - real, dimension(:). +! info - integer. Return code. +subroutine psb_e_egelpv(trans,iperm,x,info) + use psb_serial_mod, psb_protect_name => psb_e_egelpv + use psb_const_mod + use psb_error_mod + implicit none + + integer(psb_epk_), intent(inout) :: x(:) + integer(psb_epk_), intent(in) :: iperm(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in) :: trans + + ! local variables + integer(psb_ipk_) :: int_err(5), err_act + integer(psb_epk_),allocatable :: temp(:) + integer(psb_epk_) :: i1sz, i + integer(psb_epk_), allocatable :: itemp(:) + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + name = 'psb_egelp' + if(psb_get_errstatus() /= 0) return + info=psb_success_ + call psb_erractionsave(err_act) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + i1sz = min(size(x),size(iperm)) + + if (debug_level >= psb_debug_serial_)& + & write(debug_unit,*) trim(name),': size',i1sz + allocate(temp(i1sz),itemp(size(iperm)),stat=info) + if (info /= psb_success_) then + info=2040 + call psb_errpush(info,name) + goto 9999 + end if + itemp(:) = iperm(:) + + if (.not.psb_isaperm(i1sz,itemp)) then + info=psb_err_iarg_invalid_value_ + int_err(1) = 1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + select case( psb_toupper(trans)) + case('N') + do i=1,i1sz + temp(i) = x(itemp(i)) + end do + do i=1,i1sz + x(i) = temp(i) + end do + case('T') + do i=1,i1sz + temp(itemp(i)) = x(i) + end do + do i=1,i1sz + x(i) = temp(i) + end do + case default + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='egelp') + end select + + deallocate(temp,itemp) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_e_egelpv + subroutine psi_eaxpby(m,n,alpha, x, beta, y, info) use psb_const_mod diff --git a/base/serial/psi_i2_serial_impl.f90 b/base/serial/psi_i2_serial_impl.F90 similarity index 60% rename from base/serial/psi_i2_serial_impl.f90 rename to base/serial/psi_i2_serial_impl.F90 index 59d579f2..83b078f0 100644 --- a/base/serial/psi_i2_serial_impl.f90 +++ b/base/serial/psi_i2_serial_impl.F90 @@ -29,6 +29,407 @@ ! POSSIBILITY OF SUCH DAMAGE. ! ! +subroutine psb_m_i2gelp(trans,iperm,x,info) + use psb_serial_mod, psb_protect_name => psb_m_i2gelp + use psb_const_mod + use psb_error_mod + implicit none + + integer(psb_i2pk_), intent(inout) :: x(:,:) + integer(psb_mpk_), intent(in) :: iperm(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in) :: trans + + ! local variables + integer(psb_i2pk_),allocatable :: temp(:) + integer(psb_ipk_) :: int_err(5), i1sz, i2sz, err_act,i,j + integer(psb_ipk_), allocatable :: itemp(:) + integer(psb_i2pk_),parameter :: one=1 + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + name = 'psb_i2gelp' + + if(psb_get_errstatus() /= 0) return + info=psb_success_ + call psb_erractionsave(err_act) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + i1sz = size(x,dim=1) + i2sz = size(x,dim=2) + + if (debug_level >= psb_debug_serial_)& + & write(debug_unit,*) trim(name),': size',i1sz,i2sz + + allocate(temp(i1sz),itemp(size(iperm)),stat=info) + if (info /= psb_success_) then + info=2040 + call psb_errpush(info,name) + goto 9999 + end if + itemp(:) = iperm(:) + + if (.not.psb_isaperm(i1sz,itemp)) then + info=psb_err_iarg_invalid_value_ + int_err(1) = 1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + select case( psb_toupper(trans)) + case('N') + do j=1,i2sz + do i=1,i1sz + temp(i) = x(itemp(i),j) + end do + do i=1,i1sz + x(i,j) = temp(i) + end do + end do + case('T') + do j=1,i2sz + do i=1,i1sz + temp(itemp(i)) = x(i,j) + end do + do i=1,i1sz + x(i,j) = temp(i) + end do + end do + case default + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='i2gelp') + end select + + deallocate(temp,itemp) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_m_i2gelp + + + +!!$ +!!$ Parallel Sparse BLAS version 3.5 +!!$ (C) Copyright 2006-2018 +!!$ Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the PSBLAS group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +! +! +! Subroutine: psb_i2gelpv +! Apply a left permutation to a dense matrix +! +! Arguments: +! trans - character. +! iperm - integer. +! x - real, dimension(:). +! info - integer. Return code. +subroutine psb_m_i2gelpv(trans,iperm,x,info) + use psb_serial_mod, psb_protect_name => psb_m_i2gelpv + use psb_const_mod + use psb_error_mod + implicit none + + integer(psb_i2pk_), intent(inout) :: x(:) + integer(psb_mpk_), intent(in) :: iperm(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in) :: trans + + ! local variables + integer(psb_ipk_) :: int_err(5), i1sz, err_act, i + integer(psb_i2pk_),allocatable :: temp(:) + integer(psb_ipk_), allocatable :: itemp(:) + integer(psb_i2pk_),parameter :: one=1 + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + name = 'psb_i2gelpv' + + if(psb_get_errstatus() /= 0) return + info=psb_success_ + call psb_erractionsave(err_act) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + i1sz = min(size(x),size(iperm)) + + if (debug_level >= psb_debug_serial_)& + & write(debug_unit,*) trim(name),': size',i1sz + allocate(temp(i1sz),itemp(size(iperm)),stat=info) + if (info /= psb_success_) then + info=2040 + call psb_errpush(info,name) + goto 9999 + end if + itemp(:) = iperm(:) + + if (.not.psb_isaperm(i1sz,itemp)) then + info=psb_err_iarg_invalid_value_ + int_err(1) = 1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + select case( psb_toupper(trans)) + case('N') + do i=1,i1sz + temp(i) = x(itemp(i)) + end do + do i=1,i1sz + x(i) = temp(i) + end do + case('T') + do i=1,i1sz + temp(itemp(i)) = x(i) + end do + do i=1,i1sz + x(i) = temp(i) + end do + case default + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='i2gelp') + end select + + deallocate(temp,itemp) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_m_i2gelpv + +subroutine psb_e_i2gelp(trans,iperm,x,info) + use psb_serial_mod, psb_protect_name => psb_e_i2gelp + use psb_const_mod + use psb_error_mod + implicit none + + integer(psb_i2pk_), intent(inout) :: x(:,:) + integer(psb_epk_), intent(in) :: iperm(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in) :: trans + + ! local variables + integer(psb_i2pk_),allocatable :: temp(:) + integer(psb_ipk_) :: int_err(5), err_act + integer(psb_epk_) :: i1sz, i2sz, i, j + integer(psb_epk_), allocatable :: itemp(:) + integer(psb_i2pk_),parameter :: one=1 + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + name = 'psb_i2gelp' + + if(psb_get_errstatus() /= 0) return + info=psb_success_ + call psb_erractionsave(err_act) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + i1sz = size(x,dim=1) + i2sz = size(x,dim=2) + + if (debug_level >= psb_debug_serial_)& + & write(debug_unit,*) trim(name),': size',i1sz,i2sz + + allocate(temp(i1sz),itemp(size(iperm)),stat=info) + if (info /= psb_success_) then + info=2040 + call psb_errpush(info,name) + goto 9999 + end if + itemp(:) = iperm(:) + + if (.not.psb_isaperm(i1sz,itemp)) then + info=psb_err_iarg_invalid_value_ + int_err(1) = 1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + select case( psb_toupper(trans)) + case('N') + do j=1,i2sz + do i=1,i1sz + temp(i) = x(itemp(i),j) + end do + do i=1,i1sz + x(i,j) = temp(i) + end do + end do + case('T') + do j=1,i2sz + do i=1,i1sz + temp(itemp(i)) = x(i,j) + end do + do i=1,i1sz + x(i,j) = temp(i) + end do + end do + case default + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='i2gelp') + end select + + deallocate(temp,itemp) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_e_i2gelp + + + +!!$ +!!$ Parallel Sparse BLAS version 3.5 +!!$ (C) Copyright 2006-2018 +!!$ Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the PSBLAS group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +! +! +! Subroutine: psb_i2gelpv +! Apply a left permutation to a dense matrix +! +! Arguments: +! trans - character. +! iperm - integer. +! x - real, dimension(:). +! info - integer. Return code. +subroutine psb_e_i2gelpv(trans,iperm,x,info) + use psb_serial_mod, psb_protect_name => psb_e_i2gelpv + use psb_const_mod + use psb_error_mod + implicit none + + integer(psb_i2pk_), intent(inout) :: x(:) + integer(psb_epk_), intent(in) :: iperm(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in) :: trans + + ! local variables + integer(psb_ipk_) :: int_err(5), err_act + integer(psb_i2pk_),allocatable :: temp(:) + integer(psb_epk_) :: i1sz, i + integer(psb_epk_), allocatable :: itemp(:) + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + name = 'psb_i2gelp' + if(psb_get_errstatus() /= 0) return + info=psb_success_ + call psb_erractionsave(err_act) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + i1sz = min(size(x),size(iperm)) + + if (debug_level >= psb_debug_serial_)& + & write(debug_unit,*) trim(name),': size',i1sz + allocate(temp(i1sz),itemp(size(iperm)),stat=info) + if (info /= psb_success_) then + info=2040 + call psb_errpush(info,name) + goto 9999 + end if + itemp(:) = iperm(:) + + if (.not.psb_isaperm(i1sz,itemp)) then + info=psb_err_iarg_invalid_value_ + int_err(1) = 1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + select case( psb_toupper(trans)) + case('N') + do i=1,i1sz + temp(i) = x(itemp(i)) + end do + do i=1,i1sz + x(i) = temp(i) + end do + case('T') + do i=1,i1sz + temp(itemp(i)) = x(i) + end do + do i=1,i1sz + x(i) = temp(i) + end do + case default + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='i2gelp') + end select + + deallocate(temp,itemp) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_e_i2gelpv + subroutine psi_i2axpby(m,n,alpha, x, beta, y, info) use psb_const_mod diff --git a/base/serial/psi_m_serial_impl.f90 b/base/serial/psi_m_serial_impl.F90 similarity index 60% rename from base/serial/psi_m_serial_impl.f90 rename to base/serial/psi_m_serial_impl.F90 index cc8b9f4f..950e2358 100644 --- a/base/serial/psi_m_serial_impl.f90 +++ b/base/serial/psi_m_serial_impl.F90 @@ -29,6 +29,407 @@ ! POSSIBILITY OF SUCH DAMAGE. ! ! +subroutine psb_m_mgelp(trans,iperm,x,info) + use psb_serial_mod, psb_protect_name => psb_m_mgelp + use psb_const_mod + use psb_error_mod + implicit none + + integer(psb_mpk_), intent(inout) :: x(:,:) + integer(psb_mpk_), intent(in) :: iperm(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in) :: trans + + ! local variables + integer(psb_mpk_),allocatable :: temp(:) + integer(psb_ipk_) :: int_err(5), i1sz, i2sz, err_act,i,j + integer(psb_ipk_), allocatable :: itemp(:) + integer(psb_mpk_),parameter :: one=1 + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + name = 'psb_mgelp' + + if(psb_get_errstatus() /= 0) return + info=psb_success_ + call psb_erractionsave(err_act) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + i1sz = size(x,dim=1) + i2sz = size(x,dim=2) + + if (debug_level >= psb_debug_serial_)& + & write(debug_unit,*) trim(name),': size',i1sz,i2sz + + allocate(temp(i1sz),itemp(size(iperm)),stat=info) + if (info /= psb_success_) then + info=2040 + call psb_errpush(info,name) + goto 9999 + end if + itemp(:) = iperm(:) + + if (.not.psb_isaperm(i1sz,itemp)) then + info=psb_err_iarg_invalid_value_ + int_err(1) = 1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + select case( psb_toupper(trans)) + case('N') + do j=1,i2sz + do i=1,i1sz + temp(i) = x(itemp(i),j) + end do + do i=1,i1sz + x(i,j) = temp(i) + end do + end do + case('T') + do j=1,i2sz + do i=1,i1sz + temp(itemp(i)) = x(i,j) + end do + do i=1,i1sz + x(i,j) = temp(i) + end do + end do + case default + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='mgelp') + end select + + deallocate(temp,itemp) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_m_mgelp + + + +!!$ +!!$ Parallel Sparse BLAS version 3.5 +!!$ (C) Copyright 2006-2018 +!!$ Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the PSBLAS group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +! +! +! Subroutine: psb_mgelpv +! Apply a left permutation to a dense matrix +! +! Arguments: +! trans - character. +! iperm - integer. +! x - real, dimension(:). +! info - integer. Return code. +subroutine psb_m_mgelpv(trans,iperm,x,info) + use psb_serial_mod, psb_protect_name => psb_m_mgelpv + use psb_const_mod + use psb_error_mod + implicit none + + integer(psb_mpk_), intent(inout) :: x(:) + integer(psb_mpk_), intent(in) :: iperm(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in) :: trans + + ! local variables + integer(psb_ipk_) :: int_err(5), i1sz, err_act, i + integer(psb_mpk_),allocatable :: temp(:) + integer(psb_ipk_), allocatable :: itemp(:) + integer(psb_mpk_),parameter :: one=1 + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + name = 'psb_mgelpv' + + if(psb_get_errstatus() /= 0) return + info=psb_success_ + call psb_erractionsave(err_act) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + i1sz = min(size(x),size(iperm)) + + if (debug_level >= psb_debug_serial_)& + & write(debug_unit,*) trim(name),': size',i1sz + allocate(temp(i1sz),itemp(size(iperm)),stat=info) + if (info /= psb_success_) then + info=2040 + call psb_errpush(info,name) + goto 9999 + end if + itemp(:) = iperm(:) + + if (.not.psb_isaperm(i1sz,itemp)) then + info=psb_err_iarg_invalid_value_ + int_err(1) = 1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + select case( psb_toupper(trans)) + case('N') + do i=1,i1sz + temp(i) = x(itemp(i)) + end do + do i=1,i1sz + x(i) = temp(i) + end do + case('T') + do i=1,i1sz + temp(itemp(i)) = x(i) + end do + do i=1,i1sz + x(i) = temp(i) + end do + case default + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='mgelp') + end select + + deallocate(temp,itemp) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_m_mgelpv + +subroutine psb_e_mgelp(trans,iperm,x,info) + use psb_serial_mod, psb_protect_name => psb_e_mgelp + use psb_const_mod + use psb_error_mod + implicit none + + integer(psb_mpk_), intent(inout) :: x(:,:) + integer(psb_epk_), intent(in) :: iperm(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in) :: trans + + ! local variables + integer(psb_mpk_),allocatable :: temp(:) + integer(psb_ipk_) :: int_err(5), err_act + integer(psb_epk_) :: i1sz, i2sz, i, j + integer(psb_epk_), allocatable :: itemp(:) + integer(psb_mpk_),parameter :: one=1 + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + name = 'psb_mgelp' + + if(psb_get_errstatus() /= 0) return + info=psb_success_ + call psb_erractionsave(err_act) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + i1sz = size(x,dim=1) + i2sz = size(x,dim=2) + + if (debug_level >= psb_debug_serial_)& + & write(debug_unit,*) trim(name),': size',i1sz,i2sz + + allocate(temp(i1sz),itemp(size(iperm)),stat=info) + if (info /= psb_success_) then + info=2040 + call psb_errpush(info,name) + goto 9999 + end if + itemp(:) = iperm(:) + + if (.not.psb_isaperm(i1sz,itemp)) then + info=psb_err_iarg_invalid_value_ + int_err(1) = 1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + select case( psb_toupper(trans)) + case('N') + do j=1,i2sz + do i=1,i1sz + temp(i) = x(itemp(i),j) + end do + do i=1,i1sz + x(i,j) = temp(i) + end do + end do + case('T') + do j=1,i2sz + do i=1,i1sz + temp(itemp(i)) = x(i,j) + end do + do i=1,i1sz + x(i,j) = temp(i) + end do + end do + case default + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='mgelp') + end select + + deallocate(temp,itemp) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_e_mgelp + + + +!!$ +!!$ Parallel Sparse BLAS version 3.5 +!!$ (C) Copyright 2006-2018 +!!$ Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the PSBLAS group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +! +! +! Subroutine: psb_mgelpv +! Apply a left permutation to a dense matrix +! +! Arguments: +! trans - character. +! iperm - integer. +! x - real, dimension(:). +! info - integer. Return code. +subroutine psb_e_mgelpv(trans,iperm,x,info) + use psb_serial_mod, psb_protect_name => psb_e_mgelpv + use psb_const_mod + use psb_error_mod + implicit none + + integer(psb_mpk_), intent(inout) :: x(:) + integer(psb_epk_), intent(in) :: iperm(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in) :: trans + + ! local variables + integer(psb_ipk_) :: int_err(5), err_act + integer(psb_mpk_),allocatable :: temp(:) + integer(psb_epk_) :: i1sz, i + integer(psb_epk_), allocatable :: itemp(:) + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + name = 'psb_mgelp' + if(psb_get_errstatus() /= 0) return + info=psb_success_ + call psb_erractionsave(err_act) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + i1sz = min(size(x),size(iperm)) + + if (debug_level >= psb_debug_serial_)& + & write(debug_unit,*) trim(name),': size',i1sz + allocate(temp(i1sz),itemp(size(iperm)),stat=info) + if (info /= psb_success_) then + info=2040 + call psb_errpush(info,name) + goto 9999 + end if + itemp(:) = iperm(:) + + if (.not.psb_isaperm(i1sz,itemp)) then + info=psb_err_iarg_invalid_value_ + int_err(1) = 1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + select case( psb_toupper(trans)) + case('N') + do i=1,i1sz + temp(i) = x(itemp(i)) + end do + do i=1,i1sz + x(i) = temp(i) + end do + case('T') + do i=1,i1sz + temp(itemp(i)) = x(i) + end do + do i=1,i1sz + x(i) = temp(i) + end do + case default + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='mgelp') + end select + + deallocate(temp,itemp) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_e_mgelpv + subroutine psi_maxpby(m,n,alpha, x, beta, y, info) use psb_const_mod diff --git a/base/serial/psi_s_serial_impl.f90 b/base/serial/psi_s_serial_impl.F90 similarity index 50% rename from base/serial/psi_s_serial_impl.f90 rename to base/serial/psi_s_serial_impl.F90 index dfe2559b..6c8e21e2 100644 --- a/base/serial/psi_s_serial_impl.f90 +++ b/base/serial/psi_s_serial_impl.F90 @@ -29,6 +29,407 @@ ! POSSIBILITY OF SUCH DAMAGE. ! ! +subroutine psb_m_sgelp(trans,iperm,x,info) + use psb_serial_mod, psb_protect_name => psb_m_sgelp + use psb_const_mod + use psb_error_mod + implicit none + + real(psb_spk_), intent(inout) :: x(:,:) + integer(psb_mpk_), intent(in) :: iperm(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in) :: trans + + ! local variables + real(psb_spk_),allocatable :: temp(:) + integer(psb_ipk_) :: int_err(5), i1sz, i2sz, err_act,i,j + integer(psb_ipk_), allocatable :: itemp(:) + real(psb_spk_),parameter :: one=1 + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + name = 'psb_sgelp' + + if(psb_get_errstatus() /= 0) return + info=psb_success_ + call psb_erractionsave(err_act) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + i1sz = size(x,dim=1) + i2sz = size(x,dim=2) + + if (debug_level >= psb_debug_serial_)& + & write(debug_unit,*) trim(name),': size',i1sz,i2sz + + allocate(temp(i1sz),itemp(size(iperm)),stat=info) + if (info /= psb_success_) then + info=2040 + call psb_errpush(info,name) + goto 9999 + end if + itemp(:) = iperm(:) + + if (.not.psb_isaperm(i1sz,itemp)) then + info=psb_err_iarg_invalid_value_ + int_err(1) = 1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + select case( psb_toupper(trans)) + case('N') + do j=1,i2sz + do i=1,i1sz + temp(i) = x(itemp(i),j) + end do + do i=1,i1sz + x(i,j) = temp(i) + end do + end do + case('T') + do j=1,i2sz + do i=1,i1sz + temp(itemp(i)) = x(i,j) + end do + do i=1,i1sz + x(i,j) = temp(i) + end do + end do + case default + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='sgelp') + end select + + deallocate(temp,itemp) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_m_sgelp + + + +!!$ +!!$ Parallel Sparse BLAS version 3.5 +!!$ (C) Copyright 2006-2018 +!!$ Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the PSBLAS group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +! +! +! Subroutine: psb_sgelpv +! Apply a left permutation to a dense matrix +! +! Arguments: +! trans - character. +! iperm - integer. +! x - real, dimension(:). +! info - integer. Return code. +subroutine psb_m_sgelpv(trans,iperm,x,info) + use psb_serial_mod, psb_protect_name => psb_m_sgelpv + use psb_const_mod + use psb_error_mod + implicit none + + real(psb_spk_), intent(inout) :: x(:) + integer(psb_mpk_), intent(in) :: iperm(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in) :: trans + + ! local variables + integer(psb_ipk_) :: int_err(5), i1sz, err_act, i + real(psb_spk_),allocatable :: temp(:) + integer(psb_ipk_), allocatable :: itemp(:) + real(psb_spk_),parameter :: one=1 + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + name = 'psb_sgelpv' + + if(psb_get_errstatus() /= 0) return + info=psb_success_ + call psb_erractionsave(err_act) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + i1sz = min(size(x),size(iperm)) + + if (debug_level >= psb_debug_serial_)& + & write(debug_unit,*) trim(name),': size',i1sz + allocate(temp(i1sz),itemp(size(iperm)),stat=info) + if (info /= psb_success_) then + info=2040 + call psb_errpush(info,name) + goto 9999 + end if + itemp(:) = iperm(:) + + if (.not.psb_isaperm(i1sz,itemp)) then + info=psb_err_iarg_invalid_value_ + int_err(1) = 1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + select case( psb_toupper(trans)) + case('N') + do i=1,i1sz + temp(i) = x(itemp(i)) + end do + do i=1,i1sz + x(i) = temp(i) + end do + case('T') + do i=1,i1sz + temp(itemp(i)) = x(i) + end do + do i=1,i1sz + x(i) = temp(i) + end do + case default + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='sgelp') + end select + + deallocate(temp,itemp) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_m_sgelpv + +subroutine psb_e_sgelp(trans,iperm,x,info) + use psb_serial_mod, psb_protect_name => psb_e_sgelp + use psb_const_mod + use psb_error_mod + implicit none + + real(psb_spk_), intent(inout) :: x(:,:) + integer(psb_epk_), intent(in) :: iperm(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in) :: trans + + ! local variables + real(psb_spk_),allocatable :: temp(:) + integer(psb_ipk_) :: int_err(5), err_act + integer(psb_epk_) :: i1sz, i2sz, i, j + integer(psb_epk_), allocatable :: itemp(:) + real(psb_spk_),parameter :: one=1 + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + name = 'psb_sgelp' + + if(psb_get_errstatus() /= 0) return + info=psb_success_ + call psb_erractionsave(err_act) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + i1sz = size(x,dim=1) + i2sz = size(x,dim=2) + + if (debug_level >= psb_debug_serial_)& + & write(debug_unit,*) trim(name),': size',i1sz,i2sz + + allocate(temp(i1sz),itemp(size(iperm)),stat=info) + if (info /= psb_success_) then + info=2040 + call psb_errpush(info,name) + goto 9999 + end if + itemp(:) = iperm(:) + + if (.not.psb_isaperm(i1sz,itemp)) then + info=psb_err_iarg_invalid_value_ + int_err(1) = 1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + select case( psb_toupper(trans)) + case('N') + do j=1,i2sz + do i=1,i1sz + temp(i) = x(itemp(i),j) + end do + do i=1,i1sz + x(i,j) = temp(i) + end do + end do + case('T') + do j=1,i2sz + do i=1,i1sz + temp(itemp(i)) = x(i,j) + end do + do i=1,i1sz + x(i,j) = temp(i) + end do + end do + case default + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='sgelp') + end select + + deallocate(temp,itemp) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_e_sgelp + + + +!!$ +!!$ Parallel Sparse BLAS version 3.5 +!!$ (C) Copyright 2006-2018 +!!$ Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the PSBLAS group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +! +! +! Subroutine: psb_sgelpv +! Apply a left permutation to a dense matrix +! +! Arguments: +! trans - character. +! iperm - integer. +! x - real, dimension(:). +! info - integer. Return code. +subroutine psb_e_sgelpv(trans,iperm,x,info) + use psb_serial_mod, psb_protect_name => psb_e_sgelpv + use psb_const_mod + use psb_error_mod + implicit none + + real(psb_spk_), intent(inout) :: x(:) + integer(psb_epk_), intent(in) :: iperm(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in) :: trans + + ! local variables + integer(psb_ipk_) :: int_err(5), err_act + real(psb_spk_),allocatable :: temp(:) + integer(psb_epk_) :: i1sz, i + integer(psb_epk_), allocatable :: itemp(:) + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + name = 'psb_sgelp' + if(psb_get_errstatus() /= 0) return + info=psb_success_ + call psb_erractionsave(err_act) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + i1sz = min(size(x),size(iperm)) + + if (debug_level >= psb_debug_serial_)& + & write(debug_unit,*) trim(name),': size',i1sz + allocate(temp(i1sz),itemp(size(iperm)),stat=info) + if (info /= psb_success_) then + info=2040 + call psb_errpush(info,name) + goto 9999 + end if + itemp(:) = iperm(:) + + if (.not.psb_isaperm(i1sz,itemp)) then + info=psb_err_iarg_invalid_value_ + int_err(1) = 1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + select case( psb_toupper(trans)) + case('N') + do i=1,i1sz + temp(i) = x(itemp(i)) + end do + do i=1,i1sz + x(i) = temp(i) + end do + case('T') + do i=1,i1sz + temp(itemp(i)) = x(i) + end do + do i=1,i1sz + x(i) = temp(i) + end do + case default + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='sgelp') + end select + + deallocate(temp,itemp) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_e_sgelpv + subroutine psi_saxpby(m,n,alpha, x, beta, y, info) use psb_const_mod @@ -40,9 +441,9 @@ subroutine psi_saxpby(m,n,alpha, x, beta, y, info) real(psb_spk_), intent (in) :: alpha, beta integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: err_act - integer(psb_ipk_) :: lx, ly + integer(psb_ipk_) :: lx, ly, i integer(psb_ipk_) :: ierr(5) - character(len=20) :: name, ch_err + character(len=20) :: name, ch_err name='psb_geaxpby' info=psb_success_ @@ -101,7 +502,8 @@ subroutine psi_saxpbyv(m,alpha, x, beta, y, info) integer(psb_ipk_) :: err_act integer(psb_ipk_) :: lx, ly integer(psb_ipk_) :: ierr(5) - character(len=20) :: name, ch_err + integer(psb_ipk_) :: i + character(len=20) :: name, ch_err name='psb_geaxpby' info=psb_success_ @@ -131,7 +533,106 @@ subroutine psi_saxpbyv(m,alpha, x, beta, y, info) goto 9999 end if - if (m>0) call saxpby(m,ione,alpha,x,lx,beta,y,ly,info) +! if (m>0) call saxpby(m,ione,alpha,x,lx,beta,y,ly,info) + + if (alpha.eq.szero) then + if (beta.eq.szero) then + !$omp parallel do private(i) + do i=1,m + y(i) = szero + enddo + else if (beta.eq.sone) then + ! + ! Do nothing! + ! + + else if (beta.eq.-sone) then + !$omp parallel do private(i) + do i=1,m + y(i) = - y(i) + enddo + else + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + enddo + endif + + else if (alpha.eq.sone) then + + if (beta.eq.szero) then + !$omp parallel do private(i) + do i=1,m + y(i) = x(i) + enddo + else if (beta.eq.sone) then + !$omp parallel do private(i) + do i=1,m + y(i) = x(i) + y(i) + enddo + + else if (beta.eq.-sone) then + !$omp parallel do private(i) + do i=1,m + y(i) = x(i) - y(i) + enddo + else + !$omp parallel do private(i) + do i=1,m + y(i) = x(i) + beta*y(i) + enddo + endif + + else if (alpha.eq.-sone) then + + if (beta.eq.szero) then + !$omp parallel do private(i) + do i=1,m + y(i) = -x(i) + enddo + else if (beta.eq.sone) then + !$omp parallel do private(i) + do i=1,m + y(i) = -x(i) + y(i) + enddo + else if (beta.eq.-sone) then + !$omp parallel do private(i) + do i=1,m + y(i) = -x(i) - y(i) + enddo + else + !$omp parallel do private(i) + do i=1,m + y(i) = -x(i) + beta*y(i) + enddo + endif + + else + + if (beta.eq.szero) then + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + enddo + else if (beta.eq.sone) then + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + y(i) + enddo + else if (beta.eq.-sone) then + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) - y(i) + enddo + else + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + beta*y(i) + enddo + endif + + endif + call psb_erractionrestore(err_act) return @@ -154,7 +655,7 @@ subroutine psi_saxpbyv2(m,alpha, x, beta, y, z, info) real(psb_spk_), intent (in) :: alpha, beta integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: err_act - integer(psb_ipk_) :: lx, ly, lz + integer(psb_ipk_) :: lx, ly, lz, i integer(psb_ipk_) :: ierr(5) character(len=20) :: name, ch_err @@ -193,7 +694,105 @@ subroutine psi_saxpbyv2(m,alpha, x, beta, y, z, info) goto 9999 end if - if (m>0) call saxpbyv2(m,ione,alpha,x,lx,beta,y,ly,z,lz,info) + if (alpha.eq.szero) then + if (beta.eq.szero) then + !$omp parallel do private(i) + do i=1,m + Z(i) = szero + enddo + else if (beta.eq.sone) then + ! + ! Do nothing! + ! + + else if (beta.eq.-sone) then + !$omp parallel do private(i) + do i=1,m + Z(i) = - y(i) + enddo + else + !$omp parallel do private(i) + do i=1,m + Z(i) = beta*y(i) + enddo + endif + + else if (alpha.eq.sone) then + + if (beta.eq.szero) then + !$omp parallel do private(i) + do i=1,m + Z(i) = x(i) + enddo + else if (beta.eq.sone) then + !$omp parallel do private(i) + do i=1,m + Z(i) = x(i) + y(i) + enddo + + else if (beta.eq.-sone) then + !$omp parallel do private(i) + do i=1,m + Z(i) = x(i) - y(i) + enddo + else + !$omp parallel do private(i) + do i=1,m + Z(i) = x(i) + beta*y(i) + enddo + endif + + else if (alpha.eq.-sone) then + + if (beta.eq.szero) then + !$omp parallel do private(i) + do i=1,m + Z(i) = -x(i) + enddo + else if (beta.eq.sone) then + !$omp parallel do private(i) + do i=1,m + Z(i) = -x(i) + y(i) + enddo + + else if (beta.eq.-sone) then + !$omp parallel do private(i) + do i=1,m + Z(i) = -x(i) - y(i) + enddo + else + !$omp parallel do private(i) + do i=1,m + Z(i) = -x(i) + beta*y(i) + enddo + endif + + else + + if (beta.eq.szero) then + !$omp parallel do private(i) + do i=1,m + Z(i) = alpha*x(i) + enddo + else if (beta.eq.sone) then + !$omp parallel do private(i) + do i=1,m + Z(i) = alpha*x(i) + y(i) + enddo + + else if (beta.eq.-sone) then + !$omp parallel do private(i) + do i=1,m + Z(i) = alpha*x(i) - y(i) + enddo + else + !$omp parallel do private(i) + do i=1,m + Z(i) = alpha*x(i) + beta*y(i) + enddo + endif + + endif call psb_erractionrestore(err_act) return @@ -541,6 +1140,7 @@ subroutine saxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) if (alpha.eq.szero) then if (beta.eq.szero) then do j=1, n + !$omp parallel do private(i) do i=1,m y(i,j) = szero enddo @@ -552,12 +1152,14 @@ subroutine saxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) else if (beta.eq.-sone) then do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = beta*y(i,j) enddo @@ -568,12 +1170,14 @@ subroutine saxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) if (beta.eq.szero) then do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = x(i,j) enddo enddo else if (beta.eq.sone) then do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = x(i,j) + y(i,j) enddo @@ -581,12 +1185,14 @@ subroutine saxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) else if (beta.eq.-sone) then do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = x(i,j) - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = x(i,j) + beta*y(i,j) enddo @@ -597,12 +1203,14 @@ subroutine saxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) if (beta.eq.szero) then do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = -x(i,j) enddo enddo else if (beta.eq.sone) then do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = -x(i,j) + y(i,j) enddo @@ -610,12 +1218,14 @@ subroutine saxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) else if (beta.eq.-sone) then do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = -x(i,j) - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = -x(i,j) + beta*y(i,j) enddo @@ -626,12 +1236,14 @@ subroutine saxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) if (beta.eq.szero) then do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = alpha*x(i,j) enddo enddo else if (beta.eq.sone) then do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = alpha*x(i,j) + y(i,j) enddo @@ -639,12 +1251,14 @@ subroutine saxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) else if (beta.eq.-sone) then do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = alpha*x(i,j) - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = alpha*x(i,j) + beta*y(i,j) enddo @@ -730,12 +1344,14 @@ subroutine saxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) else if (beta.eq.-sone) then do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = beta*y(i,j) enddo @@ -746,12 +1362,14 @@ subroutine saxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) if (beta.eq.szero) then do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = x(i,j) enddo enddo else if (beta.eq.sone) then do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = x(i,j) + y(i,j) enddo @@ -759,12 +1377,14 @@ subroutine saxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) else if (beta.eq.-sone) then do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = x(i,j) - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = x(i,j) + beta*y(i,j) enddo @@ -775,12 +1395,14 @@ subroutine saxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) if (beta.eq.szero) then do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = -x(i,j) enddo enddo else if (beta.eq.sone) then do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = -x(i,j) + y(i,j) enddo @@ -788,12 +1410,14 @@ subroutine saxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) else if (beta.eq.-sone) then do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = -x(i,j) - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = -x(i,j) + beta*y(i,j) enddo @@ -804,12 +1428,14 @@ subroutine saxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) if (beta.eq.szero) then do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = alpha*x(i,j) enddo enddo else if (beta.eq.sone) then do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = alpha*x(i,j) + y(i,j) enddo @@ -817,12 +1443,14 @@ subroutine saxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) else if (beta.eq.-sone) then do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = alpha*x(i,j) - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = alpha*x(i,j) + beta*y(i,j) enddo diff --git a/base/serial/psi_z_serial_impl.f90 b/base/serial/psi_z_serial_impl.F90 similarity index 50% rename from base/serial/psi_z_serial_impl.f90 rename to base/serial/psi_z_serial_impl.F90 index 5b7036e6..f3087992 100644 --- a/base/serial/psi_z_serial_impl.f90 +++ b/base/serial/psi_z_serial_impl.F90 @@ -29,6 +29,407 @@ ! POSSIBILITY OF SUCH DAMAGE. ! ! +subroutine psb_m_zgelp(trans,iperm,x,info) + use psb_serial_mod, psb_protect_name => psb_m_zgelp + use psb_const_mod + use psb_error_mod + implicit none + + complex(psb_dpk_), intent(inout) :: x(:,:) + integer(psb_mpk_), intent(in) :: iperm(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in) :: trans + + ! local variables + complex(psb_dpk_),allocatable :: temp(:) + integer(psb_ipk_) :: int_err(5), i1sz, i2sz, err_act,i,j + integer(psb_ipk_), allocatable :: itemp(:) + complex(psb_dpk_),parameter :: one=1 + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + name = 'psb_zgelp' + + if(psb_get_errstatus() /= 0) return + info=psb_success_ + call psb_erractionsave(err_act) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + i1sz = size(x,dim=1) + i2sz = size(x,dim=2) + + if (debug_level >= psb_debug_serial_)& + & write(debug_unit,*) trim(name),': size',i1sz,i2sz + + allocate(temp(i1sz),itemp(size(iperm)),stat=info) + if (info /= psb_success_) then + info=2040 + call psb_errpush(info,name) + goto 9999 + end if + itemp(:) = iperm(:) + + if (.not.psb_isaperm(i1sz,itemp)) then + info=psb_err_iarg_invalid_value_ + int_err(1) = 1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + select case( psb_toupper(trans)) + case('N') + do j=1,i2sz + do i=1,i1sz + temp(i) = x(itemp(i),j) + end do + do i=1,i1sz + x(i,j) = temp(i) + end do + end do + case('T') + do j=1,i2sz + do i=1,i1sz + temp(itemp(i)) = x(i,j) + end do + do i=1,i1sz + x(i,j) = temp(i) + end do + end do + case default + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='zgelp') + end select + + deallocate(temp,itemp) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_m_zgelp + + + +!!$ +!!$ Parallel Sparse BLAS version 3.5 +!!$ (C) Copyright 2006-2018 +!!$ Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the PSBLAS group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +! +! +! Subroutine: psb_zgelpv +! Apply a left permutation to a dense matrix +! +! Arguments: +! trans - character. +! iperm - integer. +! x - real, dimension(:). +! info - integer. Return code. +subroutine psb_m_zgelpv(trans,iperm,x,info) + use psb_serial_mod, psb_protect_name => psb_m_zgelpv + use psb_const_mod + use psb_error_mod + implicit none + + complex(psb_dpk_), intent(inout) :: x(:) + integer(psb_mpk_), intent(in) :: iperm(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in) :: trans + + ! local variables + integer(psb_ipk_) :: int_err(5), i1sz, err_act, i + complex(psb_dpk_),allocatable :: temp(:) + integer(psb_ipk_), allocatable :: itemp(:) + complex(psb_dpk_),parameter :: one=1 + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + name = 'psb_zgelpv' + + if(psb_get_errstatus() /= 0) return + info=psb_success_ + call psb_erractionsave(err_act) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + i1sz = min(size(x),size(iperm)) + + if (debug_level >= psb_debug_serial_)& + & write(debug_unit,*) trim(name),': size',i1sz + allocate(temp(i1sz),itemp(size(iperm)),stat=info) + if (info /= psb_success_) then + info=2040 + call psb_errpush(info,name) + goto 9999 + end if + itemp(:) = iperm(:) + + if (.not.psb_isaperm(i1sz,itemp)) then + info=psb_err_iarg_invalid_value_ + int_err(1) = 1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + select case( psb_toupper(trans)) + case('N') + do i=1,i1sz + temp(i) = x(itemp(i)) + end do + do i=1,i1sz + x(i) = temp(i) + end do + case('T') + do i=1,i1sz + temp(itemp(i)) = x(i) + end do + do i=1,i1sz + x(i) = temp(i) + end do + case default + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='zgelp') + end select + + deallocate(temp,itemp) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_m_zgelpv + +subroutine psb_e_zgelp(trans,iperm,x,info) + use psb_serial_mod, psb_protect_name => psb_e_zgelp + use psb_const_mod + use psb_error_mod + implicit none + + complex(psb_dpk_), intent(inout) :: x(:,:) + integer(psb_epk_), intent(in) :: iperm(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in) :: trans + + ! local variables + complex(psb_dpk_),allocatable :: temp(:) + integer(psb_ipk_) :: int_err(5), err_act + integer(psb_epk_) :: i1sz, i2sz, i, j + integer(psb_epk_), allocatable :: itemp(:) + complex(psb_dpk_),parameter :: one=1 + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + name = 'psb_zgelp' + + if(psb_get_errstatus() /= 0) return + info=psb_success_ + call psb_erractionsave(err_act) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + i1sz = size(x,dim=1) + i2sz = size(x,dim=2) + + if (debug_level >= psb_debug_serial_)& + & write(debug_unit,*) trim(name),': size',i1sz,i2sz + + allocate(temp(i1sz),itemp(size(iperm)),stat=info) + if (info /= psb_success_) then + info=2040 + call psb_errpush(info,name) + goto 9999 + end if + itemp(:) = iperm(:) + + if (.not.psb_isaperm(i1sz,itemp)) then + info=psb_err_iarg_invalid_value_ + int_err(1) = 1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + select case( psb_toupper(trans)) + case('N') + do j=1,i2sz + do i=1,i1sz + temp(i) = x(itemp(i),j) + end do + do i=1,i1sz + x(i,j) = temp(i) + end do + end do + case('T') + do j=1,i2sz + do i=1,i1sz + temp(itemp(i)) = x(i,j) + end do + do i=1,i1sz + x(i,j) = temp(i) + end do + end do + case default + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='zgelp') + end select + + deallocate(temp,itemp) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_e_zgelp + + + +!!$ +!!$ Parallel Sparse BLAS version 3.5 +!!$ (C) Copyright 2006-2018 +!!$ Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the PSBLAS group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +! +! +! Subroutine: psb_zgelpv +! Apply a left permutation to a dense matrix +! +! Arguments: +! trans - character. +! iperm - integer. +! x - real, dimension(:). +! info - integer. Return code. +subroutine psb_e_zgelpv(trans,iperm,x,info) + use psb_serial_mod, psb_protect_name => psb_e_zgelpv + use psb_const_mod + use psb_error_mod + implicit none + + complex(psb_dpk_), intent(inout) :: x(:) + integer(psb_epk_), intent(in) :: iperm(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in) :: trans + + ! local variables + integer(psb_ipk_) :: int_err(5), err_act + complex(psb_dpk_),allocatable :: temp(:) + integer(psb_epk_) :: i1sz, i + integer(psb_epk_), allocatable :: itemp(:) + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + name = 'psb_zgelp' + if(psb_get_errstatus() /= 0) return + info=psb_success_ + call psb_erractionsave(err_act) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + i1sz = min(size(x),size(iperm)) + + if (debug_level >= psb_debug_serial_)& + & write(debug_unit,*) trim(name),': size',i1sz + allocate(temp(i1sz),itemp(size(iperm)),stat=info) + if (info /= psb_success_) then + info=2040 + call psb_errpush(info,name) + goto 9999 + end if + itemp(:) = iperm(:) + + if (.not.psb_isaperm(i1sz,itemp)) then + info=psb_err_iarg_invalid_value_ + int_err(1) = 1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + select case( psb_toupper(trans)) + case('N') + do i=1,i1sz + temp(i) = x(itemp(i)) + end do + do i=1,i1sz + x(i) = temp(i) + end do + case('T') + do i=1,i1sz + temp(itemp(i)) = x(i) + end do + do i=1,i1sz + x(i) = temp(i) + end do + case default + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='zgelp') + end select + + deallocate(temp,itemp) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_e_zgelpv + subroutine psi_zaxpby(m,n,alpha, x, beta, y, info) use psb_const_mod @@ -40,9 +441,9 @@ subroutine psi_zaxpby(m,n,alpha, x, beta, y, info) complex(psb_dpk_), intent (in) :: alpha, beta integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: err_act - integer(psb_ipk_) :: lx, ly + integer(psb_ipk_) :: lx, ly, i integer(psb_ipk_) :: ierr(5) - character(len=20) :: name, ch_err + character(len=20) :: name, ch_err name='psb_geaxpby' info=psb_success_ @@ -101,7 +502,8 @@ subroutine psi_zaxpbyv(m,alpha, x, beta, y, info) integer(psb_ipk_) :: err_act integer(psb_ipk_) :: lx, ly integer(psb_ipk_) :: ierr(5) - character(len=20) :: name, ch_err + integer(psb_ipk_) :: i + character(len=20) :: name, ch_err name='psb_geaxpby' info=psb_success_ @@ -131,7 +533,106 @@ subroutine psi_zaxpbyv(m,alpha, x, beta, y, info) goto 9999 end if - if (m>0) call zaxpby(m,ione,alpha,x,lx,beta,y,ly,info) +! if (m>0) call zaxpby(m,ione,alpha,x,lx,beta,y,ly,info) + + if (alpha.eq.zzero) then + if (beta.eq.zzero) then + !$omp parallel do private(i) + do i=1,m + y(i) = zzero + enddo + else if (beta.eq.zone) then + ! + ! Do nothing! + ! + + else if (beta.eq.-zone) then + !$omp parallel do private(i) + do i=1,m + y(i) = - y(i) + enddo + else + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + enddo + endif + + else if (alpha.eq.zone) then + + if (beta.eq.zzero) then + !$omp parallel do private(i) + do i=1,m + y(i) = x(i) + enddo + else if (beta.eq.zone) then + !$omp parallel do private(i) + do i=1,m + y(i) = x(i) + y(i) + enddo + + else if (beta.eq.-zone) then + !$omp parallel do private(i) + do i=1,m + y(i) = x(i) - y(i) + enddo + else + !$omp parallel do private(i) + do i=1,m + y(i) = x(i) + beta*y(i) + enddo + endif + + else if (alpha.eq.-zone) then + + if (beta.eq.zzero) then + !$omp parallel do private(i) + do i=1,m + y(i) = -x(i) + enddo + else if (beta.eq.zone) then + !$omp parallel do private(i) + do i=1,m + y(i) = -x(i) + y(i) + enddo + else if (beta.eq.-zone) then + !$omp parallel do private(i) + do i=1,m + y(i) = -x(i) - y(i) + enddo + else + !$omp parallel do private(i) + do i=1,m + y(i) = -x(i) + beta*y(i) + enddo + endif + + else + + if (beta.eq.zzero) then + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + enddo + else if (beta.eq.zone) then + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + y(i) + enddo + else if (beta.eq.-zone) then + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) - y(i) + enddo + else + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + beta*y(i) + enddo + endif + + endif + call psb_erractionrestore(err_act) return @@ -154,7 +655,7 @@ subroutine psi_zaxpbyv2(m,alpha, x, beta, y, z, info) complex(psb_dpk_), intent (in) :: alpha, beta integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: err_act - integer(psb_ipk_) :: lx, ly, lz + integer(psb_ipk_) :: lx, ly, lz, i integer(psb_ipk_) :: ierr(5) character(len=20) :: name, ch_err @@ -193,7 +694,105 @@ subroutine psi_zaxpbyv2(m,alpha, x, beta, y, z, info) goto 9999 end if - if (m>0) call zaxpbyv2(m,ione,alpha,x,lx,beta,y,ly,z,lz,info) + if (alpha.eq.zzero) then + if (beta.eq.zzero) then + !$omp parallel do private(i) + do i=1,m + Z(i) = zzero + enddo + else if (beta.eq.zone) then + ! + ! Do nothing! + ! + + else if (beta.eq.-zone) then + !$omp parallel do private(i) + do i=1,m + Z(i) = - y(i) + enddo + else + !$omp parallel do private(i) + do i=1,m + Z(i) = beta*y(i) + enddo + endif + + else if (alpha.eq.zone) then + + if (beta.eq.zzero) then + !$omp parallel do private(i) + do i=1,m + Z(i) = x(i) + enddo + else if (beta.eq.zone) then + !$omp parallel do private(i) + do i=1,m + Z(i) = x(i) + y(i) + enddo + + else if (beta.eq.-zone) then + !$omp parallel do private(i) + do i=1,m + Z(i) = x(i) - y(i) + enddo + else + !$omp parallel do private(i) + do i=1,m + Z(i) = x(i) + beta*y(i) + enddo + endif + + else if (alpha.eq.-zone) then + + if (beta.eq.zzero) then + !$omp parallel do private(i) + do i=1,m + Z(i) = -x(i) + enddo + else if (beta.eq.zone) then + !$omp parallel do private(i) + do i=1,m + Z(i) = -x(i) + y(i) + enddo + + else if (beta.eq.-zone) then + !$omp parallel do private(i) + do i=1,m + Z(i) = -x(i) - y(i) + enddo + else + !$omp parallel do private(i) + do i=1,m + Z(i) = -x(i) + beta*y(i) + enddo + endif + + else + + if (beta.eq.zzero) then + !$omp parallel do private(i) + do i=1,m + Z(i) = alpha*x(i) + enddo + else if (beta.eq.zone) then + !$omp parallel do private(i) + do i=1,m + Z(i) = alpha*x(i) + y(i) + enddo + + else if (beta.eq.-zone) then + !$omp parallel do private(i) + do i=1,m + Z(i) = alpha*x(i) - y(i) + enddo + else + !$omp parallel do private(i) + do i=1,m + Z(i) = alpha*x(i) + beta*y(i) + enddo + endif + + endif call psb_erractionrestore(err_act) return @@ -541,6 +1140,7 @@ subroutine zaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) if (alpha.eq.zzero) then if (beta.eq.zzero) then do j=1, n + !$omp parallel do private(i) do i=1,m y(i,j) = zzero enddo @@ -552,12 +1152,14 @@ subroutine zaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) else if (beta.eq.-zone) then do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = beta*y(i,j) enddo @@ -568,12 +1170,14 @@ subroutine zaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) if (beta.eq.zzero) then do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = x(i,j) enddo enddo else if (beta.eq.zone) then do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = x(i,j) + y(i,j) enddo @@ -581,12 +1185,14 @@ subroutine zaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) else if (beta.eq.-zone) then do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = x(i,j) - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = x(i,j) + beta*y(i,j) enddo @@ -597,12 +1203,14 @@ subroutine zaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) if (beta.eq.zzero) then do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = -x(i,j) enddo enddo else if (beta.eq.zone) then do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = -x(i,j) + y(i,j) enddo @@ -610,12 +1218,14 @@ subroutine zaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) else if (beta.eq.-zone) then do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = -x(i,j) - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = -x(i,j) + beta*y(i,j) enddo @@ -626,12 +1236,14 @@ subroutine zaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) if (beta.eq.zzero) then do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = alpha*x(i,j) enddo enddo else if (beta.eq.zone) then do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = alpha*x(i,j) + y(i,j) enddo @@ -639,12 +1251,14 @@ subroutine zaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) else if (beta.eq.-zone) then do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = alpha*x(i,j) - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) do i=1,m y(i,j) = alpha*x(i,j) + beta*y(i,j) enddo @@ -730,12 +1344,14 @@ subroutine zaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) else if (beta.eq.-zone) then do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = beta*y(i,j) enddo @@ -746,12 +1362,14 @@ subroutine zaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) if (beta.eq.zzero) then do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = x(i,j) enddo enddo else if (beta.eq.zone) then do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = x(i,j) + y(i,j) enddo @@ -759,12 +1377,14 @@ subroutine zaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) else if (beta.eq.-zone) then do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = x(i,j) - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = x(i,j) + beta*y(i,j) enddo @@ -775,12 +1395,14 @@ subroutine zaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) if (beta.eq.zzero) then do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = -x(i,j) enddo enddo else if (beta.eq.zone) then do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = -x(i,j) + y(i,j) enddo @@ -788,12 +1410,14 @@ subroutine zaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) else if (beta.eq.-zone) then do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = -x(i,j) - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = -x(i,j) + beta*y(i,j) enddo @@ -804,12 +1428,14 @@ subroutine zaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) if (beta.eq.zzero) then do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = alpha*x(i,j) enddo enddo else if (beta.eq.zone) then do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = alpha*x(i,j) + y(i,j) enddo @@ -817,12 +1443,14 @@ subroutine zaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) else if (beta.eq.-zone) then do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = alpha*x(i,j) - y(i,j) enddo enddo else do j=1,n + !$omp parallel do private(i) do i=1,m Z(i,j) = alpha*x(i,j) + beta*y(i,j) enddo diff --git a/base/tools/Makefile b/base/tools/Makefile index c8b488d3..1227da3f 100644 --- a/base/tools/Makefile +++ b/base/tools/Makefile @@ -27,7 +27,8 @@ FOBJS = psb_cdall.o psb_cdals.o psb_cdalv.o psb_cd_inloc.o psb_cdins.o psb_cdprt psb_s_map.o psb_d_map.o psb_c_map.o psb_z_map.o \ psb_s_par_csr_spspmm.o psb_d_par_csr_spspmm.o psb_c_par_csr_spspmm.o psb_z_par_csr_spspmm.o \ psb_s_glob_transpose.o psb_d_glob_transpose.o psb_c_glob_transpose.o psb_z_glob_transpose.o \ - psb_cgetelem.o psb_dgetelem.o psb_sgetelem.o psb_zgetelem.o + psb_cgetelem.o psb_dgetelem.o psb_sgetelem.o psb_zgetelem.o \ + psb_c_remap.o psb_s_remap.o psb_d_remap.o psb_z_remap.o # psb_lallc.o psb_lasb.o psb_lfree.o psb_lins.o \ MPFOBJS = psb_icdasb.o psb_ssphalo.o psb_dsphalo.o psb_csphalo.o psb_zsphalo.o \ diff --git a/base/tools/psb_c_remap.F90 b/base/tools/psb_c_remap.F90 new file mode 100644 index 00000000..881b2ad0 --- /dev/null +++ b/base/tools/psb_c_remap.F90 @@ -0,0 +1,255 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! +! Subroutine: psb_c_remap +! +! Arguments: +! desc_in - type(psb_desc_type). The communication descriptor to be cloned. +! desc_out - type(psb_desc_type). The output communication descriptor. +! info - integer. Return code. +subroutine psb_c_remap(np_remap, desc_in, a_in, ipd, isrc, nrsrc, naggr, & + & desc_out, a_out, info) + + use psb_base_mod, psb_protect_name => psb_c_remap + + implicit none + !....parameters... + integer(psb_ipk_), intent(in) :: np_remap + type(psb_desc_type), intent(inout) :: desc_in + type(psb_cspmat_type), intent(inout) :: a_in + type(psb_cspmat_type), intent(out) :: a_out + type(psb_desc_type), intent(out) :: desc_out + integer(psb_ipk_), intent(out) :: ipd + integer(psb_ipk_), allocatable, intent(out) :: isrc(:), nrsrc(:), naggr(:) + integer(psb_ipk_), intent(out) :: info + + + ! locals + type(psb_ctxt_type) :: ctxt, newctxt + integer(psb_ipk_) :: np, me, err_act + integer(psb_ipk_) :: rnp, rme + integer(psb_ipk_) :: ipdest, id1, id2, imd, i, nsrc + integer(psb_ipk_), allocatable :: newnl(:), nzsrc(:), ids(:) + type(psb_lc_coo_sparse_mat) :: acoo_snd, acoo_rcv + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + if (psb_get_errstatus() /= 0) return + info = psb_success_ + call psb_erractionsave(err_act) + name = 'psb_cdcpy' + + ctxt = desc_in%get_context() + + ! check on blacs grid + call psb_info(ctxt, me, np) + if (debug_level >= psb_debug_ext_) & + & write(debug_unit,*) me,' ',trim(name),': Entered' + if (np == -1) then + info = psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + +!!$ write(0,*) ' Remapping from ',np,' onto ', np_remap + + if (desc_in%get_fmt() == 'BLOCK') then + ! + ! Should we spread the processes in the new context, + ! or should we keep them close? + ! + if (.true.) then + allocate(ids(0:np_remap-1)) + if (np_remap <= np/2) then + ids(0) = 0 + do ipdest=1,np_remap -1 + ids(ipdest) = ids(ipdest-1) + np/np_remap + end do +!!$ write(0,*) ' IDS ',ids(:) + else + do ipdest = 0, np_remap-1 + ids(ipdest) = ipdest + end do + end if + call psb_init(newctxt,np=np_remap,basectxt=ctxt,ids=ids) + else + call psb_init(newctxt,np=np_remap,basectxt=ctxt) + end if + + call psb_info(newctxt,rme,rnp) +!!$ write(0,*) 'Old context: ',me,np,' New context: ',rme,rnp + call psb_bcast(ctxt,rnp) + allocate(newnl(rnp),naggr(np),stat=info) + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + endif + if (rnp >= np) then + write(0,*) ' No remapping on larger proc count now' + info = psb_err_internal_error_ + call psb_errpush(info,name) + goto 9999 + end if + naggr = 0 + + ! + ! Compute destination for my data. + ! Simplistic reallocation: divide the NP processes + ! across the new ones (as balanced as possible), + ! then send all data from old to new process + ! + id2 = np/rnp + id1 = id2+1 + imd = mod(np,rnp) + if (me < (imd*id1)) then + ipdest = (me/id1) + else + ipdest = ( ((me-imd*id1)/id2) + imd) + end if + if (allocated(ids)) then + ipd = ids(ipdest) + else + ipd = ipdest + end if +!!$ write(0,*) ' Sending my data from ',me,' to ', & +!!$ & ipd, 'out of ',rnp,rnp-1 + + ! + ! Compute local rows for all new + ! processes; will have a BLOCK distribution + ! + newnl = 0 + newnl(ipdest+1) = desc_in%get_local_rows() + call psb_sum(ctxt,newnl) + + if (rme>=0) then + ! + if (rme < imd) then + isrc = [ (i, i=rme*id1,min(rme*id1+id1-1,np-1)) ] + else + isrc = [ (i, i= imd*id1+((rme-imd))*id2,& + & min(imd*id1+(rme-imd)*id2+id2-1,np-1) ) ] + end if +!!$ write(0,*) me,rme,imd,' ISRC: ',isrc(:) + nsrc = size(isrc) +!!$ write(0,*) me,rme,'In ',desc_in%get_local_rows(),desc_in%get_global_rows(),& +!!$ & ' out ',desc_out%get_local_rows(),desc_out%get_global_rows() + else +!!$ write(0,*) me,rme,'In ',desc_in%get_local_rows(),desc_in%get_global_rows(),& +!!$ & ' out ',0,0 + end if + else + write(0,*) 'Right now only BLOCK on input ' + info = psb_err_invalid_cd_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + ! + ! Collect matrices on their destinations + ! + block + integer(psb_ipk_) :: nzsnd, nzrcv, ip + integer(psb_ipk_) :: nrl, ncl, nzl, nzp + call a_in%cp_to(acoo_snd) + nzsnd = acoo_snd%get_nzeros() + call psb_snd(ctxt,nzsnd,ipd) + call psb_snd(ctxt,desc_in%get_local_rows(),ipd) + ! Convert to global numbering + call psb_loc_to_glob(acoo_snd%ia(1:nzsnd),desc_in,info) + call psb_loc_to_glob(acoo_snd%ja(1:nzsnd),desc_in,info) + + call psb_snd(ctxt,acoo_snd%ia(1:nzsnd),ipd) + call psb_snd(ctxt,acoo_snd%ja(1:nzsnd),ipd) + call psb_snd(ctxt,acoo_snd%val(1:nzsnd),ipd) + + if (rme>=0) then + ! prepare to receive + nzsrc = isrc + nrsrc = isrc + nzl = 0 + do ip=1, nsrc + call psb_rcv(ctxt,nzsrc(ip),isrc(ip)) + call psb_rcv(ctxt,nrsrc(ip),isrc(ip)) + nzl = nzl + nzsrc(ip) + end do +!!$ write(0,*) rme,' Check on NR:',newnl(rme+1),sum(nrsrc) + call acoo_rcv%allocate(newnl(rme+1),newnl(rme+1),nzl) + nrl = acoo_rcv%get_nrows() + ncl = acoo_rcv%get_ncols() + nzp = 0 + do ip=1, nsrc + call psb_rcv(ctxt,acoo_rcv%ia(nzp+1:nzp+nzsrc(ip)),isrc(ip)) + call psb_rcv(ctxt,acoo_rcv%ja(nzp+1:nzp+nzsrc(ip)),isrc(ip)) + call psb_rcv(ctxt,acoo_rcv%val(nzp+1:nzp+nzsrc(ip)),isrc(ip)) + nzp = nzp + nzsrc(ip) + end do + call acoo_rcv%set_nzeros(nzp) +!!$ write(0,*) rme,' Collected: ',& +!!$ & acoo_rcv%get_nrows(),acoo_rcv%get_ncols(),acoo_rcv%get_nzeros() + + ! + ! New descriptor + ! + call psb_cdall(newctxt,desc_out,info,nl=newnl(rme+1)) + ! Insert + call psb_spall(a_out,desc_out,info) + call psb_spins(nzp,acoo_rcv%ia(1:nzp),acoo_rcv%ja(1:nzp),& + & acoo_rcv%val(1:nzp),a_out,desc_out,info) + ! Assemble + call psb_cdasb(desc_out,info) + call psb_spasb(a_out,desc_out,info) + +!!$ write(0,*) rme,' Regenerated: ',& +!!$ & desc_out%get_local_rows(), desc_out%get_local_cols(),& +!!$ & a_out%get_nrows(),a_out%get_ncols(),a_out%get_nzeros() + naggr(me+1) = desc_out%get_local_rows() + else + naggr(me+1) = 0 + end if + call psb_sum(ctxt,naggr) + + end block + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ctxt,err_act) + + return + +end subroutine psb_c_remap diff --git a/base/tools/psb_cspalloc.f90 b/base/tools/psb_cspalloc.f90 index cac6c1e6..69b9e1c2 100644 --- a/base/tools/psb_cspalloc.f90 +++ b/base/tools/psb_cspalloc.f90 @@ -102,7 +102,7 @@ subroutine psb_cspalloc(a, desc_a, info, nnz) & ':allocating size:',loc_row,loc_col,nnz_ call a%free() !....allocate aspk, ia1, ia2..... - call a%csall(loc_row,loc_col,info,nz=nnz_) + call a%allocate(loc_row,loc_col,info,nz=nnz_) if(info /= psb_success_) then info=psb_err_from_subroutine_ call psb_errpush(info,name,a_err='sp_all') diff --git a/base/tools/psb_d_remap.F90 b/base/tools/psb_d_remap.F90 new file mode 100644 index 00000000..2157b56b --- /dev/null +++ b/base/tools/psb_d_remap.F90 @@ -0,0 +1,255 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! +! Subroutine: psb_d_remap +! +! Arguments: +! desc_in - type(psb_desc_type). The communication descriptor to be cloned. +! desc_out - type(psb_desc_type). The output communication descriptor. +! info - integer. Return code. +subroutine psb_d_remap(np_remap, desc_in, a_in, ipd, isrc, nrsrc, naggr, & + & desc_out, a_out, info) + + use psb_base_mod, psb_protect_name => psb_d_remap + + implicit none + !....parameters... + integer(psb_ipk_), intent(in) :: np_remap + type(psb_desc_type), intent(inout) :: desc_in + type(psb_dspmat_type), intent(inout) :: a_in + type(psb_dspmat_type), intent(out) :: a_out + type(psb_desc_type), intent(out) :: desc_out + integer(psb_ipk_), intent(out) :: ipd + integer(psb_ipk_), allocatable, intent(out) :: isrc(:), nrsrc(:), naggr(:) + integer(psb_ipk_), intent(out) :: info + + + ! locals + type(psb_ctxt_type) :: ctxt, newctxt + integer(psb_ipk_) :: np, me, err_act + integer(psb_ipk_) :: rnp, rme + integer(psb_ipk_) :: ipdest, id1, id2, imd, i, nsrc + integer(psb_ipk_), allocatable :: newnl(:), nzsrc(:), ids(:) + type(psb_ld_coo_sparse_mat) :: acoo_snd, acoo_rcv + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + if (psb_get_errstatus() /= 0) return + info = psb_success_ + call psb_erractionsave(err_act) + name = 'psb_cdcpy' + + ctxt = desc_in%get_context() + + ! check on blacs grid + call psb_info(ctxt, me, np) + if (debug_level >= psb_debug_ext_) & + & write(debug_unit,*) me,' ',trim(name),': Entered' + if (np == -1) then + info = psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + +!!$ write(0,*) ' Remapping from ',np,' onto ', np_remap + + if (desc_in%get_fmt() == 'BLOCK') then + ! + ! Should we spread the processes in the new context, + ! or should we keep them close? + ! + if (.true.) then + allocate(ids(0:np_remap-1)) + if (np_remap <= np/2) then + ids(0) = 0 + do ipdest=1,np_remap -1 + ids(ipdest) = ids(ipdest-1) + np/np_remap + end do +!!$ write(0,*) ' IDS ',ids(:) + else + do ipdest = 0, np_remap-1 + ids(ipdest) = ipdest + end do + end if + call psb_init(newctxt,np=np_remap,basectxt=ctxt,ids=ids) + else + call psb_init(newctxt,np=np_remap,basectxt=ctxt) + end if + + call psb_info(newctxt,rme,rnp) +!!$ write(0,*) 'Old context: ',me,np,' New context: ',rme,rnp + call psb_bcast(ctxt,rnp) + allocate(newnl(rnp),naggr(np),stat=info) + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + endif + if (rnp >= np) then + write(0,*) ' No remapping on larger proc count now' + info = psb_err_internal_error_ + call psb_errpush(info,name) + goto 9999 + end if + naggr = 0 + + ! + ! Compute destination for my data. + ! Simplistic reallocation: divide the NP processes + ! across the new ones (as balanced as possible), + ! then send all data from old to new process + ! + id2 = np/rnp + id1 = id2+1 + imd = mod(np,rnp) + if (me < (imd*id1)) then + ipdest = (me/id1) + else + ipdest = ( ((me-imd*id1)/id2) + imd) + end if + if (allocated(ids)) then + ipd = ids(ipdest) + else + ipd = ipdest + end if +!!$ write(0,*) ' Sending my data from ',me,' to ', & +!!$ & ipd, 'out of ',rnp,rnp-1 + + ! + ! Compute local rows for all new + ! processes; will have a BLOCK distribution + ! + newnl = 0 + newnl(ipdest+1) = desc_in%get_local_rows() + call psb_sum(ctxt,newnl) + + if (rme>=0) then + ! + if (rme < imd) then + isrc = [ (i, i=rme*id1,min(rme*id1+id1-1,np-1)) ] + else + isrc = [ (i, i= imd*id1+((rme-imd))*id2,& + & min(imd*id1+(rme-imd)*id2+id2-1,np-1) ) ] + end if +!!$ write(0,*) me,rme,imd,' ISRC: ',isrc(:) + nsrc = size(isrc) +!!$ write(0,*) me,rme,'In ',desc_in%get_local_rows(),desc_in%get_global_rows(),& +!!$ & ' out ',desc_out%get_local_rows(),desc_out%get_global_rows() + else +!!$ write(0,*) me,rme,'In ',desc_in%get_local_rows(),desc_in%get_global_rows(),& +!!$ & ' out ',0,0 + end if + else + write(0,*) 'Right now only BLOCK on input ' + info = psb_err_invalid_cd_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + ! + ! Collect matrices on their destinations + ! + block + integer(psb_ipk_) :: nzsnd, nzrcv, ip + integer(psb_ipk_) :: nrl, ncl, nzl, nzp + call a_in%cp_to(acoo_snd) + nzsnd = acoo_snd%get_nzeros() + call psb_snd(ctxt,nzsnd,ipd) + call psb_snd(ctxt,desc_in%get_local_rows(),ipd) + ! Convert to global numbering + call psb_loc_to_glob(acoo_snd%ia(1:nzsnd),desc_in,info) + call psb_loc_to_glob(acoo_snd%ja(1:nzsnd),desc_in,info) + + call psb_snd(ctxt,acoo_snd%ia(1:nzsnd),ipd) + call psb_snd(ctxt,acoo_snd%ja(1:nzsnd),ipd) + call psb_snd(ctxt,acoo_snd%val(1:nzsnd),ipd) + + if (rme>=0) then + ! prepare to receive + nzsrc = isrc + nrsrc = isrc + nzl = 0 + do ip=1, nsrc + call psb_rcv(ctxt,nzsrc(ip),isrc(ip)) + call psb_rcv(ctxt,nrsrc(ip),isrc(ip)) + nzl = nzl + nzsrc(ip) + end do +!!$ write(0,*) rme,' Check on NR:',newnl(rme+1),sum(nrsrc) + call acoo_rcv%allocate(newnl(rme+1),newnl(rme+1),nzl) + nrl = acoo_rcv%get_nrows() + ncl = acoo_rcv%get_ncols() + nzp = 0 + do ip=1, nsrc + call psb_rcv(ctxt,acoo_rcv%ia(nzp+1:nzp+nzsrc(ip)),isrc(ip)) + call psb_rcv(ctxt,acoo_rcv%ja(nzp+1:nzp+nzsrc(ip)),isrc(ip)) + call psb_rcv(ctxt,acoo_rcv%val(nzp+1:nzp+nzsrc(ip)),isrc(ip)) + nzp = nzp + nzsrc(ip) + end do + call acoo_rcv%set_nzeros(nzp) +!!$ write(0,*) rme,' Collected: ',& +!!$ & acoo_rcv%get_nrows(),acoo_rcv%get_ncols(),acoo_rcv%get_nzeros() + + ! + ! New descriptor + ! + call psb_cdall(newctxt,desc_out,info,nl=newnl(rme+1)) + ! Insert + call psb_spall(a_out,desc_out,info) + call psb_spins(nzp,acoo_rcv%ia(1:nzp),acoo_rcv%ja(1:nzp),& + & acoo_rcv%val(1:nzp),a_out,desc_out,info) + ! Assemble + call psb_cdasb(desc_out,info) + call psb_spasb(a_out,desc_out,info) + +!!$ write(0,*) rme,' Regenerated: ',& +!!$ & desc_out%get_local_rows(), desc_out%get_local_cols(),& +!!$ & a_out%get_nrows(),a_out%get_ncols(),a_out%get_nzeros() + naggr(me+1) = desc_out%get_local_rows() + else + naggr(me+1) = 0 + end if + call psb_sum(ctxt,naggr) + + end block + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ctxt,err_act) + + return + +end subroutine psb_d_remap diff --git a/base/tools/psb_dspalloc.f90 b/base/tools/psb_dspalloc.f90 index cae01838..56ad6c93 100644 --- a/base/tools/psb_dspalloc.f90 +++ b/base/tools/psb_dspalloc.f90 @@ -102,7 +102,7 @@ subroutine psb_dspalloc(a, desc_a, info, nnz) & ':allocating size:',loc_row,loc_col,nnz_ call a%free() !....allocate aspk, ia1, ia2..... - call a%csall(loc_row,loc_col,info,nz=nnz_) + call a%allocate(loc_row,loc_col,info,nz=nnz_) if(info /= psb_success_) then info=psb_err_from_subroutine_ call psb_errpush(info,name,a_err='sp_all') diff --git a/base/tools/psb_s_remap.F90 b/base/tools/psb_s_remap.F90 new file mode 100644 index 00000000..899c1b26 --- /dev/null +++ b/base/tools/psb_s_remap.F90 @@ -0,0 +1,255 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! +! Subroutine: psb_s_remap +! +! Arguments: +! desc_in - type(psb_desc_type). The communication descriptor to be cloned. +! desc_out - type(psb_desc_type). The output communication descriptor. +! info - integer. Return code. +subroutine psb_s_remap(np_remap, desc_in, a_in, ipd, isrc, nrsrc, naggr, & + & desc_out, a_out, info) + + use psb_base_mod, psb_protect_name => psb_s_remap + + implicit none + !....parameters... + integer(psb_ipk_), intent(in) :: np_remap + type(psb_desc_type), intent(inout) :: desc_in + type(psb_sspmat_type), intent(inout) :: a_in + type(psb_sspmat_type), intent(out) :: a_out + type(psb_desc_type), intent(out) :: desc_out + integer(psb_ipk_), intent(out) :: ipd + integer(psb_ipk_), allocatable, intent(out) :: isrc(:), nrsrc(:), naggr(:) + integer(psb_ipk_), intent(out) :: info + + + ! locals + type(psb_ctxt_type) :: ctxt, newctxt + integer(psb_ipk_) :: np, me, err_act + integer(psb_ipk_) :: rnp, rme + integer(psb_ipk_) :: ipdest, id1, id2, imd, i, nsrc + integer(psb_ipk_), allocatable :: newnl(:), nzsrc(:), ids(:) + type(psb_ls_coo_sparse_mat) :: acoo_snd, acoo_rcv + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + if (psb_get_errstatus() /= 0) return + info = psb_success_ + call psb_erractionsave(err_act) + name = 'psb_cdcpy' + + ctxt = desc_in%get_context() + + ! check on blacs grid + call psb_info(ctxt, me, np) + if (debug_level >= psb_debug_ext_) & + & write(debug_unit,*) me,' ',trim(name),': Entered' + if (np == -1) then + info = psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + +!!$ write(0,*) ' Remapping from ',np,' onto ', np_remap + + if (desc_in%get_fmt() == 'BLOCK') then + ! + ! Should we spread the processes in the new context, + ! or should we keep them close? + ! + if (.true.) then + allocate(ids(0:np_remap-1)) + if (np_remap <= np/2) then + ids(0) = 0 + do ipdest=1,np_remap -1 + ids(ipdest) = ids(ipdest-1) + np/np_remap + end do +!!$ write(0,*) ' IDS ',ids(:) + else + do ipdest = 0, np_remap-1 + ids(ipdest) = ipdest + end do + end if + call psb_init(newctxt,np=np_remap,basectxt=ctxt,ids=ids) + else + call psb_init(newctxt,np=np_remap,basectxt=ctxt) + end if + + call psb_info(newctxt,rme,rnp) +!!$ write(0,*) 'Old context: ',me,np,' New context: ',rme,rnp + call psb_bcast(ctxt,rnp) + allocate(newnl(rnp),naggr(np),stat=info) + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + endif + if (rnp >= np) then + write(0,*) ' No remapping on larger proc count now' + info = psb_err_internal_error_ + call psb_errpush(info,name) + goto 9999 + end if + naggr = 0 + + ! + ! Compute destination for my data. + ! Simplistic reallocation: divide the NP processes + ! across the new ones (as balanced as possible), + ! then send all data from old to new process + ! + id2 = np/rnp + id1 = id2+1 + imd = mod(np,rnp) + if (me < (imd*id1)) then + ipdest = (me/id1) + else + ipdest = ( ((me-imd*id1)/id2) + imd) + end if + if (allocated(ids)) then + ipd = ids(ipdest) + else + ipd = ipdest + end if +!!$ write(0,*) ' Sending my data from ',me,' to ', & +!!$ & ipd, 'out of ',rnp,rnp-1 + + ! + ! Compute local rows for all new + ! processes; will have a BLOCK distribution + ! + newnl = 0 + newnl(ipdest+1) = desc_in%get_local_rows() + call psb_sum(ctxt,newnl) + + if (rme>=0) then + ! + if (rme < imd) then + isrc = [ (i, i=rme*id1,min(rme*id1+id1-1,np-1)) ] + else + isrc = [ (i, i= imd*id1+((rme-imd))*id2,& + & min(imd*id1+(rme-imd)*id2+id2-1,np-1) ) ] + end if +!!$ write(0,*) me,rme,imd,' ISRC: ',isrc(:) + nsrc = size(isrc) +!!$ write(0,*) me,rme,'In ',desc_in%get_local_rows(),desc_in%get_global_rows(),& +!!$ & ' out ',desc_out%get_local_rows(),desc_out%get_global_rows() + else +!!$ write(0,*) me,rme,'In ',desc_in%get_local_rows(),desc_in%get_global_rows(),& +!!$ & ' out ',0,0 + end if + else + write(0,*) 'Right now only BLOCK on input ' + info = psb_err_invalid_cd_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + ! + ! Collect matrices on their destinations + ! + block + integer(psb_ipk_) :: nzsnd, nzrcv, ip + integer(psb_ipk_) :: nrl, ncl, nzl, nzp + call a_in%cp_to(acoo_snd) + nzsnd = acoo_snd%get_nzeros() + call psb_snd(ctxt,nzsnd,ipd) + call psb_snd(ctxt,desc_in%get_local_rows(),ipd) + ! Convert to global numbering + call psb_loc_to_glob(acoo_snd%ia(1:nzsnd),desc_in,info) + call psb_loc_to_glob(acoo_snd%ja(1:nzsnd),desc_in,info) + + call psb_snd(ctxt,acoo_snd%ia(1:nzsnd),ipd) + call psb_snd(ctxt,acoo_snd%ja(1:nzsnd),ipd) + call psb_snd(ctxt,acoo_snd%val(1:nzsnd),ipd) + + if (rme>=0) then + ! prepare to receive + nzsrc = isrc + nrsrc = isrc + nzl = 0 + do ip=1, nsrc + call psb_rcv(ctxt,nzsrc(ip),isrc(ip)) + call psb_rcv(ctxt,nrsrc(ip),isrc(ip)) + nzl = nzl + nzsrc(ip) + end do +!!$ write(0,*) rme,' Check on NR:',newnl(rme+1),sum(nrsrc) + call acoo_rcv%allocate(newnl(rme+1),newnl(rme+1),nzl) + nrl = acoo_rcv%get_nrows() + ncl = acoo_rcv%get_ncols() + nzp = 0 + do ip=1, nsrc + call psb_rcv(ctxt,acoo_rcv%ia(nzp+1:nzp+nzsrc(ip)),isrc(ip)) + call psb_rcv(ctxt,acoo_rcv%ja(nzp+1:nzp+nzsrc(ip)),isrc(ip)) + call psb_rcv(ctxt,acoo_rcv%val(nzp+1:nzp+nzsrc(ip)),isrc(ip)) + nzp = nzp + nzsrc(ip) + end do + call acoo_rcv%set_nzeros(nzp) +!!$ write(0,*) rme,' Collected: ',& +!!$ & acoo_rcv%get_nrows(),acoo_rcv%get_ncols(),acoo_rcv%get_nzeros() + + ! + ! New descriptor + ! + call psb_cdall(newctxt,desc_out,info,nl=newnl(rme+1)) + ! Insert + call psb_spall(a_out,desc_out,info) + call psb_spins(nzp,acoo_rcv%ia(1:nzp),acoo_rcv%ja(1:nzp),& + & acoo_rcv%val(1:nzp),a_out,desc_out,info) + ! Assemble + call psb_cdasb(desc_out,info) + call psb_spasb(a_out,desc_out,info) + +!!$ write(0,*) rme,' Regenerated: ',& +!!$ & desc_out%get_local_rows(), desc_out%get_local_cols(),& +!!$ & a_out%get_nrows(),a_out%get_ncols(),a_out%get_nzeros() + naggr(me+1) = desc_out%get_local_rows() + else + naggr(me+1) = 0 + end if + call psb_sum(ctxt,naggr) + + end block + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ctxt,err_act) + + return + +end subroutine psb_s_remap diff --git a/base/tools/psb_sspalloc.f90 b/base/tools/psb_sspalloc.f90 index 10e37d58..15c3c538 100644 --- a/base/tools/psb_sspalloc.f90 +++ b/base/tools/psb_sspalloc.f90 @@ -102,7 +102,7 @@ subroutine psb_sspalloc(a, desc_a, info, nnz) & ':allocating size:',loc_row,loc_col,nnz_ call a%free() !....allocate aspk, ia1, ia2..... - call a%csall(loc_row,loc_col,info,nz=nnz_) + call a%allocate(loc_row,loc_col,info,nz=nnz_) if(info /= psb_success_) then info=psb_err_from_subroutine_ call psb_errpush(info,name,a_err='sp_all') diff --git a/base/tools/psb_z_remap.F90 b/base/tools/psb_z_remap.F90 new file mode 100644 index 00000000..f9c5c39c --- /dev/null +++ b/base/tools/psb_z_remap.F90 @@ -0,0 +1,255 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! +! Subroutine: psb_z_remap +! +! Arguments: +! desc_in - type(psb_desc_type). The communication descriptor to be cloned. +! desc_out - type(psb_desc_type). The output communication descriptor. +! info - integer. Return code. +subroutine psb_z_remap(np_remap, desc_in, a_in, ipd, isrc, nrsrc, naggr, & + & desc_out, a_out, info) + + use psb_base_mod, psb_protect_name => psb_z_remap + + implicit none + !....parameters... + integer(psb_ipk_), intent(in) :: np_remap + type(psb_desc_type), intent(inout) :: desc_in + type(psb_zspmat_type), intent(inout) :: a_in + type(psb_zspmat_type), intent(out) :: a_out + type(psb_desc_type), intent(out) :: desc_out + integer(psb_ipk_), intent(out) :: ipd + integer(psb_ipk_), allocatable, intent(out) :: isrc(:), nrsrc(:), naggr(:) + integer(psb_ipk_), intent(out) :: info + + + ! locals + type(psb_ctxt_type) :: ctxt, newctxt + integer(psb_ipk_) :: np, me, err_act + integer(psb_ipk_) :: rnp, rme + integer(psb_ipk_) :: ipdest, id1, id2, imd, i, nsrc + integer(psb_ipk_), allocatable :: newnl(:), nzsrc(:), ids(:) + type(psb_lz_coo_sparse_mat) :: acoo_snd, acoo_rcv + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + if (psb_get_errstatus() /= 0) return + info = psb_success_ + call psb_erractionsave(err_act) + name = 'psb_cdcpy' + + ctxt = desc_in%get_context() + + ! check on blacs grid + call psb_info(ctxt, me, np) + if (debug_level >= psb_debug_ext_) & + & write(debug_unit,*) me,' ',trim(name),': Entered' + if (np == -1) then + info = psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + +!!$ write(0,*) ' Remapping from ',np,' onto ', np_remap + + if (desc_in%get_fmt() == 'BLOCK') then + ! + ! Should we spread the processes in the new context, + ! or should we keep them close? + ! + if (.true.) then + allocate(ids(0:np_remap-1)) + if (np_remap <= np/2) then + ids(0) = 0 + do ipdest=1,np_remap -1 + ids(ipdest) = ids(ipdest-1) + np/np_remap + end do +!!$ write(0,*) ' IDS ',ids(:) + else + do ipdest = 0, np_remap-1 + ids(ipdest) = ipdest + end do + end if + call psb_init(newctxt,np=np_remap,basectxt=ctxt,ids=ids) + else + call psb_init(newctxt,np=np_remap,basectxt=ctxt) + end if + + call psb_info(newctxt,rme,rnp) +!!$ write(0,*) 'Old context: ',me,np,' New context: ',rme,rnp + call psb_bcast(ctxt,rnp) + allocate(newnl(rnp),naggr(np),stat=info) + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + endif + if (rnp >= np) then + write(0,*) ' No remapping on larger proc count now' + info = psb_err_internal_error_ + call psb_errpush(info,name) + goto 9999 + end if + naggr = 0 + + ! + ! Compute destination for my data. + ! Simplistic reallocation: divide the NP processes + ! across the new ones (as balanced as possible), + ! then send all data from old to new process + ! + id2 = np/rnp + id1 = id2+1 + imd = mod(np,rnp) + if (me < (imd*id1)) then + ipdest = (me/id1) + else + ipdest = ( ((me-imd*id1)/id2) + imd) + end if + if (allocated(ids)) then + ipd = ids(ipdest) + else + ipd = ipdest + end if +!!$ write(0,*) ' Sending my data from ',me,' to ', & +!!$ & ipd, 'out of ',rnp,rnp-1 + + ! + ! Compute local rows for all new + ! processes; will have a BLOCK distribution + ! + newnl = 0 + newnl(ipdest+1) = desc_in%get_local_rows() + call psb_sum(ctxt,newnl) + + if (rme>=0) then + ! + if (rme < imd) then + isrc = [ (i, i=rme*id1,min(rme*id1+id1-1,np-1)) ] + else + isrc = [ (i, i= imd*id1+((rme-imd))*id2,& + & min(imd*id1+(rme-imd)*id2+id2-1,np-1) ) ] + end if +!!$ write(0,*) me,rme,imd,' ISRC: ',isrc(:) + nsrc = size(isrc) +!!$ write(0,*) me,rme,'In ',desc_in%get_local_rows(),desc_in%get_global_rows(),& +!!$ & ' out ',desc_out%get_local_rows(),desc_out%get_global_rows() + else +!!$ write(0,*) me,rme,'In ',desc_in%get_local_rows(),desc_in%get_global_rows(),& +!!$ & ' out ',0,0 + end if + else + write(0,*) 'Right now only BLOCK on input ' + info = psb_err_invalid_cd_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + ! + ! Collect matrices on their destinations + ! + block + integer(psb_ipk_) :: nzsnd, nzrcv, ip + integer(psb_ipk_) :: nrl, ncl, nzl, nzp + call a_in%cp_to(acoo_snd) + nzsnd = acoo_snd%get_nzeros() + call psb_snd(ctxt,nzsnd,ipd) + call psb_snd(ctxt,desc_in%get_local_rows(),ipd) + ! Convert to global numbering + call psb_loc_to_glob(acoo_snd%ia(1:nzsnd),desc_in,info) + call psb_loc_to_glob(acoo_snd%ja(1:nzsnd),desc_in,info) + + call psb_snd(ctxt,acoo_snd%ia(1:nzsnd),ipd) + call psb_snd(ctxt,acoo_snd%ja(1:nzsnd),ipd) + call psb_snd(ctxt,acoo_snd%val(1:nzsnd),ipd) + + if (rme>=0) then + ! prepare to receive + nzsrc = isrc + nrsrc = isrc + nzl = 0 + do ip=1, nsrc + call psb_rcv(ctxt,nzsrc(ip),isrc(ip)) + call psb_rcv(ctxt,nrsrc(ip),isrc(ip)) + nzl = nzl + nzsrc(ip) + end do +!!$ write(0,*) rme,' Check on NR:',newnl(rme+1),sum(nrsrc) + call acoo_rcv%allocate(newnl(rme+1),newnl(rme+1),nzl) + nrl = acoo_rcv%get_nrows() + ncl = acoo_rcv%get_ncols() + nzp = 0 + do ip=1, nsrc + call psb_rcv(ctxt,acoo_rcv%ia(nzp+1:nzp+nzsrc(ip)),isrc(ip)) + call psb_rcv(ctxt,acoo_rcv%ja(nzp+1:nzp+nzsrc(ip)),isrc(ip)) + call psb_rcv(ctxt,acoo_rcv%val(nzp+1:nzp+nzsrc(ip)),isrc(ip)) + nzp = nzp + nzsrc(ip) + end do + call acoo_rcv%set_nzeros(nzp) +!!$ write(0,*) rme,' Collected: ',& +!!$ & acoo_rcv%get_nrows(),acoo_rcv%get_ncols(),acoo_rcv%get_nzeros() + + ! + ! New descriptor + ! + call psb_cdall(newctxt,desc_out,info,nl=newnl(rme+1)) + ! Insert + call psb_spall(a_out,desc_out,info) + call psb_spins(nzp,acoo_rcv%ia(1:nzp),acoo_rcv%ja(1:nzp),& + & acoo_rcv%val(1:nzp),a_out,desc_out,info) + ! Assemble + call psb_cdasb(desc_out,info) + call psb_spasb(a_out,desc_out,info) + +!!$ write(0,*) rme,' Regenerated: ',& +!!$ & desc_out%get_local_rows(), desc_out%get_local_cols(),& +!!$ & a_out%get_nrows(),a_out%get_ncols(),a_out%get_nzeros() + naggr(me+1) = desc_out%get_local_rows() + else + naggr(me+1) = 0 + end if + call psb_sum(ctxt,naggr) + + end block + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ctxt,err_act) + + return + +end subroutine psb_z_remap diff --git a/base/tools/psb_zspalloc.f90 b/base/tools/psb_zspalloc.f90 index 823fee7a..16d48734 100644 --- a/base/tools/psb_zspalloc.f90 +++ b/base/tools/psb_zspalloc.f90 @@ -102,7 +102,7 @@ subroutine psb_zspalloc(a, desc_a, info, nnz) & ':allocating size:',loc_row,loc_col,nnz_ call a%free() !....allocate aspk, ia1, ia2..... - call a%csall(loc_row,loc_col,info,nz=nnz_) + call a%allocate(loc_row,loc_col,info,nz=nnz_) if(info /= psb_success_) then info=psb_err_from_subroutine_ call psb_errpush(info,name,a_err='sp_all') diff --git a/config/pac.m4 b/config/pac.m4 index 46a56b68..ecf8e475 100644 --- a/config/pac.m4 +++ b/config/pac.m4 @@ -363,6 +363,45 @@ fi ] ) +dnl @synopsis PAC_ARG_OPENMP +dnl +dnl Test for --enable-openmp +dnl +dnl +dnl +dnl Example use: +dnl +dnl +dnl @author Salvatore Filippone +dnl +AC_DEFUN([PAC_ARG_OPENMP], +[AC_MSG_CHECKING([whether we want openmp ]) +AC_ARG_ENABLE(openmp, +AC_HELP_STRING([--enable-openmp], +[Specify whether to enable openmp. ]), +[ +pac_cv_openmp="yes"; +] +dnl , +dnl [pac_cv_openmp="no";] + ) +if test x"$pac_cv_openmp" == x"yes" ; then + AC_MSG_RESULT([yes.]) + AC_LANG_PUSH([Fortran]) + AC_OPENMP() + pac_cv_openmp_fcopt="$OPENMP_FCFLAGS"; + AC_LANG_POP() + AC_LANG_PUSH([C]) + AC_OPENMP() + pac_cv_openmp_ccopt="$OPENMP_CFLAGS"; + AC_LANG_POP() +else + pac_cv_openmp="no"; + AC_MSG_RESULT([no.]) +fi +] +) + dnl @synopsis PAC_ARG_LONG_INTEGERS dnl dnl Test for --enable-long-integers diff --git a/configure b/configure index e8688547..e70bbfb7 100755 --- a/configure +++ b/configure @@ -663,6 +663,8 @@ BASEMODNAME CDEFINES FDEFINES LAPACK_LIBS +OPENMP_CFLAGS +OPENMP_FCFLAGS EGREP GREP CPP @@ -771,6 +773,7 @@ enable_dependency_tracking enable_silent_rules with_ipk with_lpk +enable_openmp with_blas with_blasdir with_lapack @@ -1420,6 +1423,8 @@ Optional Features: speeds up one-time build --enable-silent-rules less verbose build output (undo: "make V=1") --disable-silent-rules verbose build output (undo: "make V=0") + --enable-openmp Specify whether to enable openmp. + --disable-openmp do not use OpenMP Optional Packages: --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] @@ -6890,7 +6895,191 @@ FDEFINES="$psblas_cv_define_prepend-DIPK${pac_cv_ipk_size} $FDEFINES"; FDEFINES="$psblas_cv_define_prepend-DLPK${pac_cv_lpk_size} $FDEFINES"; CDEFINES="-DIPK${pac_cv_ipk_size} -DLPK${pac_cv_lpk_size} $CDEFINES" +FLINK="$MPIFC" +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we want openmp " >&5 +$as_echo_n "checking whether we want openmp ... " >&6; } +# Check whether --enable-openmp was given. +if test "${enable_openmp+set}" = set; then : + enableval=$enable_openmp; +pac_cv_openmp="yes"; + +fi + +if test x"$pac_cv_openmp" == x"yes" ; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes." >&5 +$as_echo "yes." >&6; } + ac_ext=${ac_fc_srcext-f} +ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' +ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_fc_compiler_gnu + + + OPENMP_FCFLAGS= + # Check whether --enable-openmp was given. +if test "${enable_openmp+set}" = set; then : + enableval=$enable_openmp; +fi + + if test "$enable_openmp" != no; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $FC option to support OpenMP" >&5 +$as_echo_n "checking for $FC option to support OpenMP... " >&6; } +if ${ac_cv_prog_fc_openmp+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat > conftest.$ac_ext <<_ACEOF + + program main + implicit none +!$ integer tid + tid = 42 + call omp_set_num_threads(2) + end + +_ACEOF +if ac_fn_fc_try_link "$LINENO"; then : + ac_cv_prog_fc_openmp='none needed' +else + ac_cv_prog_fc_openmp='unsupported' + for ac_option in -fopenmp -xopenmp -openmp -mp -omp -qsmp=omp -homp \ + -Popenmp --openmp; do + ac_save_FCFLAGS=$FCFLAGS + FCFLAGS="$FCFLAGS $ac_option" + cat > conftest.$ac_ext <<_ACEOF + + program main + implicit none +!$ integer tid + tid = 42 + call omp_set_num_threads(2) + end + +_ACEOF +if ac_fn_fc_try_link "$LINENO"; then : + ac_cv_prog_fc_openmp=$ac_option +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + FCFLAGS=$ac_save_FCFLAGS + if test "$ac_cv_prog_fc_openmp" != unsupported; then + break + fi + done +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_fc_openmp" >&5 +$as_echo "$ac_cv_prog_fc_openmp" >&6; } + case $ac_cv_prog_fc_openmp in #( + "none needed" | unsupported) + ;; #( + *) + OPENMP_FCFLAGS=$ac_cv_prog_fc_openmp ;; + esac + fi + + + pac_cv_openmp_fcopt="$OPENMP_FCFLAGS"; + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + OPENMP_CFLAGS= + # Check whether --enable-openmp was given. +if test "${enable_openmp+set}" = set; then : + enableval=$enable_openmp; +fi + + if test "$enable_openmp" != no; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to support OpenMP" >&5 +$as_echo_n "checking for $CC option to support OpenMP... " >&6; } +if ${ac_cv_prog_c_openmp+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#ifndef _OPENMP + choke me +#endif +#include +int main () { return omp_get_num_threads (); } + +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_prog_c_openmp='none needed' +else + ac_cv_prog_c_openmp='unsupported' + for ac_option in -fopenmp -xopenmp -openmp -mp -omp -qsmp=omp -homp \ + -Popenmp --openmp; do + ac_save_CFLAGS=$CFLAGS + CFLAGS="$CFLAGS $ac_option" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#ifndef _OPENMP + choke me +#endif +#include +int main () { return omp_get_num_threads (); } + +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_prog_c_openmp=$ac_option +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + CFLAGS=$ac_save_CFLAGS + if test "$ac_cv_prog_c_openmp" != unsupported; then + break + fi + done +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_c_openmp" >&5 +$as_echo "$ac_cv_prog_c_openmp" >&6; } + case $ac_cv_prog_c_openmp in #( + "none needed" | unsupported) + ;; #( + *) + OPENMP_CFLAGS=$ac_cv_prog_c_openmp ;; + esac + fi + + + pac_cv_openmp_ccopt="$OPENMP_CFLAGS"; + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +else + pac_cv_openmp="no"; + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no." >&5 +$as_echo "no." >&6; } +fi + + +if test x"$pac_cv_openmp" == x"yes" ; then + FDEFINES="$psblas_cv_define_prepend-DOPENMP $FDEFINES"; + CDEFINES="-DOPENMP $CDEFINES"; + FCOPT="$FCOPT $pac_cv_openmp_fcopt"; + CCOPT="$CCOPT $pac_cv_openmp_ccopt"; + FLINK="$FLINK $pac_cv_openmp_fcopt"; +fi # # Tests for support of various Fortran features; some of them are critical, # some optional diff --git a/configure.ac b/configure.ac index a2650489..cc354ba4 100755 --- a/configure.ac +++ b/configure.ac @@ -498,7 +498,15 @@ FDEFINES="$psblas_cv_define_prepend-DIPK${pac_cv_ipk_size} $FDEFINES"; FDEFINES="$psblas_cv_define_prepend-DLPK${pac_cv_lpk_size} $FDEFINES"; CDEFINES="-DIPK${pac_cv_ipk_size} -DLPK${pac_cv_lpk_size} $CDEFINES" - +FLINK="$MPIFC" +PAC_ARG_OPENMP() +if test x"$pac_cv_openmp" == x"yes" ; then + FDEFINES="$psblas_cv_define_prepend-DOPENMP $FDEFINES"; + CDEFINES="-DOPENMP $CDEFINES"; + FCOPT="$FCOPT $pac_cv_openmp_fcopt"; + CCOPT="$CCOPT $pac_cv_openmp_ccopt"; + FLINK="$FLINK $pac_cv_openmp_fcopt"; +fi # # Tests for support of various Fortran features; some of them are critical, # some optional diff --git a/docs/src/datastruct.tex b/docs/src/datastruct.tex index 613b54ee..16e02f2f 100644 --- a/docs/src/datastruct.tex +++ b/docs/src/datastruct.tex @@ -64,7 +64,7 @@ First we describe the \verb|psb_indx_map| type. This is a data structure that keeps track of a certain number of basic issues such as: \begin{itemize} -\item The value of the communication/MPI context; +\item The value of the communication context; \item The number of indices in the index space, i.e. global number of rows and columns of a sparse matrix; \item The local set of indices, including: @@ -309,7 +309,7 @@ Type: {\bf optional}; default: \verb|.true.|.\\ \subsubsection{get\_context --- Get communication context} \begin{verbatim} -ictxt = desc%get_context() +ctxt = desc%get_context() \end{verbatim} \begin{description} diff --git a/docs/src/penv.tex b/docs/src/penv.tex index 9465c8b3..9b019187 100644 --- a/docs/src/penv.tex +++ b/docs/src/penv.tex @@ -7,7 +7,7 @@ environment} \begin{verbatim} -call psb_init(icontxt, np, basectxt, ids) +call psb_init(ctxt, np, basectxt, ids) \end{verbatim} This subroutine initializes the PSBLAS parallel environment, defining @@ -41,8 +41,8 @@ Default: use the indices $(0\dots np-1)$. \begin{description} \item[\bf On Return] -\item[icontxt] the communication context identifying the virtual - parallel machine. Note that this is always a duplicate of +\item[ctxt] the communication context identifying the virtual + parallel machine, type \verb|psb_ctxt_type|. Note that this is always a duplicate of \verb|basectxt|, so that library communications are completely separated from other communication operations.\\ Scope: {\bf global}.\\ @@ -65,7 +65,7 @@ Specified as: an integer variable. environment} \begin{verbatim} -call psb_info(icontxt, iam, np) +call psb_info(ctxt, iam, np) \end{verbatim} This subroutine returns information about the PSBLAS parallel environment, defining @@ -73,7 +73,7 @@ a virtual parallel machine. \begin{description} \item[Type:] Asynchronous. \item[\bf On Entry ] -\item[icontxt] the communication context identifying the virtual +\item[ctxt] the communication context identifying the virtual parallel machine.\\ Scope: {\bf global}.\\ Type: {\bf required}.\\ @@ -103,7 +103,7 @@ Specified as: an integer variable. \ \item If the user has requested on \verb|psb_init| a number of processes less than the total available in the parallel execution environment, the remaining processes will have on return $iam=-1$; - the only call involving \verb|icontxt| that any such process may + the only call involving \verb|ctxt| that any such process may execute is to \verb|psb_exit|. \end{enumerate} @@ -112,22 +112,22 @@ Specified as: an integer variable. \ environment} \begin{verbatim} -call psb_exit(icontxt) -call psb_exit(icontxt,close) +call psb_exit(ctxt) +call psb_exit(ctxt,close) \end{verbatim} This subroutine exits from the PSBLAS parallel virtual machine. \begin{description} \item[Type:] Synchronous. \item[\bf On Entry ] -\item[icontxt] the communication context identifying the virtual +\item[ctxt] the communication context identifying the virtual parallel machine.\\ Scope: {\bf global}.\\ Type: {\bf required}.\\ Intent: {\bf in}.\\ Specified as: an integer variable. \item[close] Whether to close all data structures related to the - virtual parallel machine, besides those associated with icontxt.\\ + virtual parallel machine, besides those associated with ctxt.\\ Scope: {\bf global}.\\ Type: {\bf optional}.\\ Intent: {\bf in}.\\ @@ -138,7 +138,7 @@ Specified as: a logical variable, default value: true. \begin{enumerate} \item This routine may be called even if a previous call to \verb|psb_info| has returned with $iam=-1$; indeed, it it is the only - routine that may be called with argument \verb|icontxt| in this + routine that may be called with argument \verb|ctxt| in this situation. \item A call to this routine with \verb|close=.true.| implies a call to \verb|MPI_Finalize|, after which no parallel routine may be called. @@ -154,14 +154,14 @@ Specified as: a logical variable, default value: true. \clearpage\subsection{psb\_get\_mpi\_comm --- Get the MPI communicator} \begin{verbatim} -icomm = psb_get_mpi_comm(icontxt) +icomm = psb_get_mpi_comm(ctxt) \end{verbatim} This function returns the MPI communicator associated with a PSBLAS context \begin{description} \item[Type:] Asynchronous. \item[\bf On Entry ] -\item[icontxt] the communication context identifying the virtual +\item[ctxt] the communication context identifying the virtual parallel machine.\\ Scope: {\bf global}.\\ Type: {\bf required}.\\ @@ -184,14 +184,14 @@ is deprecated. \clearpage\subsection{psb\_get\_mpi\_rank --- Get the MPI rank} \begin{verbatim} -rank = psb_get_mpi_rank(icontxt, id) +rank = psb_get_mpi_rank(ctxt, id) \end{verbatim} This function returns the MPI rank of the PSBLAS process $id$ \begin{description} \item[Type:] Asynchronous. \item[\bf On Entry ] -\item[icontxt] the communication context identifying the virtual +\item[ctxt] the communication context identifying the virtual parallel machine.\\ Scope: {\bf global}.\\ Type: {\bf required}.\\ @@ -238,7 +238,7 @@ Returned as: a \verb|real(psb_dpk_)| variable. environment} \begin{verbatim} -call psb_barrier(icontxt) +call psb_barrier(ctxt) \end{verbatim} This subroutine acts as an explicit synchronization point for the PSBLAS @@ -246,7 +246,7 @@ parallel virtual machine. \begin{description} \item[Type:] Synchronous. \item[\bf On Entry ] -\item[icontxt] the communication context identifying the virtual +\item[ctxt] the communication context identifying the virtual parallel machine.\\ Scope: {\bf global}.\\ Type: {\bf required}.\\ @@ -258,14 +258,14 @@ Specified as: an integer variable. \clearpage\subsection{psb\_abort --- Abort a computation} \begin{verbatim} -call psb_abort(icontxt) +call psb_abort(ctxt) \end{verbatim} This subroutine aborts computation on the parallel virtual machine. \begin{description} \item[Type:] Asynchronous. \item[\bf On Entry ] -\item[icontxt] the communication context identifying the virtual +\item[ctxt] the communication context identifying the virtual parallel machine.\\ Scope: {\bf global}.\\ Type: {\bf required}.\\ @@ -280,7 +280,7 @@ Specified as: an integer variable. \clearpage\subsection{psb\_bcast --- Broadcast data} \begin{verbatim} -call psb_bcast(icontxt, dat, root) +call psb_bcast(ctxt, dat, root) \end{verbatim} This subroutine implements a broadcast operation based on the @@ -288,7 +288,7 @@ underlying communication library. \begin{description} \item[Type:] Synchronous. \item[\bf On Entry ] -\item[icontxt] the communication context identifying the virtual +\item[ctxt] the communication context identifying the virtual parallel machine.\\ Scope: {\bf global}.\\ Type: {\bf required}.\\ @@ -325,7 +325,7 @@ Type, kind, rank and size must agree on all processes. \clearpage\subsection{psb\_sum --- Global sum} \begin{verbatim} -call psb_sum(icontxt, dat, root) +call psb_sum(ctxt, dat, root) \end{verbatim} This subroutine implements a sum reduction operation based on the @@ -333,7 +333,7 @@ underlying communication library. \begin{description} \item[Type:] Synchronous. \item[\bf On Entry ] -\item[icontxt] the communication context identifying the virtual +\item[ctxt] the communication context identifying the virtual parallel machine.\\ Scope: {\bf global}.\\ Type: {\bf required}.\\ @@ -379,7 +379,7 @@ Type, kind, rank and size must agree on all processes. \clearpage\subsection{psb\_max --- Global maximum} \begin{verbatim} -call psb_max(icontxt, dat, root) +call psb_max(ctxt, dat, root) \end{verbatim} This subroutine implements a maximum valuereduction @@ -387,7 +387,7 @@ operation based on the underlying communication library. \begin{description} \item[Type:] Synchronous. \item[\bf On Entry ] -\item[icontxt] the communication context identifying the virtual +\item[ctxt] the communication context identifying the virtual parallel machine.\\ Scope: {\bf global}.\\ Type: {\bf required}.\\ @@ -432,7 +432,7 @@ Type, kind, rank and size must agree on all processes. \clearpage\subsection{psb\_min --- Global minimum} \begin{verbatim} -call psb_min(icontxt, dat, root) +call psb_min(ctxt, dat, root) \end{verbatim} This subroutine implements a minimum value reduction @@ -440,7 +440,7 @@ operation based on the underlying communication library. \begin{description} \item[Type:] Synchronous. \item[\bf On Entry ] -\item[icontxt] the communication context identifying the virtual +\item[ctxt] the communication context identifying the virtual parallel machine.\\ Scope: {\bf global}.\\ Type: {\bf required}.\\ @@ -485,7 +485,7 @@ Type, kind, rank and size must agree on all processes. \clearpage\subsection{psb\_amx --- Global maximum absolute value} \begin{verbatim} -call psb_amx(icontxt, dat, root) +call psb_amx(ctxt, dat, root) \end{verbatim} This subroutine implements a maximum absolute value reduction @@ -493,7 +493,7 @@ operation based on the underlying communication library. \begin{description} \item[Type:] Synchronous. \item[\bf On Entry ] -\item[icontxt] the communication context identifying the virtual +\item[ctxt] the communication context identifying the virtual parallel machine.\\ Scope: {\bf global}.\\ Type: {\bf required}.\\ @@ -538,7 +538,7 @@ Type, kind, rank and size must agree on all processes. \clearpage\subsection{psb\_amn --- Global minimum absolute value} \begin{verbatim} -call psb_amn(icontxt, dat, root) +call psb_amn(ctxt, dat, root) \end{verbatim} This subroutine implements a minimum absolute value reduction @@ -546,7 +546,7 @@ operation based on the underlying communication library. \begin{description} \item[Type:] Synchronous. \item[\bf On Entry ] -\item[icontxt] the communication context identifying the virtual +\item[ctxt] the communication context identifying the virtual parallel machine.\\ Scope: {\bf global}.\\ Type: {\bf required}.\\ @@ -591,7 +591,7 @@ Type, kind, rank and size must agree on all processes. \clearpage\subsection{psb\_nrm2 --- Global 2-norm reduction} \begin{verbatim} -call psb_nrm2(icontxt, dat, root) +call psb_nrm2(ctxt, dat, root) \end{verbatim} This subroutine implements a 2-norm value reduction @@ -599,7 +599,7 @@ operation based on the underlying communication library. \begin{description} \item[Type:] Synchronous. \item[\bf On Entry ] -\item[icontxt] the communication context identifying the virtual +\item[ctxt] the communication context identifying the virtual parallel machine.\\ Scope: {\bf global}.\\ Type: {\bf required}.\\ @@ -651,14 +651,14 @@ Kind, rank and size must agree on all processes. \clearpage\subsection{psb\_snd --- Send data} \begin{verbatim} -call psb_snd(icontxt, dat, dst, m) +call psb_snd(ctxt, dat, dst, m) \end{verbatim} This subroutine sends a packet of data to a destination. \begin{description} \item[Type:] Synchronous: see usage notes. \item[\bf On Entry ] -\item[icontxt] the communication context identifying the virtual +\item[ctxt] the communication context identifying the virtual parallel machine.\\ Scope: {\bf global}.\\ Type: {\bf required}.\\ @@ -702,14 +702,14 @@ same value on sending and receiving processes. \clearpage\subsection{psb\_rcv --- Receive data} \begin{verbatim} -call psb_rcv(icontxt, dat, src, m) +call psb_rcv(ctxt, dat, src, m) \end{verbatim} This subroutine receives a packet of data to a destination. \begin{description} \item[Type:] Synchronous: see usage notes. \item[\bf On Entry ] -\item[icontxt] the communication context identifying the virtual +\item[ctxt] the communication context identifying the virtual parallel machine.\\ Scope: {\bf global}.\\ Type: {\bf required}.\\ diff --git a/docs/src/psbrout.tex b/docs/src/psbrout.tex index fbe0d3da..37f84ec8 100644 --- a/docs/src/psbrout.tex +++ b/docs/src/psbrout.tex @@ -215,7 +215,7 @@ An integer value; 0 means no error has been detected. vres(1) = psb_gedot(x1,y1,desc_a,info,global=.false.) vres(2) = psb_gedot(x2,y2,desc_a,info,global=.false.) vres(3) = psb_gedot(x3,y3,desc_a,info,global=.false.) - call psb_sum(ictxt,vres(1:3)) + call psb_sum(ctxt,vres(1:3)) \end{lstlisting} In this way the global communication, which for small sizes is a latency-bound operation, is invoked only once. @@ -391,7 +391,7 @@ An integer value; 0 means no error has been detected. vres(1) = psb_geamax(x1,desc_a,info,global=.false.) vres(2) = psb_geamax(x2,desc_a,info,global=.false.) vres(3) = psb_geamax(x3,desc_a,info,global=.false.) - call psb_amx(ictxt,vres(1:3)) + call psb_amx(ctxt,vres(1:3)) \end{lstlisting} In this way the global communication, which for small sizes is a latency-bound operation, is invoked only once. @@ -544,7 +544,7 @@ An integer value; 0 means no error has been detected. vres(1) = psb_geasum(x1,desc_a,info,global=.false.) vres(2) = psb_geasum(x2,desc_a,info,global=.false.) vres(3) = psb_geasum(x3,desc_a,info,global=.false.) - call psb_sum(ictxt,vres(1:3)) + call psb_sum(ctxt,vres(1:3)) \end{lstlisting} In this way the global communication, which for small sizes is a latency-bound operation, is invoked only once. @@ -714,7 +714,7 @@ An integer value; 0 means no error has been detected. vres(1) = psb_genrm2(x1,desc_a,info,global=.false.) vres(2) = psb_genrm2(x2,desc_a,info,global=.false.) vres(3) = psb_genrm2(x3,desc_a,info,global=.false.) - call psb_nrm2(ictxt,vres(1:3)) + call psb_nrm2(ctxt,vres(1:3)) \end{lstlisting} In this way the global communication, which for small sizes is a latency-bound operation, is invoked only once. diff --git a/prec/Makefile b/prec/Makefile index e3b727b7..ec5892fe 100644 --- a/prec/Makefile +++ b/prec/Makefile @@ -12,12 +12,20 @@ MODOBJS=psb_prec_const_mod.o\ psb_d_diagprec.o psb_d_nullprec.o psb_d_bjacprec.o psb_s_ilu_fact_mod.o \ psb_s_diagprec.o psb_s_nullprec.o psb_s_bjacprec.o psb_d_ilu_fact_mod.o \ psb_c_diagprec.o psb_c_nullprec.o psb_c_bjacprec.o psb_c_ilu_fact_mod.o \ - psb_z_diagprec.o psb_z_nullprec.o psb_z_bjacprec.o psb_z_ilu_fact_mod.o - + psb_z_diagprec.o psb_z_nullprec.o psb_z_bjacprec.o psb_z_ilu_fact_mod.o \ + psb_c_ainv_fact_mod.o psb_d_ainv_fact_mod.o psb_s_ainv_fact_mod.o psb_z_ainv_fact_mod.o \ + psb_c_ainv_tools_mod.o psb_d_ainv_tools_mod.o psb_s_ainv_tools_mod.o psb_z_ainv_tools_mod.o \ + psb_ainv_tools_mod.o \ + psb_biconjg_mod.o psb_c_biconjg_mod.o psb_d_biconjg_mod.o psb_s_biconjg_mod.o \ + psb_z_biconjg_mod.o \ + psb_c_invt_fact_mod.o psb_d_invt_fact_mod.o psb_s_invt_fact_mod.o \ + psb_z_invt_fact_mod.o\ + psb_c_invk_fact_mod.o psb_d_invk_fact_mod.o psb_s_invk_fact_mod.o \ + psb_z_invk_fact_mod.o LIBNAME=$(PRECLIBNAME) COBJS= -FINCLUDES=$(FMFLAG). $(FMFLAG)$(MODDIR) +FINCLUDES=$(FMFLAG). $(FMFLAG)$(MODDIR) OBJS=$(F90OBJS) $(COBJS) $(MPFOBJS) $(MODOBJS) lib: $(OBJS) impld @@ -43,15 +51,30 @@ psb_d_prec_mod.o: psb_prec_type.o psb_c_prec_mod.o: psb_prec_type.o psb_z_prec_mod.o: psb_prec_type.o psb_prec_type.o: psb_s_prec_type.o psb_d_prec_type.o psb_c_prec_type.o psb_z_prec_type.o -psb_prec_mod.o: psb_s_prec_mod.o psb_d_prec_mod.o psb_c_prec_mod.o psb_z_prec_mod.o -psb_s_bjacprec.o psb_s_diagprec.o psb_s_nullprec.o: psb_prec_mod.o psb_s_base_prec_mod.o -psb_d_bjacprec.o psb_d_diagprec.o psb_d_nullprec.o: psb_prec_mod.o psb_d_base_prec_mod.o -psb_c_bjacprec.o psb_c_diagprec.o psb_c_nullprec.o: psb_prec_mod.o psb_c_base_prec_mod.o +psb_prec_mod.o: psb_s_prec_mod.o psb_d_prec_mod.o psb_c_prec_mod.o psb_z_prec_mod.o +psb_s_bjacprec.o psb_s_diagprec.o psb_s_nullprec.o: psb_prec_mod.o psb_s_base_prec_mod.o +psb_d_bjacprec.o psb_d_diagprec.o psb_d_nullprec.o: psb_prec_mod.o psb_d_base_prec_mod.o +psb_c_bjacprec.o psb_c_diagprec.o psb_c_nullprec.o: psb_prec_mod.o psb_c_base_prec_mod.o psb_z_bjacprec.o psb_z_diagprec.o psb_z_nullprec.o: psb_prec_mod.o psb_z_base_prec_mod.o -psb_s_bjacprec.o: psb_s_ilu_fact_mod.o -psb_d_bjacprec.o: psb_d_ilu_fact_mod.o -psb_c_bjacprec.o: psb_c_ilu_fact_mod.o -psb_z_bjacprec.o: psb_z_ilu_fact_mod.o +psb_s_bjacprec.o: psb_s_ilu_fact_mod.o psb_s_ainv_fact_mod.o psb_s_invk_fact_mod.o psb_s_invt_fact_mod.o +psb_d_bjacprec.o: psb_d_ilu_fact_mod.o psb_d_ainv_fact_mod.o psb_d_invk_fact_mod.o psb_d_invt_fact_mod.o +psb_c_bjacprec.o: psb_c_ilu_fact_mod.o psb_c_ainv_fact_mod.o psb_c_invk_fact_mod.o psb_c_invt_fact_mod.o +psb_z_bjacprec.o: psb_z_ilu_fact_mod.o psb_z_ainv_fact_mod.o psb_z_invk_fact_mod.o psb_z_invt_fact_mod.o +psb_d_ainv_fact_mod.o: psb_prec_const_mod.o psb_ainv_tools_mod.o +psb_s_ainv_fact_mod.o: psb_prec_const_mod.o psb_ainv_tools_mod.o +psb_c_ainv_fact_mod.o: psb_prec_const_mod.o psb_ainv_tools_mod.o +psb_z_ainv_fact_mod.o: psb_prec_const_mod.o psb_ainv_tools_mod.o +psb_ainv_tools_mod.o: psb_c_ainv_tools_mod.o psb_d_ainv_tools_mod.o psb_s_ainv_tools_mod.o psb_z_ainv_tools_mod.o +psb_biconjg_mod.o: psb_prec_const_mod.o psb_c_biconjg_mod.o \ + psb_d_biconjg_mod.o psb_s_biconjg_mod.o psb_z_biconjg_mod.o +psb_c_invt_fact_mod.o: psb_prec_const_mod.o psb_c_ilu_fact_mod.o +psb_d_invt_fact_mod.o: psb_prec_const_mod.o psb_d_ilu_fact_mod.o +psb_s_invt_fact_mod.o: psb_prec_const_mod.o psb_s_ilu_fact_mod.o +psb_z_invt_fact_mod.o: psb_prec_const_mod.o psb_z_ilu_fact_mod.o +psb_c_invk_fact_mod.o: psb_prec_const_mod.o psb_c_ilu_fact_mod.o +psb_d_invk_fact_mod.o: psb_prec_const_mod.o psb_d_ilu_fact_mod.o +psb_s_invk_fact_mod.o: psb_prec_const_mod.o psb_s_ilu_fact_mod.o +psb_z_invk_fact_mod.o: psb_prec_const_mod.o psb_z_ilu_fact_mod.o veryclean: clean /bin/rm -f $(LIBNAME) *$(.mod) @@ -60,4 +83,3 @@ iclean: cd impl && $(MAKE) clean clean: iclean /bin/rm -f $(OBJS) $(LOCAL_MODS) - diff --git a/prec/impl/Makefile b/prec/impl/Makefile index 80e87a54..57d7c304 100644 --- a/prec/impl/Makefile +++ b/prec/impl/Makefile @@ -17,13 +17,32 @@ OBJS=psb_s_prec_type_impl.o psb_d_prec_type_impl.o \ psb_cprecbld.o psb_cprecset.o psb_cprecinit.o \ psb_z_diagprec_impl.o psb_z_bjacprec_impl.o psb_z_nullprec_impl.o \ psb_zilu_fct.o psb_z_ilu0_fact.o psb_z_iluk_fact.o psb_z_ilut_fact.o \ - psb_zprecbld.o psb_zprecset.o psb_zprecinit.o + psb_zprecbld.o psb_zprecset.o psb_zprecinit.o \ + psb_c_sparsify.o psb_d_sparsify.o psb_s_sparsify.o psb_z_sparsify.o \ + psb_crwclip.o psb_drwclip.o psb_srwclip.o psb_zrwclip.o \ + psb_c_sp_drop.o psb_d_sp_drop.o psb_s_sp_drop.o psb_z_sp_drop.o \ + psb_dsparse_biconjg_llk_noth.o psb_dsparse_biconjg_llk.o \ + psb_dsparse_biconjg_mlk.o psb_dsparse_biconjg_s_ft_llk.o \ + psb_dsparse_biconjg_s_llk.o \ + psb_csparse_biconjg_llk_noth.o psb_csparse_biconjg_llk.o \ + psb_csparse_biconjg_mlk.o psb_csparse_biconjg_s_ft_llk.o \ + psb_csparse_biconjg_s_llk.o \ + psb_zsparse_biconjg_llk_noth.o psb_zsparse_biconjg_llk.o \ + psb_zsparse_biconjg_mlk.o psb_zsparse_biconjg_s_ft_llk.o \ + psb_zsparse_biconjg_s_llk.o \ + psb_ssparse_biconjg_llk_noth.o psb_ssparse_biconjg_llk.o \ + psb_ssparse_biconjg_mlk.o psb_ssparse_biconjg_s_ft_llk.o \ + psb_ssparse_biconjg_s_llk.o \ + psb_d_ainv_bld.o psb_c_ainv_bld.o psb_s_ainv_bld.o \ + psb_z_ainv_bld.o \ + psb_c_invt_fact.o psb_d_invt_fact.o psb_s_invt_fact.o psb_z_invt_fact.o\ + psb_c_invk_fact.o psb_d_invk_fact.o psb_s_invk_fact.o psb_z_invk_fact.o LIBNAME=$(PRECLIBNAME) COBJS= -FINCLUDES=$(FMFLAG).. $(FMFLAG)$(MODDIR) +FINCLUDES=$(FMFLAG).. $(FMFLAG)$(MODDIR) -lib: $(OBJS) +lib: $(OBJS) $(AR) $(HERE)/$(LIBNAME) $(OBJS) $(RANLIB) $(HERE)/$(LIBNAME) @@ -31,4 +50,3 @@ veryclean: clean clean: /bin/rm -f $(OBJS) $(LOCAL_MODS) - diff --git a/prec/impl/psb_c_ainv_bld.f90 b/prec/impl/psb_c_ainv_bld.f90 new file mode 100644 index 00000000..5cf2a45e --- /dev/null +++ b/prec/impl/psb_c_ainv_bld.f90 @@ -0,0 +1,225 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! Moved here from AMG-AINV, original copyright below. +! +! +! AMG-AINV: Approximate Inverse plugin for +! AMG4PSBLAS version 1.0 +! +! (C) Copyright 2020 +! +! Salvatore Filippone University of Rome Tor Vergata +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! +subroutine psb_c_ainv_bld(a,alg,fillin,thresh,wmat,d,zmat,desc,info,blck,iscale) + + use psb_base_mod + use psb_prec_const_mod + use psb_c_biconjg_mod + + implicit none + + ! Arguments + type(psb_cspmat_type), intent(in), target :: a + integer(psb_ipk_), intent(in) :: fillin,alg + real(psb_spk_), intent(in) :: thresh + type(psb_cspmat_type), intent(inout) :: wmat, zmat + complex(psb_spk_), allocatable :: d(:) + Type(psb_desc_type), Intent(in) :: desc + integer(psb_ipk_), intent(out) :: info + type(psb_cspmat_type), intent(in), optional :: blck + integer(psb_ipk_), intent(in), optional :: iscale + ! + integer(psb_ipk_) :: i, nztota, err_act, n_row, nrow_a + type(psb_c_coo_sparse_mat) :: acoo + type(psb_c_csr_sparse_mat) :: acsr + type(psb_cspmat_type) :: atmp + real(psb_spk_), allocatable :: arws(:), acls(:) + complex(psb_spk_), allocatable :: pq(:), ad(:) + integer(psb_ipk_) :: debug_level, debug_unit + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: np,me + integer(psb_ipk_) :: nzrmax, iscale_ + real(psb_spk_) :: sp_thresh + complex(psb_spk_) :: weight + character(len=20) :: name, ch_err + + + info = psb_success_ + name = 'psb_cainv_bld' + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_; goto 9999 + end if + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + ctxt = psb_cd_get_context(desc) + call psb_info(ctxt, me, np) + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),' start' + + iscale_ = psb_ilu_scale_none_ + if (present(iscale)) iscale_ = iscale + weight = cone + ! + ! Check the memory available to hold the W and Z factors + ! and allocate it if needed + ! + nrow_a = a%get_nrows() + nztota = a%get_nzeros() + + if (present(blck)) then + nztota = nztota + blck%get_nzeros() + end if + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & ': out get_nnzeros',nrow_a,nztota,& + & a%get_nrows(),a%get_ncols(),a%get_nzeros() + + + n_row = desc%get_local_rows() + allocate(pq(n_row),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + end if + + nzrmax = fillin + sp_thresh = thresh + + ! + ! Ok, let's start first with Z (i.e. Upper) + ! + call a%csclip(acoo,info,imax=n_row,jmax=n_row) + call acsr%mv_from_coo(acoo,info) + select case(iscale_) + case(psb_ilu_scale_none_) + ! Ok, do nothing. + + case(psb_ilu_scale_maxval_) + weight = acsr%maxval() + weight = cone/weight + call acsr%scal(weight,info) + + case(psb_ilu_scale_arcsum_) + allocate(arws(n_row),acls(n_row),ad(n_row),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + end if + call acsr%arwsum(arws) + call acsr%aclsum(acls) + ad(1:n_row) = sqrt(sqrt(arws(1:n_row)*acls(1:n_row))) + ad(1:n_row) = cone/ad(1:n_row) + call acsr%scal(ad,info,side='L') + call acsr%scal(ad,info,side='R') + case default + call psb_errpush(psb_err_from_subroutine_,name,a_err='wrong iscale') + goto 9999 + end select + + ! + ! Here for the actual workhorses. + ! Only biconjg is surviving for now.... + ! + call psb_sparse_biconjg(alg,n_row,acsr,pq,& + & zmat,wmat,nzrmax,sp_thresh,info) + ! Done. Hopefully.... + + if (info /= psb_success_) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='sparse_biconjg') + goto 9999 + end if + call atmp%mv_from(acsr) + + ! + ! Is this right??? + ! + do i=1, n_row + if (abs(pq(i)) < d_epstol) then + pq(i) = cone + else + pq(i) = cone/pq(i) + end if + end do + + select case(iscale_) + case(psb_ilu_scale_none_) + ! Ok, do nothing. + case(psb_ilu_scale_maxval_) + pq(:) = pq(:)*weight + + case(psb_ilu_scale_arcsum_) + call zmat%scal(ad,info,side='L') + call wmat%scal(ad,info,side='R') + + case default + call psb_errpush(psb_err_from_subroutine_,name,a_err='wrong iscale') + goto 9999 + end select + + call psb_move_alloc(pq,d,info) + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),' end' + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return +end subroutine psb_c_ainv_bld diff --git a/prec/impl/psb_c_bjacprec_impl.f90 b/prec/impl/psb_c_bjacprec_impl.f90 index dbd890c7..32baf385 100644 --- a/prec/impl/psb_c_bjacprec_impl.f90 +++ b/prec/impl/psb_c_bjacprec_impl.f90 @@ -1,9 +1,9 @@ -! +! ! Parallel Sparse BLAS version 3.5 ! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! +! Salvatore Filippone +! Alfredo Buttari +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -15,7 +15,7 @@ ! 3. The name of the PSBLAS group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. -! +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -27,13 +27,13 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! -! +! +! subroutine psb_c_bjac_dump(prec,info,prefix,head) use psb_base_mod use psb_c_bjacprec, psb_protect_name => psb_c_bjac_dump - implicit none + implicit none class(psb_c_bjac_prec_type), intent(in) :: prec integer(psb_ipk_), intent(out) :: info character(len=*), intent(in), optional :: prefix,head @@ -43,13 +43,13 @@ subroutine psb_c_bjac_dump(prec,info,prefix,head) character(len=80) :: prefix_ character(len=120) :: fname ! len should be at least 20 more than - ! len of prefix_ + ! len of prefix_ info = 0 ctxt = prec%get_ctxt() call psb_info(ctxt,iam,np) - if (present(prefix)) then + if (present(prefix)) then prefix_ = trim(prefix(1:min(len(prefix),len(prefix_)))) else prefix_ = "dump_fact_c" @@ -74,7 +74,7 @@ end subroutine psb_c_bjac_dump subroutine psb_c_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work) use psb_base_mod use psb_c_bjacprec, psb_protect_name => psb_c_bjac_apply_vect - implicit none + implicit none type(psb_desc_type),intent(in) :: desc_data class(psb_c_bjac_prec_type), intent(inout) :: prec complex(psb_spk_),intent(in) :: alpha,beta @@ -118,12 +118,12 @@ subroutine psb_c_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work) n_row = desc_data%get_local_rows() n_col = desc_data%get_local_cols() - if (x%get_nrows() < n_row) then + if (x%get_nrows() < n_row) then info = 36; ierr(1) = 2; ierr(2) = n_row; call psb_errpush(info,name,i_err=ierr) goto 9999 end if - if (y%get_nrows() < n_row) then + if (y%get_nrows() < n_row) then info = 36; ierr(1) = 3; ierr(2) = n_row; call psb_errpush(info,name,i_err=ierr) goto 9999 @@ -140,9 +140,9 @@ subroutine psb_c_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work) end if - if (n_col <= size(work)) then + if (n_col <= size(work)) then ww => work(1:n_col) - if ((4*n_col+n_col) <= size(work)) then + if ((4*n_col+n_col) <= size(work)) then aux => work(n_col+1:) else allocate(aux(4*n_col),stat=info) @@ -152,19 +152,19 @@ subroutine psb_c_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work) allocate(ww(n_col),aux(4*n_col),stat=info) endif - if (info /= psb_success_) then + if (info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 + goto 9999 end if do_alloc_wrk = .not.prec%is_allocated_wrk() if (do_alloc_wrk) call prec%allocate_wrk(info,vmold=x%v) associate (wv => prec%wrk(1), wv1 => prec%wrk(2)) - + select case(prec%iprcparm(psb_f_type_)) - case(psb_f_ilu_n_) - + case(psb_f_ilu_n_,psb_f_ilu_k_,psb_f_ilu_t_) + select case(trans_) case('N') call psb_spsm(cone,prec%av(psb_l_pr_),x,czero,wv,desc_data,info,& @@ -172,31 +172,58 @@ subroutine psb_c_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work) if(info == psb_success_) call psb_spsm(alpha,prec%av(psb_u_pr_),wv,& & beta,y,desc_data,info,& & trans=trans_,scale='U',choice=psb_none_, work=aux) - + case('T') call psb_spsm(cone,prec%av(psb_u_pr_),x,czero,wv,desc_data,info,& & trans=trans_,scale='L',diag=prec%dv,choice=psb_none_, work=aux) if(info == psb_success_) call psb_spsm(alpha,prec%av(psb_l_pr_),wv,& & beta,y,desc_data,info,& & trans=trans_,scale='U',choice=psb_none_,work=aux) - + case('C') - + call psb_spsm(cone,prec%av(psb_u_pr_),x,czero,wv,desc_data,info,& & trans=trans_,scale='U',choice=psb_none_, work=aux) - + call wv1%mlt(cone,prec%dv,wv,czero,info,conjgx=trans_) - + if(info == psb_success_) call psb_spsm(alpha,prec%av(psb_l_pr_),wv1,& & beta,y,desc_data,info,& & trans=trans_,scale='U',choice=psb_none_,work=aux) - + + end select + if (info /= psb_success_) then + ch_err="psb_spsm" + goto 9999 + end if + + case(psb_f_ainv_,psb_f_invt_,psb_f_invk_) + ! Application of approximate inverse preconditioner, just some spmm + + select case(trans_) + case('N') + call psb_spmm(cone,prec%av(psb_l_pr_),x,czero,wv,desc_data,info,& + & trans=trans_,work=aux,doswap=.false.) + + if (info == psb_success_) call wv1%mlt(cone,prec%dv,wv,czero,info) + if(info == psb_success_) & + & call psb_spmm(alpha,prec%av(psb_u_pr_),wv1,& + & beta,y,desc_data,info, trans=trans_, work=aux,doswap=.false.) + + case('T','C') + call psb_spmm(cone,prec%av(psb_l_pr_),x,czero,wv,desc_data,info,& + & trans=trans_,work=aux,doswap=.false.) + if (info == psb_success_) call wv1%mlt(cone,prec%dv,wv,czero,info) + if (info == psb_success_) & + & call psb_spmm(alpha,prec%av(psb_u_pr_),wv1, & + & beta,y,desc_data,info,trans=trans_,work=aux,doswap=.false.) + end select - if (info /= psb_success_) then + if (info /= psb_success_) then ch_err="psb_spsm" goto 9999 end if - + case default info = psb_err_internal_error_ call psb_errpush(info,name,a_err='Invalid factorization') @@ -204,12 +231,12 @@ subroutine psb_c_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work) end select end associate - + call psb_halo(y,desc_data,info,data=psb_comm_mov_) if (do_alloc_wrk) call prec%free_wrk(info) - if (n_col <= size(work)) then - if ((4*n_col+n_col) <= size(work)) then + if (n_col <= size(work)) then + if ((4*n_col+n_col) <= size(work)) then else deallocate(aux) endif @@ -231,7 +258,7 @@ end subroutine psb_c_bjac_apply_vect subroutine psb_c_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work) use psb_base_mod use psb_c_bjacprec, psb_protect_name => psb_c_bjac_apply - implicit none + implicit none type(psb_desc_type),intent(in) :: desc_data class(psb_c_bjac_prec_type), intent(inout) :: prec complex(psb_spk_),intent(in) :: alpha,beta @@ -244,6 +271,7 @@ subroutine psb_c_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work) ! Local variables integer(psb_ipk_) :: n_row,n_col complex(psb_spk_), pointer :: ww(:), aux(:) + type(psb_d_vect_type) :: tx,ty type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np,me integer(psb_ipk_) :: err_act, ierr(5) @@ -273,12 +301,12 @@ subroutine psb_c_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work) n_row = desc_data%get_local_rows() n_col = desc_data%get_local_cols() - if (size(x) < n_row) then + if (size(x) < n_row) then info = 36; ierr(1) = 2; ierr(2) = n_row; call psb_errpush(info,name,i_err=ierr) goto 9999 end if - if (size(y) < n_row) then + if (size(y) < n_row) then info = 36; ierr(1) = 3; ierr(2) = n_row; call psb_errpush(info,name,i_err=ierr) goto 9999 @@ -295,29 +323,29 @@ subroutine psb_c_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work) end if - if (n_col <= size(work)) then + if (n_col <= size(work)) then ww => work(1:n_col) - if ((4*n_col+n_col) <= size(work)) then + if ((4*n_col+n_col) <= size(work)) then aux => work(n_col+1:) else allocate(aux(4*n_col),stat=info) - if (info /= psb_success_) then + if (info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 + goto 9999 end if endif else allocate(ww(n_col),aux(4*n_col),stat=info) - if (info /= psb_success_) then + if (info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 + goto 9999 end if endif select case(prec%iprcparm(psb_f_type_)) - case(psb_f_ilu_n_) + case(psb_f_ilu_n_,psb_f_ilu_k_,psb_f_ilu_t_) select case(trans_) case('N') @@ -344,11 +372,42 @@ subroutine psb_c_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work) & trans=trans_,scale='U',choice=psb_none_,work=aux) end select - if (info /= psb_success_) then + if (info /= psb_success_) then ch_err="psb_spsm" goto 9999 end if + case(psb_f_ainv_,psb_f_invt_,psb_f_invk_) + ! Application of approximate inverse preconditioner, just some spmm + + select case(trans_) + + case('N') + call psb_spmm(cone,prec%av(psb_l_pr_),x,czero,ww,desc_data,info,& + & trans=trans_,work=aux,doswap=.false.) + ww(1:n_row) = ww(1:n_row) * prec%dv%v%v(1:n_row) + if (info == psb_success_) & + & call psb_spmm(alpha,prec%av(psb_u_pr_),ww,beta,y,desc_data,info,& + & trans=trans_,work=aux,doswap=.false.) + + case('T') + call psb_spmm(cone,prec%av(psb_u_pr_),x,czero,ww,desc_data,info,& + & trans=trans_,work=aux,doswap=.false.) + ww(1:n_row) = ww(1:n_row) * prec%dv%v%v(1:n_row) + if (info == psb_success_) & + & call psb_spmm(alpha,prec%av(psb_l_pr_),ww,beta,y,desc_data,info,& + & trans=trans_,work=aux,doswap=.false.) + + case('C') + call psb_spmm(cone,prec%av(psb_u_pr_),x,czero,ww,desc_data,info,& + & trans=trans_,work=aux,doswap=.false.) + ww(1:n_row) = ww(1:n_row) * conjg(prec%dv%v%v(1:n_row)) + if (info == psb_success_) & + & call psb_spmm(alpha,prec%av(psb_l_pr_),ww,beta,y,desc_data,info,& + & trans=trans_,work=aux,doswap=.false.) + + end select + case default info = psb_err_internal_error_ @@ -358,8 +417,8 @@ subroutine psb_c_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work) call psb_halo(y,desc_data,info,data=psb_comm_mov_) - if (n_col <= size(work)) then - if ((4*n_col+n_col) <= size(work)) then + if (n_col <= size(work)) then + if ((4*n_col+n_col) <= size(work)) then else deallocate(aux) endif @@ -397,11 +456,26 @@ subroutine psb_c_bjac_precinit(prec,info) call psb_Errpush(info,name) goto 9999 end if + call psb_realloc(psb_rfpsz,prec%rprcparm,info) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_Errpush(info,name) + goto 9999 + end if prec%iprcparm(:) = 0 prec%iprcparm(psb_p_type_) = psb_bjac_ prec%iprcparm(psb_f_type_) = psb_f_ilu_n_ prec%iprcparm(psb_ilu_fill_in_) = 0 + prec%iprcparm(psb_ilu_ialg_) = psb_ilu_n_ + prec%iprcparm(psb_ilu_scale_) = psb_ilu_scale_none_ + prec%iprcparm(psb_inv_fillin_) = 0 + prec%iprcparm(psb_ainv_alg_) = psb_ainv_llk_ + + + prec%rprcparm(:) = 0 + prec%rprcparm(psb_fact_eps_) = 1E-1_psb_spk_ + prec%rprcparm(psb_inv_thresh_) = 1E-1_psb_spk_ call psb_erractionrestore(err_act) @@ -416,7 +490,8 @@ end subroutine psb_c_bjac_precinit subroutine psb_c_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold) use psb_base_mod - use psb_prec_mod, only : psb_ilu_fct + use psb_c_ilu_fact_mod + use psb_c_ainv_fact_mod use psb_c_bjacprec, psb_protect_name => psb_c_bjac_precbld Implicit None @@ -428,12 +503,13 @@ subroutine psb_c_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold) class(psb_c_base_vect_type), intent(in), optional :: vmold class(psb_i_base_vect_type), intent(in), optional :: imold - ! .. Local Scalars .. - integer(psb_ipk_) :: i, m + ! .. Local Scalars .. + integer(psb_ipk_) :: i, m, ialg, fill_in, iscale, inv_fill, iinvalg integer(psb_ipk_) :: ierr(5) character :: trans, unitd - type(psb_c_csr_sparse_mat), allocatable :: lf, uf + type(psb_cspmat_type), allocatable :: lf, uf complex(psb_spk_), allocatable :: dd(:) + real(psb_spk_) :: fact_eps, inv_thresh integer(psb_ipk_) :: nztota, err_act, n_row, nrow_a,n_col, nhalo type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np,me @@ -462,19 +538,114 @@ subroutine psb_c_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold) trans = 'N' unitd = 'U' + ! We check if all the information contained in the preconditioner structure + ! are meaningful, otherwise we give an error and get out of the build + ! procedure + ialg = prec%iprcparm(psb_ilu_ialg_) ! Integer for ILU type algorithm + iinvalg = prec%iprcparm(psb_ainv_alg_) ! Integer for AINV type algorithm + iscale = prec%iprcparm(psb_ilu_scale_) ! Integer for scaling of matrix + fact_eps = prec%rprcparm(psb_fact_eps_) ! Drop-tolerance for factorization + inv_thresh = prec%rprcparm(psb_inv_thresh_) ! Drop-tolerance for inverse + fill_in = prec%iprcparm(psb_ilu_fill_in_) ! Fill-In for factorization + inv_fill = prec%iprcparm(psb_inv_fillin_) ! Fill-In for inverse factorization + + ! Check if the type of scaling is known, pay attention that not all the + ! scalings make sense for all the factorization, if something that does not + ! make sense is required the factorization routine will fail in an + ! unnrecoverable way. + if ((iscale == psb_ilu_scale_none_).or.& + (iscale == psb_ilu_scale_maxval_).or.& + (iscale == psb_ilu_scale_diag_).or.& + (iscale == psb_ilu_scale_arwsum_).or.& + (iscale == psb_ilu_scale_aclsum_).or.& + (iscale == psb_ilu_scale_arcsum_)) then + ! Do nothing: admissible request + else + info=psb_err_from_subroutine_ + ch_err='psb_ilu_scale_' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + select case (prec%iprcparm(psb_f_type_)) + case (psb_f_ainv_) + ! Check if the variant for the AINV is known to the library + select case (iinvalg) + case(psb_ainv_llk_,psb_ainv_s_llk_,psb_ainv_s_ft_llk_,psb_ainv_llk_noth_,& + & psb_ainv_mlk_) + ! Do nothing these are okay + case default + info=psb_err_from_subroutine_ + ch_err='psb_ainv_alg_' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end select ! AINV Variant + ! Check if the drop-tolerance make sense + if( inv_thresh > 1) then + info=psb_err_from_subroutine_ + ch_err='psb_inv_thresh_' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + case (psb_f_ilu_t_) + if (fact_eps > 1) then + ! Check if the drop-tolerance make sense + info=psb_err_from_subroutine_ + ch_err='psb_fact_eps_' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + case (psb_f_invt_) + ! Check both tolerances + if (fact_eps > 1) then + info=psb_err_from_subroutine_ + ch_err='psb_fact_eps_' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + if( inv_thresh > 1) then + info=psb_err_from_subroutine_ + ch_err='psb_inv_thresh_' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + case default + end select + + + ! Checks relative to the fill-in parameters + if (prec%iprcparm(psb_f_type_) == psb_f_ilu_n_) then + if(fill_in < 0) then + info=psb_err_from_subroutine_ + ch_err='psb_ilu_fill_in_' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + else if (fill_in == 0) then + ! If the requested level of fill is equal to zero, we default to the + ! specialized ILU(0) routine + prec%iprcparm(psb_f_type_) = psb_f_ilu_n_ + end if + end if + ! If no limit on the fill_in is required we allow every fill, this is needed + ! since this quantity is used to allocate the auxiliary vectors for the + ! factorization + if (inv_fill <= 0) inv_fill = m + + ! Select on the type of factorization to be used select case(prec%iprcparm(psb_f_type_)) - case(psb_f_ilu_n_) + case(psb_f_ilu_n_) + ! ILU(0) Factorization: the total number of nonzeros of the factorized matrix + ! is equal to the one of the input matrix - if (allocated(prec%av)) then - if (size(prec%av) < psb_bp_ilu_avsz) then - do i=1,size(prec%av) + if (allocated(prec%av)) then + if (size(prec%av) < psb_bp_ilu_avsz) then + do i=1,size(prec%av) call prec%av(i)%free() enddo deallocate(prec%av,stat=info) endif end if - if (.not.allocated(prec%av)) then + if (.not.allocated(prec%av)) then allocate(prec%av(psb_max_avsz),stat=info) if (info /= psb_success_) then call psb_errpush(psb_err_alloc_dealloc_,name) @@ -501,27 +672,27 @@ subroutine psb_c_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold) end if allocate(dd(n_row),stat=info) - if (info == psb_success_) then + if (info == psb_success_) then allocate(prec%dv, stat=info) - if (info == 0) then - if (present(vmold)) then - allocate(prec%dv%v,mold=vmold,stat=info) - else - allocate(psb_c_base_vect_type :: prec%dv%v,stat=info) + if (info == 0) then + if (present(vmold)) then + allocate(prec%dv%v,mold=vmold,stat=info) + else + allocate(psb_c_base_vect_type :: prec%dv%v,stat=info) end if end if end if - if (info /= psb_success_) then + if (info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 + goto 9999 endif - ! This is where we have no renumbering, thus no need - call psb_ilu_fct(a,lf,uf,dd,info) + ! This is where we have no renumbering, thus no need + call psb_ilu0_fact(ialg,a,lf,uf,dd,info) if(info == psb_success_) then - call prec%av(psb_l_pr_)%mv_from(lf) - call prec%av(psb_u_pr_)%mv_from(uf) + call prec%av(psb_l_pr_)%mv_from(lf%a) + call prec%av(psb_u_pr_)%mv_from(uf%a) call prec%av(psb_l_pr_)%set_asb() call prec%av(psb_u_pr_)%set_asb() call prec%av(psb_l_pr_)%trim() @@ -530,12 +701,375 @@ subroutine psb_c_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold) ! call move_alloc(dd,prec%d) else info=psb_err_from_subroutine_ - ch_err='psb_ilu_fct' + ch_err='psb_ilu0_fact' call psb_errpush(info,name,a_err=ch_err) goto 9999 end if - case(psb_f_none_) + case(psb_f_ilu_k_) + ! ILU(N) Incomplete LU-factorization with N levels of fill-in. Depending on + ! the type of the variant of the algorithm the may be forgotten or added to + ! the diagonal (MILU) + + if (allocated(prec%av)) then + if (size(prec%av) < psb_bp_ilu_avsz) then + do i=1,size(prec%av) + call prec%av(i)%free() + enddo + deallocate(prec%av,stat=info) + endif + end if + if (.not.allocated(prec%av)) then + allocate(prec%av(psb_max_avsz),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + endif + + nrow_a = desc_a%get_local_rows() + nztota = a%get_nzeros() + + n_col = desc_a%get_local_cols() + nhalo = n_col-nrow_a + n_row = nrow_a + + allocate(lf,uf,stat=info) + if (info == psb_success_) call lf%allocate(n_row,n_row,nztota) + if (info == psb_success_) call uf%allocate(n_row,n_row,nztota) + + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_sp_all' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + allocate(dd(n_row),stat=info) + if (info == psb_success_) then + allocate(prec%dv, stat=info) + if (info == 0) then + if (present(vmold)) then + allocate(prec%dv%v,mold=vmold,stat=info) + else + allocate(psb_c_base_vect_type :: prec%dv%v,stat=info) + end if + end if + end if + + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + endif + ! This is where we have no renumbering, thus no need + call psb_iluk_fact(fill_in,ialg,a,lf,uf,dd,info) + + if(info == psb_success_) then + call prec%av(psb_l_pr_)%mv_from(lf%a) + call prec%av(psb_u_pr_)%mv_from(uf%a) + call prec%av(psb_l_pr_)%set_asb() + call prec%av(psb_u_pr_)%set_asb() + call prec%av(psb_l_pr_)%trim() + call prec%av(psb_u_pr_)%trim() + call prec%dv%bld(dd) + ! call move_alloc(dd,prec%d) + else + info=psb_err_from_subroutine_ + ch_err='psb_iluk_fact' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + case(psb_f_ilu_t_) + ! ILU(N,E) Incomplete LU factorization with thresholding and level of fill + + if (allocated(prec%av)) then + if (size(prec%av) < psb_bp_ilu_avsz) then + do i=1,size(prec%av) + call prec%av(i)%free() + enddo + deallocate(prec%av,stat=info) + endif + end if + if (.not.allocated(prec%av)) then + allocate(prec%av(psb_max_avsz),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + endif + + nrow_a = desc_a%get_local_rows() + nztota = a%get_nzeros() + + n_col = desc_a%get_local_cols() + nhalo = n_col-nrow_a + n_row = nrow_a + + allocate(lf,uf,stat=info) + if (info == psb_success_) call lf%allocate(n_row,n_row,nztota) + if (info == psb_success_) call uf%allocate(n_row,n_row,nztota) + + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_sp_all' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + allocate(dd(n_row),stat=info) + if (info == psb_success_) then + allocate(prec%dv, stat=info) + if (info == 0) then + if (present(vmold)) then + allocate(prec%dv%v,mold=vmold,stat=info) + else + allocate(psb_c_base_vect_type :: prec%dv%v,stat=info) + end if + end if + end if + + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + endif + ! This is where we have no renumbering, thus no need + call psb_ilut_fact(fill_in,fact_eps,a,lf,uf,dd,info,iscale=iscale) + + if(info == psb_success_) then + call prec%av(psb_l_pr_)%mv_from(lf%a) + call prec%av(psb_u_pr_)%mv_from(uf%a) + call prec%av(psb_l_pr_)%set_asb() + call prec%av(psb_u_pr_)%set_asb() + call prec%av(psb_l_pr_)%trim() + call prec%av(psb_u_pr_)%trim() + call prec%dv%bld(dd) + ! call move_alloc(dd,prec%d) + else + info=psb_err_from_subroutine_ + ch_err='psb_ilut_fact' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + case(psb_f_ainv_) + ! Approximate Inverse Factorizations based on variants of the incomplete + ! biconjugation algorithms + if (allocated(prec%av)) then + if (size(prec%av) < psb_bp_ilu_avsz) then + do i=1,size(prec%av) + call prec%av(i)%free() + enddo + deallocate(prec%av,stat=info) + endif + end if + if (.not.allocated(prec%av)) then + allocate(prec%av(psb_max_avsz),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + endif + + nrow_a = desc_a%get_local_rows() + nztota = a%get_nzeros() + + n_col = desc_a%get_local_cols() + nhalo = n_col-nrow_a + n_row = nrow_a + + allocate(lf,uf,stat=info) + if (info == psb_success_) call lf%allocate(n_row,n_row,nztota) + if (info == psb_success_) call uf%allocate(n_row,n_row,nztota) + + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_sp_all' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + allocate(dd(n_row),stat=info) + if (info == psb_success_) then + allocate(prec%dv, stat=info) + if (info == 0) then + if (present(vmold)) then + allocate(prec%dv%v,mold=vmold,stat=info) + else + allocate(psb_c_base_vect_type :: prec%dv%v,stat=info) + end if + end if + end if + + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + endif + ! Computing the factorization + call psb_ainv_fact(a,iinvalg,inv_fill,inv_thresh,lf,dd,uf,desc_a,info,iscale=iscale) + + if(info == psb_success_) then + call prec%av(psb_l_pr_)%mv_from(lf%a) + call prec%av(psb_u_pr_)%mv_from(uf%a) + call prec%av(psb_l_pr_)%set_asb() + call prec%av(psb_u_pr_)%set_asb() + call prec%av(psb_l_pr_)%trim() + call prec%av(psb_u_pr_)%trim() + call prec%dv%bld(dd) + ! call move_alloc(dd,prec%d) + else + info=psb_err_from_subroutine_ + ch_err='psb_ilut_fact' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + case(psb_f_invk_) + ! Approximate Inverse Factorizations based on the sparse inversion of + ! triangular factors of an ILU(n) factorization + if (allocated(prec%av)) then + if (size(prec%av) < psb_bp_ilu_avsz) then + do i=1,size(prec%av) + call prec%av(i)%free() + enddo + deallocate(prec%av,stat=info) + endif + end if + if (.not.allocated(prec%av)) then + allocate(prec%av(psb_max_avsz),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + endif + + nrow_a = desc_a%get_local_rows() + nztota = a%get_nzeros() + + n_col = desc_a%get_local_cols() + nhalo = n_col-nrow_a + n_row = nrow_a + + allocate(lf,uf,stat=info) + if (info == psb_success_) call lf%allocate(n_row,n_row,nztota) + if (info == psb_success_) call uf%allocate(n_row,n_row,nztota) + + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_sp_all' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + allocate(dd(n_row),stat=info) + if (info == psb_success_) then + allocate(prec%dv, stat=info) + if (info == 0) then + if (present(vmold)) then + allocate(prec%dv%v,mold=vmold,stat=info) + else + allocate(psb_c_base_vect_type :: prec%dv%v,stat=info) + end if + end if + end if + + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + endif + ! Computing the factorization + call psb_invk_fact(a,fill_in, inv_fill,lf,dd,uf,desc_a,info) + + if(info == psb_success_) then + call prec%av(psb_l_pr_)%mv_from(lf%a) + call prec%av(psb_u_pr_)%mv_from(uf%a) + call prec%av(psb_l_pr_)%set_asb() + call prec%av(psb_u_pr_)%set_asb() + call prec%av(psb_l_pr_)%trim() + call prec%av(psb_u_pr_)%trim() + call prec%dv%bld(dd) + ! call move_alloc(dd,prec%d) + else + info=psb_err_from_subroutine_ + ch_err='psb_ilut_fact' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + case(psb_f_invt_) + ! Approximate Inverse Factorizations based on the sparse inversion of + ! triangular factors of an ILU(eps) factorization + if (allocated(prec%av)) then + if (size(prec%av) < psb_bp_ilu_avsz) then + do i=1,size(prec%av) + call prec%av(i)%free() + enddo + deallocate(prec%av,stat=info) + endif + end if + if (.not.allocated(prec%av)) then + allocate(prec%av(psb_max_avsz),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + endif + + nrow_a = desc_a%get_local_rows() + nztota = a%get_nzeros() + + n_col = desc_a%get_local_cols() + nhalo = n_col-nrow_a + n_row = nrow_a + + allocate(lf,uf,stat=info) + if (info == psb_success_) call lf%allocate(n_row,n_row,nztota) + if (info == psb_success_) call uf%allocate(n_row,n_row,nztota) + + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_sp_all' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + allocate(dd(n_row),stat=info) + if (info == psb_success_) then + allocate(prec%dv, stat=info) + if (info == 0) then + if (present(vmold)) then + allocate(prec%dv%v,mold=vmold,stat=info) + else + allocate(psb_c_base_vect_type :: prec%dv%v,stat=info) + end if + end if + end if + + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + endif + ! Computing the factorization + call psb_invt_fact(a,fill_in,inv_fill,fact_eps,inv_thresh,lf,dd,uf,& + & desc_a,info) + + if(info == psb_success_) then + call prec%av(psb_l_pr_)%mv_from(lf%a) + call prec%av(psb_u_pr_)%mv_from(uf%a) + call prec%av(psb_l_pr_)%set_asb() + call prec%av(psb_u_pr_)%set_asb() + call prec%av(psb_l_pr_)%trim() + call prec%av(psb_u_pr_)%trim() + call prec%dv%bld(dd) + ! call move_alloc(dd,prec%d) + else + info=psb_err_from_subroutine_ + ch_err='psb_ilut_fact' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + case(psb_f_none_) info=psb_err_from_subroutine_ ch_err='Inconsistent prec psb_f_none_' call psb_errpush(info,name,a_err=ch_err) @@ -548,7 +1082,7 @@ subroutine psb_c_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold) goto 9999 end select - if (present(amold)) then + if (present(amold)) then call prec%av(psb_l_pr_)%cscnv(info,mold=amold) call prec%av(psb_u_pr_)%cscnv(info,mold=amold) end if @@ -567,8 +1101,8 @@ subroutine psb_c_bjac_precseti(prec,what,val,info) Implicit None class(psb_c_bjac_prec_type),intent(inout) :: prec - integer(psb_ipk_), intent(in) :: what - integer(psb_ipk_), intent(in) :: val + integer(psb_ipk_), intent(in) :: what + integer(psb_ipk_), intent(in) :: val integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: err_act, nrow character(len=20) :: name='c_bjac_precset' @@ -576,34 +1110,33 @@ subroutine psb_c_bjac_precseti(prec,what,val,info) call psb_erractionsave(err_act) info = psb_success_ - if (.not.allocated(prec%iprcparm)) then + if (.not.allocated(prec%iprcparm)) then info = 1124 call psb_errpush(info,name,a_err="preconditioner") goto 9999 end if select case(what) - case (psb_f_type_) - if (prec%iprcparm(psb_p_type_) /= psb_bjac_) then - write(psb_err_unit,*) 'WHAT is invalid for current preconditioner ',& - & prec%iprcparm(psb_p_type_),& - & 'ignoring user specification' - return - endif + case (psb_f_type_) prec%iprcparm(psb_f_type_) = val - case (psb_ilu_fill_in_) - if ((prec%iprcparm(psb_p_type_) /= psb_bjac_).or.& - & (prec%iprcparm(psb_f_type_) /= psb_f_ilu_n_)) then - write(psb_err_unit,*) 'WHAT is invalid for current preconditioner ',& - & prec%iprcparm(psb_p_type_),& - & 'ignoring user specification' - return - endif + case (psb_ilu_fill_in_) prec%iprcparm(psb_ilu_fill_in_) = val + case (psb_ilu_ialg_) + prec%iprcparm(psb_ilu_ialg_) = val + + case (psb_ilu_scale_) + prec%iprcparm(psb_ilu_scale_) = val + + case (psb_ainv_alg_) + prec%iprcparm(psb_ainv_alg_) = val + + case(psb_inv_fillin_) + prec%iprcparm(psb_inv_fillin_) = val + case default - write(psb_err_unit,*) 'WHAT is invalid, ignoring user specification' + write(psb_err_unit,'(i0," is invalid, ignoring user specification")') what end select @@ -613,3 +1146,45 @@ subroutine psb_c_bjac_precseti(prec,what,val,info) 9999 call psb_error_handler(err_act) return end subroutine psb_c_bjac_precseti + +subroutine psb_c_bjac_precsetr(prec,what,val,info) + + use psb_base_mod + use psb_c_bjacprec, psb_protect_name => psb_c_bjac_precsetr + Implicit None + + class(psb_c_bjac_prec_type),intent(inout) :: prec + integer(psb_ipk_), intent(in) :: what + real(psb_spk_), intent(in) :: val + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act, nrow + character(len=20) :: name='c_bjac_precset' + + call psb_erractionsave(err_act) + + info = psb_success_ + if (.not.allocated(prec%rprcparm)) then + info = 1124 + call psb_errpush(info,name,a_err="preconditioner") + goto 9999 + end if + + select case(what) + + case (psb_fact_eps_) + prec%rprcparm(psb_fact_eps_) = val + + case (psb_inv_thresh_) + prec%rprcparm(psb_inv_thresh_) = val + + case default + write(psb_err_unit,'(i0," is invalid, ignoring user specification")') what + + end select + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return +end subroutine psb_c_bjac_precsetr diff --git a/prec/impl/psb_c_ilu0_fact.f90 b/prec/impl/psb_c_ilu0_fact.f90 index c4097dea..1a3e1046 100644 --- a/prec/impl/psb_c_ilu0_fact.f90 +++ b/prec/impl/psb_c_ilu0_fact.f90 @@ -1,9 +1,9 @@ -! +! ! Parallel Sparse BLAS version 3.5 ! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! +! Salvatore Filippone +! Alfredo Buttari +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -15,7 +15,7 @@ ! 3. The name of the PSBLAS group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. -! +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -27,21 +27,21 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! +! ! Moved here from MLD2P4, original copyright below. -! -! -! +! +! +! ! MLD2P4 version 2.2 ! MultiLevel Domain Decomposition Parallel Preconditioners Package ! based on PSBLAS (Parallel Sparse BLAS version 3.5) -! -! (C) Copyright 2008-2018 -! -! Salvatore Filippone -! Pasqua D'Ambra -! Daniela di Serafino -! +! +! (C) Copyright 2008-2018 +! +! Salvatore Filippone +! Pasqua D'Ambra +! Daniela di Serafino +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -53,7 +53,7 @@ ! 3. The name of the MLD2P4 group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. -! +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -65,8 +65,8 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! -! +! +! ! File: psb_cilu0_fact.f90 ! ! Subroutine: psb_cilu0_fact @@ -75,7 +75,7 @@ ! ! This routine computes either the ILU(0) or the MILU(0) factorization of ! the diagonal blocks of a distributed matrix. These factorizations are used -! to build the 'base preconditioner' (block-Jacobi preconditioner/solver, +! to build the 'base preconditioner' (block-Jacobi preconditioner/solver, ! Additive Schwarz preconditioner) corresponding to a given level of a ! multilevel preconditioner. ! @@ -83,10 +83,10 @@ ! Y. Saad, Iterative Methods for Sparse Linear Systems, Second Edition, ! SIAM, 2003, Chapter 10. ! -! The local matrix is stored into a and blck, as specified in the description -! of the arguments below. The storage format for both the L and U factors is CSR. -! The diagonal of the U factor is stored separately (actually, the inverse of the -! diagonal entries is stored; this is then managed in the solve stage associated +! The local matrix is stored into a and blck, as specified in the description +! of the arguments below. The storage format for both the L and U factors is CSR. +! The diagonal of the U factor is stored separately (actually, the inverse of the +! diagonal entries is stored; this is then managed in the solve stage associated ! to the ILU(0)/MILU(0) factorization). ! ! The routine copies and factors "on the fly" from a and blck into l (L factor), @@ -94,7 +94,7 @@ ! ! This implementation of ILU(0)/MILU(0) is faster than the implementation in ! psb_ziluk_fct (the latter routine performs the more general ILU(k)/MILU(k)). -! +! ! ! Arguments: ! ialg - integer, input. @@ -121,7 +121,7 @@ ! factorization. ! Note: its allocation is managed by the calling routine psb_ilu_bld, ! hence it cannot be only intent(out). -! info - integer, output. +! info - integer, output. ! Error code. ! blck - type(psb_cspmat_type), input, optional, target. ! The sparse matrix structure containing the remote rows of the @@ -129,7 +129,7 @@ ! to build an Additive Schwarz base preconditioner with overlap ! greater than 0. If the overlap is 0 or the matrix has been reordered ! (see psb_fact_bld), then blck is empty. -! +! subroutine psb_cilu0_fact(ialg,a,l,u,d,info,blck, upd) use psb_base_mod @@ -157,30 +157,30 @@ subroutine psb_cilu0_fact(ialg,a,l,u,d,info,blck, upd) info = psb_success_ call psb_erractionsave(err_act) - ! + ! ! Point to / allocate memory for the incomplete factorization ! - if (present(blck)) then + if (present(blck)) then blck_ => blck else - allocate(blck_,stat=info) - if (info == psb_success_) call blck_%csall(izero,izero,info,ione) + allocate(blck_,stat=info) + if (info == psb_success_) call blck_%allocate(izero,izero,info,ione,type='CSR') if (info /= psb_success_) then info=psb_err_from_subroutine_ - ch_err='csall' + ch_err='allocate' call psb_errpush(info,name,a_err=ch_err) goto 9999 end if endif - if (present(upd)) then - upd_ = psb_toupper(upd) + if (present(upd)) then + upd_ = psb_toupper(upd) else upd_ = 'F' end if - + m = a%get_nrows() + blck_%get_nrows() if ((m /= l%get_nrows()).or.(m /= u%get_nrows()).or.& - & (m > size(d)) ) then + & (m > size(d)) ) then write(0,*) 'Wrong allocation status for L,D,U? ',& & l%get_nrows(),size(d),u%get_nrows() info = -1 @@ -212,12 +212,12 @@ subroutine psb_cilu0_fact(ialg,a,l,u,d,info,blck, upd) call u%set_triangle() call u%set_unit() call u%set_upper() - + ! ! Nullify pointer / deallocate memory ! - if (present(blck)) then - blck_ => null() + if (present(blck)) then + blck_ => null() else call blck_%free() if(info.ne.0) then @@ -226,7 +226,7 @@ subroutine psb_cilu0_fact(ialg,a,l,u,d,info,blck, upd) call psb_errpush(info,name,a_err=ch_err) goto 9999 end if - deallocate(blck_) + deallocate(blck_) endif call psb_erractionrestore(err_act) @@ -243,9 +243,9 @@ contains ! Version: complex ! Note: internal subroutine of psb_cilu0_fact. ! - ! This routine computes either the ILU(0) or the MILU(0) factorization of the - ! diagonal blocks of a distributed matrix. - ! These factorizations are used to build the 'base preconditioner' + ! This routine computes either the ILU(0) or the MILU(0) factorization of the + ! diagonal blocks of a distributed matrix. + ! These factorizations are used to build the 'base preconditioner' ! (block-Jacobi preconditioner/solver, Additive Schwarz ! preconditioner) corresponding to a given level of a multilevel preconditioner. ! @@ -257,7 +257,7 @@ contains ! ! The routine copies and factors "on the fly" from the sparse matrix structures a ! and b into the arrays lval, uval, d (L, U without its diagonal, diagonal of U). - ! + ! ! ! Arguments: ! ialg - integer, input. @@ -296,7 +296,7 @@ contains ! according to the CSR storage format. ! lirp - integer, dimension(:), input/output. ! The indices identifying the first nonzero entry of each row - ! of the L factor in lval, according to the CSR storage format. + ! of the L factor in lval, according to the CSR storage format. ! uval - complex(psb_spk_), dimension(:), input/output. ! The U factor in the incomplete factorization. ! The entries of U are stored according to the CSR format. @@ -305,18 +305,18 @@ contains ! according to the CSR storage format. ! uirp - integer, dimension(:), input/output. ! The indices identifying the first nonzero entry of each row - ! of the U factor in uval, according to the CSR storage format. + ! of the U factor in uval, according to the CSR storage format. ! l1 - integer, output. ! The number of nonzero entries in lval. ! l2 - integer, output. ! The number of nonzero entries in uval. - ! info - integer, output. + ! info - integer, output. ! Error code. ! subroutine psb_cilu0_factint(ialg,a,b,& & d,lval,lja,lirp,uval,uja,uirp,l1,l2,upd,info) - implicit none + implicit none ! Arguments integer(psb_ipk_), intent(in) :: ialg @@ -332,7 +332,7 @@ contains complex(psb_spk_) :: dia,temp integer(psb_ipk_), parameter :: nrb=16 type(psb_c_coo_sparse_mat) :: trw - integer(psb_ipk_) :: int_err(5) + integer(psb_ipk_) :: int_err(5) character(len=20) :: name, ch_err name='psb_cilu0_factint' @@ -346,10 +346,10 @@ contains select case(ialg) case(psb_ilu_n_,psb_milu_n_) - ! Ok + ! Ok case default info=psb_err_input_asize_invalid_i_ - call psb_errpush(info,name,& + call psb_errpush(info,name,& & i_err=(/ione,ialg,izero,izero,izero/)) goto 9999 end select @@ -363,7 +363,7 @@ contains end if m = ma+mb - if (psb_toupper(upd) == 'F' ) then + if (psb_toupper(upd) == 'F' ) then lirp(1) = 1 uirp(1) = 1 l1 = 0 @@ -379,14 +379,14 @@ contains if (i <= ma) then ! ! Copy the i-th local row of the matrix, stored in a, - ! into lval/d(i)/uval + ! into lval/d(i)/uval ! call ilu_copyin(i,ma,a,i,ione,m,l1,lja,lval,& & d(i),l2,uja,uval,ktrw,trw,upd) else ! ! Copy the i-th local row of the matrix, stored in b - ! (as (i-ma)-th row), into lval/d(i)/uval + ! (as (i-ma)-th row), into lval/d(i)/uval ! call ilu_copyin(i-ma,mb,b,i,ione,m,l1,lja,lval,& & d(i),l2,uja,uval,ktrw,trw,upd) @@ -437,7 +437,7 @@ contains ! dia = dia - temp*uval(jj) cycle updateloop - ! + ! else if (j > i) then ! ! search u(i,*) (i-th row of U) for a matching index j @@ -454,23 +454,23 @@ contains end if enddo end if - ! - ! If we get here we missed the cycle updateloop, which means + ! + ! If we get here we missed the cycle updateloop, which means ! that this entry does not match; thus we accumulate on the ! diagonal for MILU(0). ! - if (ialg == psb_milu_n_) then + if (ialg == psb_milu_n_) then dia = dia - temp*uval(jj) end if enddo updateloop enddo - ! + ! ! Check the pivot size - ! + ! if (abs(dia) < s_epstol) then ! ! Too small pivot: unstable factorization - ! + ! info = psb_err_pivot_too_small_ int_err(1) = i write(ch_err,'(g20.10)') abs(dia) @@ -493,7 +493,7 @@ contains else write(0,*) 'Update not implemented ' info = 31 - call psb_errpush(info,name,& + call psb_errpush(info,name,& & i_err=(/ione*13,izero,izero,izero,izero/),a_err=upd) goto 9999 @@ -515,7 +515,7 @@ contains ! Version: complex ! Note: internal subroutine of psb_cilu0_fact ! - ! This routine copies a row of a sparse matrix A, stored in the psb_cspmat_type + ! This routine copies a row of a sparse matrix A, stored in the psb_cspmat_type ! data structure a, into the arrays lval and uval and into the scalar variable ! dia, corresponding to the lower and upper triangles of A and to the diagonal ! entry of the row, respectively. The entries in lval and uval are stored @@ -529,14 +529,14 @@ contains ! ! The routine is used by psb_cilu0_factint in the computation of the ILU(0)/MILU(0) ! factorization of a local sparse matrix. - ! + ! ! TODO: modify the routine to allow copying into output L and U that are ! already filled with indices; this would allow computing an ILU(k) pattern, - ! then use the ILU(0) internal for subsequent calls with the same pattern. + ! then use the ILU(0) internal for subsequent calls with the same pattern. ! ! Arguments: ! i - integer, input. - ! The local index of the row to be extracted from the + ! The local index of the row to be extracted from the ! sparse matrix structure a. ! m - integer, input. ! The number of rows of the local matrix stored into a. @@ -570,13 +570,13 @@ contains ! to the CSR storage format. ! uval - complex(psb_spk_), dimension(:), input/output. ! The array where the entries of the row corresponding to the - ! upper triangle are copied. + ! upper triangle are copied. ! ktrw - integer, input/output. ! The index identifying the last entry taken from the ! staging buffer trw. See below. ! trw - type(psb_cspmat_type), input/output. ! A staging buffer. If the matrix A is not in CSR format, we use - ! the psb_sp_getblk routine and store its output in trw; when we + ! the psb_sp_getblk routine and store its output in trw; when we ! need to call psb_sp_getblk we do it for a block of rows, and then ! we consume them from trw in successive calls to this routine, ! until we empty the buffer. Thus we will make a call to psb_sp_getblk @@ -608,10 +608,10 @@ contains if (psb_errstatus_fatal()) then info = psb_err_internal_error_; goto 9999 end if - if (psb_toupper(upd) == 'F') then + if (psb_toupper(upd) == 'F') then - select type(aa => a%a) - type is (psb_c_csr_sparse_mat) + select type(aa => a%a) + type is (psb_c_csr_sparse_mat) ! ! Take a fast shortcut if the matrix is stored in CSR format @@ -633,19 +633,19 @@ contains end if enddo - class default + class default ! - ! Otherwise use psb_sp_getblk, slower but able (in principle) of + ! Otherwise use psb_sp_getblk, slower but able (in principle) of ! handling any format. In this case, a block of rows is extracted ! instead of a single row, for performance reasons, and these ! rows are copied one by one into lval, dia, uval, through ! successive calls to ilu_copyin. ! - if ((mod(i,nrb) == 1).or.(nrb == 1)) then + if ((mod(i,nrb) == 1).or.(nrb == 1)) then irb = min(m-i+1,nrb) - call aa%csget(i,i+irb-1,trw,info) + call aa%csget(i,i+irb-1,trw,info) if(info /= psb_success_) then info=psb_err_from_subroutine_ ch_err='csget' @@ -656,7 +656,7 @@ contains end if nz = trw%get_nzeros() - do + do if (ktrw > nz) exit if (trw%ia(ktrw) > i) exit k = trw%ja(ktrw) @@ -680,7 +680,7 @@ contains write(0,*) 'Update not implemented ' info = 31 - call psb_errpush(info,name,& + call psb_errpush(info,name,& & i_err=(/ione*13,izero,izero,izero,izero/),a_err=upd) goto 9999 diff --git a/prec/impl/psb_c_iluk_fact.f90 b/prec/impl/psb_c_iluk_fact.f90 index 8748816d..c4ebc678 100644 --- a/prec/impl/psb_c_iluk_fact.f90 +++ b/prec/impl/psb_c_iluk_fact.f90 @@ -1,9 +1,9 @@ -! +! ! Parallel Sparse BLAS version 3.5 ! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! +! Salvatore Filippone +! Alfredo Buttari +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -15,7 +15,7 @@ ! 3. The name of the PSBLAS group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. -! +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -27,21 +27,21 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! +! ! Moved here from MLD2P4, original copyright below. -! -! -! +! +! +! ! MLD2P4 version 2.2 ! MultiLevel Domain Decomposition Parallel Preconditioners Package ! based on PSBLAS (Parallel Sparse BLAS version 3.5) -! -! (C) Copyright 2008-2018 -! -! Salvatore Filippone -! Pasqua D'Ambra -! Daniela di Serafino -! +! +! (C) Copyright 2008-2018 +! +! Salvatore Filippone +! Pasqua D'Ambra +! Daniela di Serafino +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -53,7 +53,7 @@ ! 3. The name of the MLD2P4 group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. -! +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -65,8 +65,8 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! -! +! +! ! File: psb_ciluk_fact.f90 ! ! Subroutine: psb_ciluk_fact @@ -74,7 +74,7 @@ ! Contains: psb_ciluk_factint, iluk_copyin, iluk_fact, iluk_copyout. ! ! This routine computes either the ILU(k) or the MILU(k) factorization of the -! diagonal blocks of a distributed matrix. These factorizations are used to +! diagonal blocks of a distributed matrix. These factorizations are used to ! build the 'base preconditioner' (block-Jacobi preconditioner/solver, ! Additive Schwarz preconditioner) corresponding to a certain level of a ! multilevel preconditioner. @@ -88,7 +88,7 @@ ! U factors is CSR. The diagonal of the U factor is stored separately (actually, ! the inverse of the diagonal entries is stored; this is then managed in the solve ! stage associated to the ILU(k)/MILU(k) factorization). -! +! ! ! Arguments: ! fill_in - integer, input. @@ -118,7 +118,7 @@ ! factorization. ! Note: its allocation is managed by the calling routine psb_ilu_bld, ! hence it cannot be only intent(out). -! info - integer, output. +! info - integer, output. ! Error code. ! blck - type(psb_cspmat_type), input, optional, target. ! The sparse matrix structure containing the remote rows of the @@ -126,7 +126,7 @@ ! to build an Additive Schwarz base preconditioner with overlap ! greater than 0. If the overlap is 0 or the matrix has been reordered ! (see psb_fact_bld), then blck does not contain any row. -! +! subroutine psb_ciluk_fact(fill_in,ialg,a,l,u,d,info,blck) use psb_base_mod @@ -143,7 +143,7 @@ subroutine psb_ciluk_fact(fill_in,ialg,a,l,u,d,info,blck) complex(psb_spk_), intent(inout) :: d(:) ! Local Variables integer(psb_ipk_) :: l1, l2, m, err_act - + type(psb_cspmat_type), pointer :: blck_ type(psb_c_csr_sparse_mat) :: ll, uu character(len=20) :: name, ch_err @@ -152,17 +152,17 @@ subroutine psb_ciluk_fact(fill_in,ialg,a,l,u,d,info,blck) info = psb_success_ call psb_erractionsave(err_act) - ! + ! ! Point to / allocate memory for the incomplete factorization ! - if (present(blck)) then + if (present(blck)) then blck_ => blck else - allocate(blck_,stat=info) - if (info == psb_success_) call blck_%csall(izero,izero,info,ione) + allocate(blck_,stat=info) + if (info == psb_success_) call blck_%allocate(izero,izero,info,ione,type='CSR') if (info /= psb_success_) then info=psb_err_from_subroutine_ - ch_err='csall' + ch_err='allocate' call psb_errpush(info,name,a_err=ch_err) goto 9999 end if @@ -170,7 +170,7 @@ subroutine psb_ciluk_fact(fill_in,ialg,a,l,u,d,info,blck) m = a%get_nrows() + blck_%get_nrows() if ((m /= l%get_nrows()).or.(m /= u%get_nrows()).or.& - & (m > size(d)) ) then + & (m > size(d)) ) then write(0,*) 'Wrong allocation status for L,D,U? ',& & l%get_nrows(),size(d),u%get_nrows() info = -1 @@ -203,15 +203,15 @@ subroutine psb_ciluk_fact(fill_in,ialg,a,l,u,d,info,blck) call u%set_triangle() call u%set_unit() call u%set_upper() - + ! ! Nullify pointer / deallocate memory ! - if (present(blck)) then - blck_ => null() + if (present(blck)) then + blck_ => null() else call blck_%free() - deallocate(blck_,stat=info) + deallocate(blck_,stat=info) if(info.ne.0) then info=psb_err_from_subroutine_ ch_err='psb_sp_free' @@ -280,7 +280,7 @@ contains ! according to the CSR storage format. ! lia2 - integer, dimension(:), input/output. ! The indices identifying the first nonzero entry of each row - ! of the L factor in laspk, according to the CSR storage format. + ! of the L factor in laspk, according to the CSR storage format. ! uval - complex(psb_spk_), dimension(:), input/output. ! The U factor in the incomplete factorization. ! The entries of U are stored according to the CSR format. @@ -289,12 +289,12 @@ contains ! according to the CSR storage format. ! uirp - integer, dimension(:), input/output. ! The indices identifying the first nonzero entry of each row - ! of the U factor in uval, according to the CSR storage format. + ! of the U factor in uval, according to the CSR storage format. ! l1 - integer, output ! The number of nonzero entries in laspk. ! l2 - integer, output ! The number of nonzero entries in uval. - ! info - integer, output. + ! info - integer, output. ! Error code. ! subroutine psb_ciluk_factint(fill_in,ialg,a,b,& @@ -304,7 +304,7 @@ contains implicit none - ! Arguments + ! Arguments integer(psb_ipk_), intent(in) :: fill_in, ialg type(psb_cspmat_type),intent(in) :: a,b integer(psb_ipk_),intent(inout) :: l1,l2,info @@ -330,14 +330,14 @@ contains select case(ialg) case(psb_ilu_n_,psb_milu_n_) - ! Ok + ! Ok case default info=psb_err_input_asize_invalid_i_ call psb_errpush(info,name,& & i_err=(/itwo,ialg,izero,izero,izero/)) goto 9999 end select - if (fill_in < 0) then + if (fill_in < 0) then info=psb_err_input_asize_invalid_i_ call psb_errpush(info,name, & & i_err=(/ione,fill_in,izero,izero,izero/)) @@ -349,7 +349,7 @@ contains m = ma+mb ! - ! Allocate a temporary buffer for the iluk_copyin function + ! Allocate a temporary buffer for the iluk_copyin function ! call trw%allocate(izero,izero,ione) @@ -361,7 +361,7 @@ contains call psb_errpush(info,name,a_err='psb_sp_all') goto 9999 end if - + l1=0 l2=0 lirp(1) = 1 @@ -381,34 +381,34 @@ contains uplevs(:) = m+1 row(:) = czero rowlevs(:) = -(m+1) - + ! ! Cycle over the matrix rows ! do i = 1, m - + ! ! At each iteration of the loop we keep in a heap the column indices ! affected by the factorization. The heap is initialized and filled ! in the iluk_copyin routine, and updated during the elimination, in ! the iluk_fact routine. The heap is ideal because at each step we need ! the lowest index, but we also need to insert new items, and the heap - ! allows to do both in log time. + ! allows to do both in log time. ! d(i) = czero if (i<=ma) then ! - ! Copy into trw the i-th local row of the matrix, stored in a - ! + ! Copy into trw the i-th local row of the matrix, stored in a + ! call iluk_copyin(i,ma,a,ione,m,row,rowlevs,heap,ktrw,trw,info) else ! ! Copy into trw the i-th local row of the matrix, stored in b - ! (as (i-ma)-th row) - ! + ! (as (i-ma)-th row) + ! call iluk_copyin(i-ma,mb,b,ione,m,row,rowlevs,heap,ktrw,trw,info) endif - + ! Do an elimination step on the current row. It turns out we only ! need to keep track of fill levels for the upper triangle, hence we ! do not have a lowlevs variable. @@ -417,7 +417,7 @@ contains & d,uja,uirp,uval,uplevs,nidx,idxs,info) ! ! Copy the row into lval/d(i)/uval - ! + ! if (info == psb_success_) call iluk_copyout(fill_in,ialg,i,m,row,rowlevs,nidx,idxs,& & l1,l2,lja,lirp,lval,d,uja,uirp,uval,uplevs,info) if (info /= psb_success_) then @@ -474,11 +474,11 @@ contains ! ! This routine is used by psb_ciluk_factint in the computation of the ! ILU(k)/MILU(k) factorization of a local sparse matrix. - ! + ! ! ! Arguments: ! i - integer, input. - ! The local index of the row to be extracted from the + ! The local index of the row to be extracted from the ! sparse matrix structure a. ! m - integer, input. ! The number of rows of the local matrix stored into a. @@ -510,7 +510,7 @@ contains ! staging buffer trw. See below. ! trw - type(psb_cspmat_type), input/output. ! A staging buffer. If the matrix A is not in CSR format, we use - ! the psb_sp_getblk routine and store its output in trw; when we + ! the psb_sp_getblk routine and store its output in trw; when we ! need to call psb_sp_getblk we do it for a block of rows, and then ! we consume them from trw in successive calls to this routine, ! until we empty the buffer. Thus we will make a call to psb_sp_getblk @@ -521,8 +521,8 @@ contains use psb_base_mod implicit none - - ! Arguments + + ! Arguments type(psb_cspmat_type), intent(in) :: a type(psb_c_coo_sparse_mat), intent(inout) :: trw integer(psb_ipk_), intent(in) :: i,m,jmin,jmax @@ -542,17 +542,17 @@ contains if (psb_errstatus_fatal()) then info = psb_err_internal_error_; goto 9999 end if - call heap%init(info) + call heap%init(info) - select type (aa=> a%a) - type is (psb_c_csr_sparse_mat) + select type (aa=> a%a) + type is (psb_c_csr_sparse_mat) ! ! Take a fast shortcut if the matrix is stored in CSR format ! - + do j = aa%irp(i), aa%irp(i+1) - 1 k = aa%ja(j) - if ((jmin<=k).and.(k<=jmax)) then + if ((jmin<=k).and.(k<=jmax)) then row(k) = aa%val(j) rowlevs(k) = 0 call heap%insert(k,info) @@ -562,14 +562,14 @@ contains class default ! - ! Otherwise use psb_sp_getblk, slower but able (in principle) of + ! Otherwise use psb_sp_getblk, slower but able (in principle) of ! handling any format. In this case, a block of rows is extracted ! instead of a single row, for performance reasons, and these ! rows are copied one by one into the array row, through successive ! calls to iluk_copyin. ! - if ((mod(i,nrb) == 1).or.(nrb == 1)) then + if ((mod(i,nrb) == 1).or.(nrb == 1)) then irb = min(m-i+1,nrb) call aa%csget(i,i+irb-1,trw,info) if (info /= psb_success_) then @@ -581,11 +581,11 @@ contains ktrw=1 end if nz = trw%get_nzeros() - do + do if (ktrw > nz) exit if (trw%ia(ktrw) > i) exit k = trw%ja(ktrw) - if ((jmin<=k).and.(k<=jmax)) then + if ((jmin<=k).and.(k<=jmax)) then row(k) = trw%val(ktrw) rowlevs(k) = 0 call heap%insert(k,info) @@ -674,10 +674,10 @@ contains use psb_base_mod - implicit none + implicit none ! Arguments - type(psb_i_heap), intent(inout) :: heap + type(psb_i_heap), intent(inout) :: heap integer(psb_ipk_), intent(in) :: i, fill_in integer(psb_ipk_), intent(inout) :: nidx,info integer(psb_ipk_), intent(inout) :: rowlevs(:) @@ -690,7 +690,7 @@ contains complex(psb_spk_) :: rwk info = psb_success_ - if (.not.allocated(idxs)) then + if (.not.allocated(idxs)) then allocate(idxs(200),stat=info) if (info /= psb_success_) return endif @@ -702,34 +702,34 @@ contains ! do ! Beware: (iret < 0) means that the heap is empty, not an error. - call heap%get_first(k,iret) + call heap%get_first(k,iret) if (iret < 0) return - ! + ! ! Just in case an index has been put on the heap more than once. ! if (k == lastk) cycle - lastk = k + lastk = k nidx = nidx + 1 - if (nidx>size(idxs)) then + if (nidx>size(idxs)) then call psb_realloc(nidx+psb_heap_resize,idxs,info) if (info /= psb_success_) return end if idxs(nidx) = k - - if ((row(k) /= czero).and.(rowlevs(k) <= fill_in).and.(ki) then + else if (j>i) then ! ! Copy the upper part of the row - ! - if (rowlevs(j) <= fill_in) then - l2 = l2 + 1 - if (size(uval) < l2) then - ! + ! + if (rowlevs(j) <= fill_in) then + l2 = l2 + 1 + if (size(uval) < l2) then + ! ! Figure out a good reallocation size! ! isz = max((l2/i)*m,int(1.2*l2),l2+100) - call psb_realloc(isz,uval,info) - if (info == psb_success_) call psb_realloc(isz,uja,info) + call psb_realloc(isz,uval,info) + if (info == psb_success_) call psb_realloc(isz,uja,info) if (info == psb_success_) call psb_realloc(isz,uplevs,info,pad=(m+1)) - if (info /= psb_success_) then + if (info /= psb_success_) then info=psb_err_from_subroutine_ call psb_errpush(info,name,a_err='Allocate') goto 9999 @@ -941,11 +941,11 @@ contains end if uja(l2) = j uval(l2) = row(j) - uplevs(l2) = rowlevs(j) + uplevs(l2) = rowlevs(j) else if (ialg == psb_milu_n_) then ! ! MILU(k): add discarded entries to the diagonal one - ! + ! d(i) = d(i) + row(j) end if ! @@ -963,13 +963,13 @@ contains lirp(i+1) = l1 + 1 uirp(i+1) = l2 + 1 - ! + ! ! Check the pivot size ! if (abs(d(i)) < s_epstol) then ! ! Too small pivot: unstable factorization - ! + ! info = psb_err_pivot_too_small_ int_err(1) = i write(ch_err,'(g20.10)') d(i) diff --git a/prec/impl/psb_c_ilut_fact.f90 b/prec/impl/psb_c_ilut_fact.f90 index 06b8b477..633899de 100644 --- a/prec/impl/psb_c_ilut_fact.f90 +++ b/prec/impl/psb_c_ilut_fact.f90 @@ -1,9 +1,9 @@ -! +! ! Parallel Sparse BLAS version 3.5 ! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! +! Salvatore Filippone +! Alfredo Buttari +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -15,7 +15,7 @@ ! 3. The name of the PSBLAS group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. -! +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -27,21 +27,21 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! +! ! Moved here from MLD2P4, original copyright below. -! -! -! +! +! +! ! MLD2P4 version 2.2 ! MultiLevel Domain Decomposition Parallel Preconditioners Package ! based on PSBLAS (Parallel Sparse BLAS version 3.5) -! -! (C) Copyright 2008-2018 -! -! Salvatore Filippone -! Pasqua D'Ambra -! Daniela di Serafino -! +! +! (C) Copyright 2008-2018 +! +! Salvatore Filippone +! Pasqua D'Ambra +! Daniela di Serafino +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -53,7 +53,7 @@ ! 3. The name of the MLD2P4 group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. -! +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -65,8 +65,8 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! -! +! +! ! File: psb_cilut_fact.f90 ! ! Subroutine: psb_cilut_fact @@ -87,7 +87,7 @@ ! CSR. The diagonal of the U factor is stored separately (actually, the ! inverse of the diagonal entries is stored; this is then managed in the ! solve stage associated to the ILU(k,t) factorization). -! +! ! ! Arguments: ! fill_in - integer, input. @@ -114,7 +114,7 @@ ! factorization. ! Note: its allocation is managed by the calling routine psb_ilu_bld, ! hence it cannot be only intent(out). -! info - integer, output. +! info - integer, output. ! Error code. ! blck - type(psb_cspmat_type), input, optional, target. ! The sparse matrix structure containing the remote rows of the @@ -122,9 +122,9 @@ ! to build an Additive Schwarz base preconditioner with overlap ! greater than 0. If the overlap is 0 or the matrix has been reordered ! (see psb_fact_bld), then blck does not contain any row. -! +! subroutine psb_cilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale) - + use psb_base_mod use psb_c_ilu_fact_mod, psb_protect_name => psb_cilut_fact @@ -141,7 +141,7 @@ subroutine psb_cilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale) integer(psb_ipk_), intent(in), optional :: iscale ! Local Variables integer(psb_ipk_) :: l1, l2, m, err_act, iscale_ - + type(psb_cspmat_type), pointer :: blck_ type(psb_c_csr_sparse_mat) :: ll, uu real(psb_spk_) :: scale @@ -151,34 +151,34 @@ subroutine psb_cilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale) info = psb_success_ call psb_erractionsave(err_act) - if (fill_in < 0) then + if (fill_in < 0) then info=psb_err_input_asize_invalid_i_ call psb_errpush(info,name, & & i_err=(/ione,fill_in,izero,izero,izero/)) goto 9999 end if - ! + ! ! Point to / allocate memory for the incomplete factorization ! - if (present(blck)) then + if (present(blck)) then blck_ => blck else - allocate(blck_,stat=info) - if (info == psb_success_) call blck_%csall(izero,izero,info,ione) + allocate(blck_,stat=info) + if (info == psb_success_) call blck_%allocate(izero,izero,info,ione,type='CSR') if (info /= psb_success_) then info=psb_err_from_subroutine_ - ch_err='csall' + ch_err='allocate' call psb_errpush(info,name,a_err=ch_err) goto 9999 end if endif - if (present(iscale)) then + if (present(iscale)) then iscale_ = iscale else iscale_ = psb_ilu_scale_none_ end if - - select case(iscale_) + + select case(iscale_) case(psb_ilu_scale_none_) scale = sone case(psb_ilu_scale_maxval_) @@ -189,10 +189,10 @@ subroutine psb_cilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale) call psb_errpush(info,name,i_err=(/ione*9,iscale_,izero,izero,izero/)) goto 9999 end select - + m = a%get_nrows() + blck_%get_nrows() if ((m /= l%get_nrows()).or.(m /= u%get_nrows()).or.& - & (m > size(d)) ) then + & (m > size(d)) ) then write(0,*) 'Wrong allocation status for L,D,U? ',& & l%get_nrows(),size(d),u%get_nrows() info = -1 @@ -225,15 +225,15 @@ subroutine psb_cilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale) call u%set_triangle() call u%set_unit() call u%set_upper() - + ! ! Nullify pointer / deallocate memory ! - if (present(blck)) then - blck_ => null() + if (present(blck)) then + blck_ => null() else call blck_%free() - deallocate(blck_,stat=info) + deallocate(blck_,stat=info) if(info.ne.0) then info=psb_err_from_subroutine_ ch_err='psb_sp_free' @@ -298,7 +298,7 @@ contains ! according to the CSR storage format. ! lirp - integer, dimension(:), input/output. ! The indices identifying the first nonzero entry of each row - ! of the L factor in lval, according to the CSR storage format. + ! of the L factor in lval, according to the CSR storage format. ! uval - complex(psb_spk_), dimension(:), input/output. ! The U factor in the incomplete factorization. ! The entries of U are stored according to the CSR format. @@ -307,12 +307,12 @@ contains ! according to the CSR storage format. ! uirp - integer, dimension(:), input/output. ! The indices identifying the first nonzero entry of each row - ! of the U factor in uval, according to the CSR storage format. + ! of the U factor in uval, according to the CSR storage format. ! l1 - integer, output ! The number of nonzero entries in lval. ! l2 - integer, output ! The number of nonzero entries in uval. - ! info - integer, output. + ! info - integer, output. ! Error code. ! subroutine psb_cilut_factint(fill_in,thres,a,b,& @@ -320,7 +320,7 @@ contains use psb_base_mod - implicit none + implicit none ! Arguments integer(psb_ipk_), intent(in) :: fill_in @@ -355,7 +355,7 @@ contains m = ma+mb ! - ! Allocate a temporary buffer for the ilut_copyin function + ! Allocate a temporary buffer for the ilut_copyin function ! call trw%allocate(izero,izero,ione) if (info == psb_success_) call psb_ensure_size(m+1,lirp,info) @@ -366,7 +366,7 @@ contains call psb_errpush(info,name,a_err='psb_sp_all') goto 9999 end if - + l1=0 l2=0 lirp(1) = 1 @@ -396,10 +396,10 @@ contains ! in the ilut_copyin function, and updated during the elimination, in ! the ilut_fact routine. The heap is ideal because at each step we need ! the lowest index, but we also need to insert new items, and the heap - ! allows to do both in log time. + ! allows to do both in log time. ! d(i) = czero - if (i<=ma) then + if (i<=ma) then call ilut_copyin(i,ma,a,i,ione,m,nlw,nup,jmaxup,nrmi,weight,& & row,heap,ktrw,trw,info) else @@ -414,7 +414,7 @@ contains & d,uja,uirp,uval,nidx,idxs,info) ! ! Copy the row into lval/d(i)/uval - ! + ! if (info == psb_success_) call ilut_copyout(fill_in,thres,i,m,& & nlw,nup,jmaxup,nrmi,row,nidx,idxs,& & l1,l2,lja,lirp,lval,d,uja,uirp,uval,info) @@ -429,7 +429,7 @@ contains ! ! Adjust diagonal accounting for scale factor ! - if (weight /= sone) then + if (weight /= sone) then d(1:m) = d(1:m)*weight end if @@ -469,7 +469,7 @@ contains ! - storing into a heap the column indices of the nonzero entries of the copied ! row; ! - computing the column index of the first entry with maximum absolute value - ! in the part of the row belonging to the upper triangle; + ! in the part of the row belonging to the upper triangle; ! - computing the 2-norm of the row. ! The output array row is such that it contains a full row of A, i.e. it contains ! also the zero entries of the row. This is useful for the elimination step @@ -482,11 +482,11 @@ contains ! ! This routine is used by psb_cilut_factint in the computation of the ILU(k,t) ! factorization of a local sparse matrix. - ! + ! ! ! Arguments: ! i - integer, input. - ! The local index of the row to be extracted from the + ! The local index of the row to be extracted from the ! sparse matrix structure a. ! m - integer, input. ! The number of rows of the local matrix stored into a. @@ -533,7 +533,7 @@ contains ! staging buffer trw. See below. ! trw - type(psb_cspmat_type), input/output. ! A staging buffer. If the matrix A is not in CSR format, we use - ! the psb_sp_getblk routine and store its output in trw; when we + ! the psb_sp_getblk routine and store its output in trw; when we ! need to call psb_sp_getblk we do it for a block of rows, and then ! we consume them from trw in successive calls to this routine, ! until we empty the buffer. Thus we will make a call to psb_sp_getblk @@ -542,7 +542,7 @@ contains subroutine ilut_copyin(i,m,a,jd,jmin,jmax,nlw,nup,jmaxup,& & nrmi,weight,row,heap,ktrw,trw,info) use psb_base_mod - implicit none + implicit none type(psb_cspmat_type), intent(in) :: a type(psb_c_coo_sparse_mat), intent(inout) :: trw integer(psb_ipk_), intent(in) :: i, m,jmin,jmax,jd @@ -584,29 +584,29 @@ contains dmaxup = szero nrmi = szero - select type (aa=> a%a) - type is (psb_c_csr_sparse_mat) + select type (aa=> a%a) + type is (psb_c_csr_sparse_mat) ! ! Take a fast shortcut if the matrix is stored in CSR format ! do j = aa%irp(i), aa%irp(i+1) - 1 k = aa%ja(j) - if ((jmin<=k).and.(k<=jmax)) then - row(k) = aa%val(j)*weight + if ((jmin<=k).and.(k<=jmax)) then + row(k) = aa%val(j)*weight call heap%insert(k,info) if (info /= psb_success_) exit - if (kjd) then + if (kjd) then nup = nup + 1 - if (abs(row(k))>dmaxup) then + if (abs(row(k))>dmaxup) then jmaxup = k dmaxup = abs(row(k)) end if end if end if end do - if (info /= psb_success_) then + if (info /= psb_success_) then info=psb_err_from_subroutine_ call psb_errpush(info,name,a_err='psb_insert_heap') goto 9999 @@ -617,16 +617,16 @@ contains class default - + ! - ! Otherwise use psb_sp_getblk, slower but able (in principle) of + ! Otherwise use psb_sp_getblk, slower but able (in principle) of ! handling any format. In this case, a block of rows is extracted ! instead of a single row, for performance reasons, and these ! rows are copied one by one into the array row, through successive ! calls to ilut_copyin. ! - if ((mod(i,nrb) == 1).or.(nrb == 1)) then + if ((mod(i,nrb) == 1).or.(nrb == 1)) then irb = min(m-i+1,nrb) call aa%csget(i,i+irb-1,trw,info) if (info /= psb_success_) then @@ -639,18 +639,18 @@ contains kin = ktrw nz = trw%get_nzeros() - do + do if (ktrw > nz) exit if (trw%ia(ktrw) > i) exit k = trw%ja(ktrw) - if ((jmin<=k).and.(k<=jmax)) then + if ((jmin<=k).and.(k<=jmax)) then row(k) = trw%val(ktrw)*weight call heap%insert(k,info) if (info /= psb_success_) exit - if (kjd) then + if (kjd) then nup = nup + 1 - if (abs(row(k))>dmaxup) then + if (abs(row(k))>dmaxup) then jmaxup = k dmaxup = abs(row(k)) end if @@ -734,10 +734,10 @@ contains use psb_base_mod - implicit none + implicit none ! Arguments - type(psb_i_heap), intent(inout) :: heap + type(psb_i_heap), intent(inout) :: heap integer(psb_ipk_), intent(in) :: i integer(psb_ipk_), intent(inout) :: nidx,info real(psb_spk_), intent(in) :: thres,nrmi @@ -753,23 +753,23 @@ contains call psb_ensure_size(200*ione,idxs,info) if (info /= psb_success_) return nidx = 0 - lastk = -1 + lastk = -1 ! ! Do while there are indices to be processed ! do - call heap%get_first(k,iret) + call heap%get_first(k,iret) if (iret < 0) exit - ! + ! ! An index may have been put on the heap more than once. ! if (k == lastk) cycle - lastk = k + lastk = k lowert: if (k nidx) exit if (idxs(idxp) >= i) exit @@ -987,8 +987,8 @@ contains ! if (abs(witem) < thres*nrmi) cycle - nz = nz + 1 - xw(nz) = witem + nz = nz + 1 + xw(nz) = witem xwid(nz) = widx call heap%insert(witem,widx,info) if (info /= psb_success_) then @@ -1000,9 +1000,9 @@ contains ! ! Now we have to take out the first nlw+fill_in entries - ! + ! if (nz <= nlw+fill_in) then - ! + ! ! Just copy everything from xw, and it is already ordered ! else @@ -1014,7 +1014,7 @@ contains call psb_errpush(info,name,a_err='psb_heap_get_first') goto 9999 end if - + xw(k) = witem xwid(k) = widx end do @@ -1029,15 +1029,15 @@ contains ! Copy out the lower part of the row ! do k=1,nz - l1 = l1 + 1 + l1 = l1 + 1 if (size(lval) < l1) then - ! + ! ! Figure out a good reallocation size! - ! + ! isz = (max((l1/i)*m,int(1.2*l1),l1+100)) - call psb_realloc(isz,lval,info) - if (info == psb_success_) call psb_realloc(isz,lja,info) - if (info /= psb_success_) then + call psb_realloc(isz,lval,info) + if (info == psb_success_) call psb_realloc(isz,lja,info) + if (info /= psb_success_) then info=psb_err_from_subroutine_ call psb_errpush(info,name,a_err='Allocate') goto 9999 @@ -1046,25 +1046,25 @@ contains lja(l1) = xwid(k) lval(l1) = xw(indx(k)) end do - + ! ! Make sure idxp points to the diagonal entry ! - if (idxp <= size(idxs)) then - if (idxs(idxp) < i) then - do + if (idxp <= size(idxs)) then + if (idxs(idxp) < i) then + do idxp = idxp + 1 if (idxp > nidx) exit if (idxs(idxp) >= i) exit end do end if end if - if (idxp > size(idxs)) then + if (idxp > size(idxs)) then !!$ write(0,*) 'Warning: missing diagonal element in the row ' else - if (idxs(idxp) > i) then + if (idxs(idxp) > i) then !!$ write(0,*) 'Warning: missing diagonal element in the row ' - else if (idxs(idxp) /= i) then + else if (idxs(idxp) /= i) then !!$ write(0,*) 'Warning: impossible error: diagonal has vanished' else ! @@ -1076,7 +1076,7 @@ contains if (abs(d(i)) < s_epstol) then ! ! Too small pivot: unstable factorization - ! + ! info = psb_err_pivot_too_small_ int_err(1) = i write(ch_err,'(g20.10)') d(i) @@ -1092,7 +1092,7 @@ contains end if ! - ! Now the upper part + ! Now the upper part ! call heap%init(info,dir=psb_asort_down_) @@ -1104,15 +1104,15 @@ contains nz = 0 do - + idxp = idxp + 1 if (idxp > nidx) exit widx = idxs(idxp) - if (widx <= i) then + if (widx <= i) then !!$ write(0,*) 'Warning: lower triangle in upper copy',widx,i,idxp,idxs(idxp) cycle end if - if (widx > m) then + if (widx > m) then !!$ write(0,*) 'Warning: impossible value',widx,i,idxp,idxs(idxp) cycle end if @@ -1120,12 +1120,12 @@ contains ! ! Dropping rule based on the 2-norm. But keep the jmaxup-th entry anyway. ! - if ((widx /= jmaxup) .and. (abs(witem) < thres*nrmi)) then - cycle + if ((widx /= jmaxup) .and. (abs(witem) < thres*nrmi)) then + cycle end if nz = nz + 1 - xw(nz) = witem + xw(nz) = witem xwid(nz) = widx call heap%insert(witem,widx,info) if (info /= psb_success_) then @@ -1133,7 +1133,7 @@ contains call psb_errpush(info,name,a_err='psb_insert_heap') goto 9999 end if - + end do ! @@ -1141,7 +1141,7 @@ contains ! we include entry jmaxup. ! if (nz <= nup+fill_in) then - ! + ! ! Just copy everything from xw ! fndmaxup=.true. @@ -1155,13 +1155,13 @@ contains if (widx == jmaxup) fndmaxup=.true. end do end if - if ((i psb_c_invk_bld + use psb_c_ilu_fact_mod + implicit none + + ! Arguments + type(psb_cspmat_type), intent(in), target :: a + integer(psb_ipk_), intent(in) :: fill1, fill2 + type(psb_cspmat_type), intent(inout) :: lmat, umat + complex(psb_spk_), allocatable :: d(:) + Type(psb_desc_type), Intent(inout) :: desc + integer(psb_ipk_), intent(out) :: info + type(psb_cspmat_type), intent(in), optional :: blck + ! + integer(psb_ipk_) :: i, nztota, err_act, n_row, nrow_a, n_col + type(psb_cspmat_type) :: atmp + complex(psb_spk_), allocatable :: pq(:), pd(:) + integer(psb_ipk_), allocatable :: uplevs(:) + integer(psb_ipk_) :: debug_level, debug_unit + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: np,me + integer(psb_ipk_) :: nzrmax + character(len=20) :: name, ch_err + + + info = psb_success_ + name='psb_cinvk_bld' + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_; goto 9999 + end if + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + ctxt = psb_cd_get_context(desc) + call psb_info(ctxt, me, np) + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),' start' + + ! + ! Check the memory available to hold the incomplete L and U factors + ! and allocate it if needed + ! + nrow_a = a%get_nrows() + nztota = a%get_nzeros() + + if (present(blck)) then + nztota = nztota + blck%get_nzeros() + end if + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & ': out get_nnzeros',nrow_a,nztota,& + & a%get_nrows(),a%get_ncols(),a%get_nzeros() + + + n_row = psb_cd_get_local_rows(desc) + n_col = psb_cd_get_local_cols(desc) + allocate(pd(n_row),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + end if + + + call lmat%allocate(n_row,n_row,info,nz=nztota) + if (info == psb_success_) call umat%allocate(n_row,n_row,info,nz=nztota) + + + call psb_iluk_fact(fill1,psb_ilu_n_,a,lmat,umat,pd,info,blck=blck) + !,uplevs=uplevs) + !call psb_ciluk_fact(fill1,psb_ilu_n_,a,lmat,umat,pd,info,blck=blck) + + if (info == psb_success_) call atmp%allocate(n_row,n_row,info,nz=nztota) + if(info/=0) then + info=psb_err_from_subroutine_ + ch_err='psb_sp_all' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + + ! + ! Compute the aprox U^-1 and L^-1 + ! + call psb_sparse_invk(n_row,umat,atmp,fill2,info) + if (info == psb_success_) call psb_move_alloc(atmp,umat,info) + if (info == psb_success_) call lmat%transp() + if (info == psb_success_) call psb_sparse_invk(n_row,lmat,atmp,fill2,info) + if (info == psb_success_) call psb_move_alloc(atmp,lmat,info) + if (info == psb_success_) call lmat%transp() + ! Done. Hopefully.... + + if (info /= psb_success_) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='invt') + goto 9999 + end if + + call psb_move_alloc(pd,d,info) + call lmat%set_asb() + call lmat%trim() + call umat%set_asb() + call umat%trim() + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),' end' + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return +end subroutine psb_c_invk_bld + +subroutine psb_csparse_invk(n,a,z,fill_in,info,inlevs) + + use psb_base_mod + use psb_c_invk_fact_mod, psb_protect_name => psb_csparse_invk + + integer(psb_ipk_), intent(in) :: n + type(psb_cspmat_type), intent(in) :: a + type(psb_cspmat_type), intent(inout) :: z + integer(psb_ipk_), intent(in) :: fill_in + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: inlevs(:) + ! + integer(psb_ipk_) :: i,j,k, err_act, nz, nzra, nzrz, ipz1,ipz2, nzz, ip1, ip2, l2 + integer(psb_ipk_), allocatable :: ia(:), ja(:), iz(:), jz(:) + complex(psb_spk_), allocatable :: zw(:), val(:), valz(:) + integer(psb_ipk_), allocatable :: uplevs(:), rowlevs(:), idxs(:) + complex(psb_spk_), allocatable :: row(:) + type(psb_c_coo_sparse_mat) :: trw + type(psb_c_csr_sparse_mat) :: acsr, zcsr + integer(psb_ipk_) :: ktrw, nidx + type(psb_i_heap) :: heap + + real(psb_dpk_) :: alpha + character(len=20) :: name='psb_sp_invk' + + info = psb_success_ + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_; goto 9999 + end if + + if (.not.(a%is_triangle().and.a%is_unit().and.a%is_upper())) then + write(psb_err_unit,*) 'Wrong A ' + info = psb_err_internal_error_ + call psb_errpush(psb_err_internal_error_,name,a_err='wrong A') + goto 9999 + end if + call a%cp_to(acsr) + call trw%allocate(izero,izero,ione) + if (info == psb_success_) allocate(zw(n),iz(n),valz(n),& + & row(n),rowlevs(n),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + end if + + allocate(uplevs(acsr%get_nzeros()),stat=info) + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='Allocate') + goto 9999 + end if + uplevs(:) = 0 + row(:) = czero + rowlevs(:) = -(n+1) + + call zcsr%allocate(n,n,n*fill_in) + call zcsr%set_triangle() + call zcsr%set_unit(.false.) + call zcsr%set_upper() + call psb_ensure_size(n+1, idxs, info) + + + ! + ! + zcsr%irp(1) = 1 + nzz = 0 + + l2 = 0 + outer: do i = 1, n-1 + ! ZW = e_i + call psb_invk_copyin(i,n,acsr,ione,n,row,rowlevs,heap,ktrw,trw,info,& + & sign=-sone,inlevs=inlevs) + row(i) = cone + rowlevs(i) = 0 + + ! Update loop + call psb_invk_inv(fill_in,i,row,rowlevs,heap,& + & acsr%ja,acsr%irp,acsr%val,uplevs,nidx,idxs,info) + + call psb_invk_copyout(fill_in,i,n,row,rowlevs,nidx,idxs,& + & l2,zcsr%ja,zcsr%irp,zcsr%val,info) + + nzz = l2 + end do outer + if (info /= psb_success_) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='mainloop') + goto 9999 + end if + ipz1 = nzz+1 + call psb_ensure_size(ipz1,zcsr%val,info) + call psb_ensure_size(ipz1,zcsr%ja,info) + zcsr%val(ipz1) = cone + zcsr%ja(ipz1) = n + zcsr%irp(n+1) = ipz1+1 + call zcsr%set_sorted() + call z%mv_from(zcsr) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return +end subroutine psb_csparse_invk + +subroutine psb_c_invk_copyin(i,m,a,jmin,jmax,row,rowlevs,heap,ktrw,trw,info,sign,inlevs) + + use psb_base_mod + use psb_c_invk_fact_mod, psb_protect_name => psb_c_invk_copyin + + implicit none + + ! Arguments + type(psb_c_csr_sparse_mat), intent(in) :: a + type(psb_c_coo_sparse_mat), intent(inout) :: trw + integer(psb_ipk_), intent(in) :: i,m,jmin,jmax + integer(psb_ipk_), intent(inout) :: ktrw,info + integer(psb_ipk_), intent(inout) :: rowlevs(:) + complex(psb_spk_), intent(inout) :: row(:) + type(psb_i_heap), intent(inout) :: heap + real(psb_spk_), optional, intent(in) :: sign + integer(psb_ipk_), intent(in), optional :: inlevs(:) + + ! Local variables + integer(psb_ipk_) :: k,j,irb,err_act, nz + integer(psb_ipk_), parameter :: nrb=16 + real(psb_spk_) :: sign_ + character(len=20), parameter :: name='invk_copyin' + character(len=20) :: ch_err + + info = psb_success_ + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_; goto 9999 + end if + + call heap%init(info) + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_init_heap') + goto 9999 + end if + + if (present(sign)) then + sign_ = sign + else + sign_ = sone + end if + + + ! + ! Take a fast shortcut if the matrix is stored in CSR format + ! + if (present(inlevs)) then + do j = a%irp(i), a%irp(i+1) - 1 + k = a%ja(j) + if ((jmin<=k).and.(k<=jmax)) then + row(k) = sign_ * a%val(j) + rowlevs(k) = inlevs(j) + call heap%insert(k,info) + end if + end do + else + do j = a%irp(i), a%irp(i+1) - 1 + k = a%ja(j) + if ((jmin<=k).and.(k<=jmax)) then + row(k) = sign_ * a%val(j) + rowlevs(k) = 0 + call heap%insert(k,info) + end if + end do + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return +end subroutine psb_c_invk_copyin + + +subroutine psb_c_invk_copyout(fill_in,i,m,row,rowlevs,nidx,idxs,& + & l2,uia1,uia2,uaspk,info) + + use psb_base_mod + use psb_c_invk_fact_mod, psb_protect_name => psb_c_invk_copyout + + implicit none + + ! Arguments + integer(psb_ipk_), intent(in) :: fill_in, i, m, nidx + integer(psb_ipk_), intent(inout) :: l2, info + integer(psb_ipk_), intent(inout) :: rowlevs(:), idxs(:) + integer(psb_ipk_), allocatable, intent(inout) :: uia1(:), uia2(:) + complex(psb_spk_), allocatable, intent(inout) :: uaspk(:) + complex(psb_spk_), intent(inout) :: row(:) + + ! Local variables + integer(psb_ipk_) :: j,isz,err_act,int_err(5),idxp + character(len=20), parameter :: name='psb_ciluk_factint' + character(len=20) :: ch_err + + info = psb_success_ + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_; goto 9999 + end if + + do idxp=1,nidx + + j = idxs(idxp) + + if (j>=i) then + ! + ! Copy the upper part of the row + ! + if (rowlevs(j) <= fill_in) then + l2 = l2 + 1 + if (size(uaspk) < l2) then + ! + ! Figure out a good reallocation size! + ! + isz = max(int(1.2*l2),l2+100) + call psb_realloc(isz,uaspk,info) + if (info == psb_success_) call psb_realloc(isz,uia1,info) + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='Allocate') + goto 9999 + end if + end if + uia1(l2) = j + uaspk(l2) = row(j) + end if + ! + ! Re-initialize row(j) and rowlevs(j) + ! + row(j) = czero + rowlevs(j) = -(m+1) + end if + end do + + uia2(i+1) = l2 + 1 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return +end subroutine psb_c_invk_copyout + +subroutine psb_cinvk_inv(fill_in,i,row,rowlevs,heap,ja,irp,val,uplevs,& + & nidx,idxs,info) + + use psb_base_mod + use psb_c_invk_fact_mod, psb_protect_name => psb_cinvk_inv + + implicit none + + ! Arguments + type(psb_i_heap), intent(inout) :: heap + integer(psb_ipk_), intent(in) :: i, fill_in + integer(psb_ipk_), intent(inout) :: nidx,info + integer(psb_ipk_), intent(inout) :: rowlevs(:) + integer(psb_ipk_), allocatable, intent(inout) :: idxs(:) + integer(psb_ipk_), intent(in) :: ja(:),irp(:),uplevs(:) + complex(psb_spk_), intent(in) :: val(:) + complex(psb_spk_), intent(inout) :: row(:) + + ! Local variables + integer(psb_ipk_) :: k,j,lrwk,jj,lastk, iret + real(psb_dpk_) :: rwk + + + info = psb_success_ + + call psb_ensure_size(200, idxs, info) + if (info /= psb_success_) return + nidx = 1 + idxs(1) = i + lastk = i + + ! + ! Do while there are indices to be processed + ! + do + ! Beware: (iret < 0) means that the heap is empty, not an error. + call heap%get_first(k,iret) + if (iret < 0) then +!!$ write(psb_err_unit,*) 'IINVK: ',i,' returning at ',lastk + return + end if + + ! + ! Just in case an index has been put on the heap more than once. + ! + if (k == lastk) cycle + + lastk = k + nidx = nidx + 1 + if (nidx>size(idxs)) then + call psb_realloc(nidx+psb_heap_resize,idxs,info) + if (info /= psb_success_) return + end if + idxs(nidx) = k + + if ((row(k) /= czero).and.(rowlevs(k) <= fill_in)) then + ! + ! Note: since U is scaled while copying it out (see iluk_copyout), + ! we can use rwk in the update below + ! + rwk = row(k) + lrwk = rowlevs(k) + + do jj=irp(k),irp(k+1)-1 + j = ja(jj) + if (j<=k) then + info = -i + return + endif + ! + ! Insert the index into the heap for further processing. + ! The fill levels are initialized to a negative value. If we find + ! one, it means that it is an as yet untouched index, so we need + ! to insert it; otherwise it is already on the heap, there is no + ! need to insert it more than once. + ! + if (rowlevs(j)<0) then + call heap%insert(j,info) + if (info /= psb_success_) return + rowlevs(j) = abs(rowlevs(j)) + end if + ! + ! Update row(j) and the corresponding fill level + ! + row(j) = row(j) - rwk * val(jj) + rowlevs(j) = min(rowlevs(j),lrwk+uplevs(jj)+1) + end do + + end if + end do + +end subroutine psb_cinvk_inv diff --git a/prec/impl/psb_c_invt_fact.f90 b/prec/impl/psb_c_invt_fact.f90 new file mode 100644 index 00000000..73e242f6 --- /dev/null +++ b/prec/impl/psb_c_invt_fact.f90 @@ -0,0 +1,739 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! Moved here from AMG4PSBLAS, original copyright below. +! +! +! AMG-AINV: Approximate Inverse plugin for +! AMG4PSBLAS version 1.0 +! +! (C) Copyright 2020 +! +! Salvatore Filippone University of Rome Tor Vergata +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! + +subroutine psb_c_invt_bld(a,fillin,invfill,thresh,invthresh,& + & lmat,d,umat,desc,info,blck) + + use psb_base_mod + use psb_c_invt_fact_mod, psb_protect_name => psb_c_invt_bld + use psb_c_ilu_fact_mod + implicit none + + ! Arguments + type(psb_cspmat_type), intent(in), target :: a + integer(psb_ipk_), intent(in) :: fillin,invfill + real(psb_spk_), intent(in) :: thresh + real(psb_spk_), intent(in) :: invthresh + type(psb_cspmat_type), intent(inout) :: lmat, umat + complex(psb_spk_), allocatable :: d(:) + Type(psb_desc_type), Intent(inout) :: desc + integer(psb_ipk_), intent(out) :: info + type(psb_cspmat_type), intent(in), optional :: blck + ! + integer(psb_ipk_) :: i, nztota, err_act, n_row, nrow_a, n_col + type(psb_cspmat_type) :: atmp + complex(psb_spk_), allocatable :: pq(:), pd(:), w(:) + integer(psb_ipk_) :: debug_level, debug_unit + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: np,me + integer(psb_ipk_) :: nzrmax + real(psb_spk_) :: sp_thresh + character(len=20) :: name, ch_err, fname + + + info = psb_success_ + name='psb_cinvt_fact' + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_; goto 9999 + end if + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + ctxt = psb_cd_get_context(desc) + call psb_info(ctxt, me, np) + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),' start' + + ! + ! Check the memory available to hold the incomplete L and U factors + ! and allocate it if needed + ! + nrow_a = a%get_nrows() + nztota = a%get_nzeros() + + if (present(blck)) then + nztota = nztota + blck%get_nzeros() + end if + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & ': out get_nnzeros',nrow_a,nztota,& + & a%get_nrows(),a%get_ncols(),a%get_nzeros() + + + n_row = psb_cd_get_local_rows(desc) + n_col = psb_cd_get_local_cols(desc) + allocate(pd(n_row),w(n_row),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + end if + + nzrmax = fillin + sp_thresh = thresh + + call lmat%allocate(n_row,n_row,info,nz=nztota) + if (info == psb_success_) call umat%allocate(n_row,n_row,info,nz=nztota) + + if (info == 0) call psb_ilut_fact(nzrmax,sp_thresh,& + & a,lmat,umat,pd,info,blck=blck,iscale=psb_ilu_scale_maxval_) + + if (info == psb_success_) call atmp%allocate(n_row,n_row,info,nz=nztota) + if(info/=0) then + info=psb_err_from_subroutine_ + ch_err='psb_sp_all' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if (.false.) then +!!$ if (debug_level >= psb_debug_inner_) then + write(fname,'(a,i0,a)') 'invt-lo-',me,'.mtx' + call lmat%print(fname,head="INVTLOW") + write(fname,'(a,i0,a)') 'invt-up-',me,'.mtx' + call umat%print(fname,head="INVTUPP") + end if + + ! + ! Compute the approx U^-1 and L^-1 + ! + nzrmax = invfill + call psb_csparse_invt(n_row,umat,atmp,nzrmax,invthresh,info) + if (info == psb_success_) call psb_move_alloc(atmp,umat,info) + if (info == psb_success_) call lmat%transp() + if (info == psb_success_) call psb_csparse_invt(n_row,lmat,atmp,nzrmax,invthresh,info) + if (info == psb_success_) call psb_move_alloc(atmp,lmat,info) + if (info == psb_success_) call lmat%transp() + ! Done. Hopefully.... + + if (info /= psb_success_) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='invt') + goto 9999 + end if + + call psb_move_alloc(pd,d,info) + call lmat%set_asb() + call lmat%trim() + call umat%set_asb() + call umat%trim() + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),' end' + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return +end subroutine psb_c_invt_bld + +subroutine psb_csparse_invt(n,a,z,nzrmax,sp_thresh,info) + + use psb_base_mod + use psb_c_invt_fact_mod, psb_protect_name => psb_csparse_invt + + implicit none + integer(psb_ipk_), intent(in) :: n + type(psb_cspmat_type), intent(in) :: a + type(psb_cspmat_type), intent(inout) :: z + integer(psb_ipk_), intent(in) :: nzrmax + real(psb_spk_), intent(in) :: sp_thresh + integer(psb_ipk_), intent(out) :: info + ! + integer(psb_ipk_) :: i,j,k, err_act, nz, nzra, nzrz, ipz1,ipz2, nzz, ip1, ip2, l2 + integer(psb_ipk_), allocatable :: ia(:), ja(:), iz(:),jz(:) + complex(psb_spk_), allocatable :: zw(:), val(:), valz(:) + integer(psb_ipk_), allocatable :: uplevs(:), rowlevs(:),idxs(:) + complex(psb_spk_), allocatable :: row(:) + type(psb_c_coo_sparse_mat) :: trw + type(psb_c_csr_sparse_mat) :: acsr, zcsr + integer(psb_ipk_) :: ktrw, nidx, nlw,nup,jmaxup + type(psb_i_heap) :: heap + real(psb_spk_) :: alpha, nrmi + character(len=20) :: name='psb_sp_invt' + + info = psb_success_ + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_; goto 9999 + end if + + if (.not.(a%is_triangle().and.a%is_unit().and.a%is_upper())) then + write(psb_err_unit,*) 'Wrong A ' + info = psb_err_internal_error_ + call psb_errpush(psb_err_internal_error_,name,a_err='wrong A') + goto 9999 + end if + call a%cp_to(acsr) + call trw%allocate(izero,izero,ione) + if (info == psb_success_) allocate(zw(n),iz(n),valz(n),& + & row(n),rowlevs(n),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + end if + + call zcsr%allocate(n,n,n*nzrmax) + call zcsr%set_triangle() + call zcsr%set_unit(.false.) + call zcsr%set_upper() + ! + ! + nzz = 0 + row(:) = czero + rowlevs(:) = 0 + l2 = 0 + zcsr%irp(1) = 1 + + outer: do i = 1, n-1 + ! ZW = e_i + call psb_c_invt_copyin(i,n,acsr,i,ione,n,nlw,nup,jmaxup,nrmi,row,& + & heap,rowlevs,ktrw,trw,info,sign=-sone) + if (info /= 0) exit + row(i) = cone + ! Adjust norm + if (nrmi < sone) then + nrmi = sqrt(sone + nrmi**2) + else + nrmi = nrmi*sqrt(cone+cone/(nrmi**2)) + end if + + call psb_invt_inv(sp_thresh,i,nrmi,row,heap,rowlevs,& + & acsr%ja,acsr%irp,acsr%val,nidx,idxs,info) + if (info /= 0) exit +!!$ write(0,*) 'Calling copyout ',nzrmax,nlw,nup,nidx,l2 + call psb_c_invt_copyout(nzrmax,sp_thresh,i,n,nlw,nup,jmaxup,nrmi,row,& + & nidx,idxs,l2,zcsr%ja,zcsr%irp,zcsr%val,info) + if (info /= 0) exit + nzz = l2 + end do outer + if (info /= psb_success_) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='mainloop') + goto 9999 + end if + + ipz1 = nzz+1 + call psb_ensure_size(ipz1,zcsr%val,info) + call psb_ensure_size(ipz1,zcsr%ja,info) + zcsr%val(ipz1) = cone + zcsr%ja(ipz1) = n + zcsr%irp(n+1) = ipz1+1 + + call z%mv_from(zcsr) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return +end subroutine psb_csparse_invt + +subroutine psb_c_invt_copyin(i,m,a,jd,jmin,jmax,nlw,nup,jmaxup,nrmi,row,heap,& + & irwt,ktrw,trw,info,sign) + use psb_base_mod + use psb_d_invt_fact_mod, psb_protect_name => psb_d_invt_copyin + implicit none + type(psb_c_csr_sparse_mat), intent(in) :: a + type(psb_c_coo_sparse_mat), intent(inout) :: trw + integer(psb_ipk_), intent(in) :: i, m,jmin,jmax,jd + integer(psb_ipk_), intent(inout) :: ktrw,nlw,nup,jmaxup,info + integer(psb_ipk_), intent(inout) :: irwt(:) + real(psb_spk_), intent(inout) :: nrmi + complex(psb_spk_), intent(inout) :: row(:) + type(psb_i_heap), intent(inout) :: heap + real(psb_spk_), intent(in), optional :: sign + ! + integer(psb_ipk_) :: k,j,irb,kin,nz, err_act + integer(psb_ipk_), parameter :: nrb=16 + real(psb_dpk_) :: dmaxup, sign_ + real(psb_dpk_), external :: dnrm2 + character(len=20), parameter :: name='invt_copyin' + + info = psb_success_ + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_; goto 9999 + end if + + call heap%init(info) + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_init_heap') + goto 9999 + end if + sign_ = sone + if (present(sign)) sign_ = sign + ! + ! nrmi is the norm of the current sparse row (for the time being, + ! we use the 2-norm). + ! NOTE: the 2-norm below includes also elements that are outside + ! [jmin:jmax] strictly. Is this really important? TO BE CHECKED. + ! + + nlw = 0 + nup = 0 + jmaxup = 0 + dmaxup = czero + nrmi = czero + + do j = a%irp(i), a%irp(i+1) - 1 + k = a%ja(j) + if ((jmin<=k).and.(k<=jmax)) then + row(k) = sign_ * a%val(j) + call heap%insert(k,info) + irwt(k) = 1 + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_insert_heap') + goto 9999 + end if + end if + if (kjd) then + nup = nup + 1 + if (abs(row(k))>dmaxup) then + jmaxup = k + dmaxup = abs(row(k)) + end if + end if + end do + nz = a%irp(i+1) - a%irp(i) + nrmi = dnrm2(nz,a%val(a%irp(i):),ione) + + return + +9999 call psb_error_handler(err_act) + return +end subroutine psb_c_invt_copyin + +subroutine psb_c_invt_copyout(fill_in,thres,i,m,nlw,nup,jmaxup,nrmi,row, & + & nidx,idxs,l2,ja,irp,val,info) + + use psb_base_mod + use psb_c_invt_fact_mod, psb_protect_name => psb_c_invt_copyout + + implicit none + + ! Arguments + integer(psb_ipk_), intent(in) :: fill_in,i,m,nidx,nlw,nup,jmaxup + integer(psb_ipk_), intent(in) :: idxs(:) + integer(psb_ipk_), intent(inout) :: l2, info + integer(psb_ipk_), allocatable, intent(inout) :: ja(:),irp(:) + real(psb_spk_), intent(in) :: thres,nrmi + complex(psb_spk_),allocatable, intent(inout) :: val(:) + complex(psb_spk_), intent(inout) :: row(:) + + ! Local variables + real(psb_dpk_),allocatable :: xw(:) + integer(psb_ipk_), allocatable :: xwid(:), indx(:) + real(psb_dpk_) :: witem, wmin + integer(psb_ipk_) :: widx + integer(psb_ipk_) :: k,isz,err_act,int_err(5),idxp, nz + type(psb_d_idx_heap) :: heap + character(len=20), parameter :: name='invt_copyout' + character(len=20) :: ch_err + logical :: fndmaxup + + info = psb_success_ + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_; goto 9999 + end if + + ! + ! Here we need to apply also the dropping rule base on the fill-in. + ! We do it by putting into a heap the elements that are not dropped + ! by using the 2-norm rule, and then copying them out. + ! + ! The heap goes down on the entry absolute value, so the first item + ! is the largest absolute value. + ! +!!$ write(0,*) 'invt_copyout ',nidx,nup+fill_in + call heap%init(info,dir=psb_asort_down_) + + if (info == psb_success_) allocate(xwid(nidx),xw(nidx),indx(nidx),stat=info) + if (info /= psb_success_) then + info=psb_err_alloc_request_ + call psb_errpush(info,name,i_err=(/3*nidx/),& + & a_err='real(psb_dpk_)') + goto 9999 + end if + + ! + ! First the lower part + ! + + nz = 0 + idxp = 0 + + do + + idxp = idxp + 1 + if (idxp > nidx) exit + if (idxs(idxp) >= i) exit + widx = idxs(idxp) + witem = row(widx) + ! + ! Dropping rule based on the 2-norm + ! + if (abs(witem) < thres*nrmi) cycle + + nz = nz + 1 + xw(nz) = witem + xwid(nz) = widx + call heap%insert(witem,widx,info) + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_insert_heap') + goto 9999 + end if + end do + + if (nz > 1) then + write(psb_err_unit,*) 'Warning: lower triangle from invt???? ' + end if + + + if (idxp <= size(idxs)) then + if (idxs(idxp) < i) then + do + idxp = idxp + 1 + if (idxp > nidx) exit + if (idxs(idxp) >= i) exit + end do + end if + end if + idxp = idxp - 1 + nz = 0 + wmin=HUGE(wmin) + if (.false.) then + do + + idxp = idxp + 1 + if (idxp > nidx) exit + widx = idxs(idxp) + if (widx < i) then + write(psb_err_unit,*) 'Warning: lower triangle in upper copy',widx,i,idxp,idxs(idxp) + cycle + end if + if (widx > m) then + cycle + end if + witem = row(widx) + ! + ! Dropping rule based on the 2-norm. But keep the jmaxup-th entry anyway. + ! + if ((widx /= jmaxup) .and. (widx /= i) .and. (abs(witem) < thres*nrmi)) then + cycle + end if + if ((widx/=jmaxup).and.(nz > nup+fill_in)) then + if (abs(witem) < wmin) cycle + endif + wmin = min(abs(witem),wmin) + nz = nz + 1 + xw(nz) = witem + xwid(nz) = widx + call heap%insert(witem,widx,info) + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_insert_heap') + goto 9999 + end if + + end do + + ! + ! Now we have to take out the first nup-fill_in entries. But make sure + ! we include entry jmaxup. + ! + if (nz <= nup+fill_in) then + ! + ! Just copy everything from xw + ! + fndmaxup=.true. + else + fndmaxup = .false. + nz = nup+fill_in + do k=1,nz + call heap%get_first(witem,widx,info) + xw(k) = witem + xwid(k) = widx + if (widx == jmaxup) fndmaxup=.true. + end do + end if + if ((i nidx) exit + widx = idxs(idxp) + if (widx < i) then + write(psb_err_unit,*) 'Warning: lower triangle in upper copy',widx,i,idxp,idxs(idxp) + cycle + end if + if (widx > m) then + cycle + end if + witem = row(widx) + ! + ! Dropping rule based on the 2-norm. But keep the jmaxup-th entry anyway. + ! + if ((widx /= i) .and. (abs(witem) < thres*nrmi)) then + cycle + end if + if (nz > nup+fill_in) then + if (abs(witem) < wmin) cycle + endif + wmin = min(abs(witem),wmin) + nz = nz + 1 + xw(nz) = witem + xwid(nz) = widx + call heap%insert(witem,widx,info) + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_insert_heap') + goto 9999 + end if + + end do + + ! + ! Now we have to take out the first nup-fill_in entries. But make sure + ! we include entry jmaxup. + ! + if (nz > nup+fill_in) then + nz = nup+fill_in + do k=1,nz + call heap%get_first(witem,widx,info) + xw(k) = witem + xwid(k) = widx + end do + end if + end if + + ! + ! Now we put things back into ascending column order + ! + call psb_msort(xwid(1:nz),indx(1:nz),dir=psb_sort_up_) + + ! + ! Copy out the upper part of the row + ! + do k=1,nz + l2 = l2 + 1 + if (size(val) < l2) then + ! + ! Figure out a good reallocation size! + ! + isz = max(int(1.2*l2),l2+100) + call psb_realloc(isz,val,info) + if (info == psb_success_) call psb_realloc(isz,ja,info) + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='Allocate') + goto 9999 + end if + end if + ja(l2) = xwid(k) + val(l2) = xw(indx(k)) + end do + + ! + ! Set row to zero + ! + do idxp=1,nidx + row(idxs(idxp)) = czero + end do + + irp(i+1) = l2 + 1 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return +end subroutine psb_c_invt_copyout + +subroutine psb_c_invt_inv(thres,i,nrmi,row,heap,irwt,ja,irp,val,nidx,idxs,info) + + use psb_base_mod + use psb_c_invt_fact_mod, psb_protect_name => psb_c_invt_inv + + implicit none + + ! Arguments + type(psb_i_heap), intent(inout) :: heap + integer(psb_ipk_), intent(in) :: i + integer(psb_ipk_), intent(inout) :: nidx,info + integer(psb_ipk_), intent(inout) :: irwt(:) + real(psb_spk_), intent(in) :: thres,nrmi + integer(psb_ipk_), allocatable, intent(inout) :: idxs(:) + integer(psb_ipk_), intent(in) :: ja(:),irp(:) + complex(psb_spk_), intent(in) :: val(:) + complex(psb_spk_), intent(inout) :: row(:) + + ! Local Variables + integer(psb_ipk_) :: k,j,jj,lastk,iret + real(psb_dpk_) :: rwk, alpha + + info = psb_success_ + + call psb_ensure_size(200, idxs, info) + if (info /= psb_success_) return + nidx = 1 + idxs(1) = i + lastk = i + irwt(i) = 1 +!!$ write(0,*) 'Drop Threshold ',thres*nrmi + ! + ! Do while there are indices to be processed + ! + do + + call heap%get_first(k,iret) + if (iret < 0) exit + + ! + ! An index may have been put on the heap more than once. + ! Should not happen, but just in case. + ! + if (k == lastk) cycle + lastk = k + + ! + ! Dropping rule based on the threshold: compare the absolute + ! value of each updated entry of row with thres * 2-norm of row. + ! + rwk = row(k) + + if (abs(rwk) < thres*nrmi) then + ! + ! Drop the entry. + ! + row(k) = dzero + irwt(k) = 0 + cycle + else + ! + ! Note: since U is scaled while copying it out (see ilut_copyout), + ! we can use rwk in the update below. + ! + do jj=irp(k),irp(k+1)-1 + j = ja(jj) + if (j<=k) then + info = -i + return + endif + ! + ! Update row(j) and, if it is not to be discarded, insert + ! its index into the heap for further processing. + ! + row(j) = row(j) - rwk * val(jj) + if (irwt(j) == 0) then + if (abs(row(j)) < thres*nrmi) then + ! + ! Drop the entry. + ! + row(j) = dzero + else + ! + ! Do the insertion. + ! + call heap%insert(j,info) + if (info /= psb_success_) return + irwt(j) = 1 + end if + end if + end do + end if + + ! + ! If we get here it is an index we need to keep on copyout. + ! + + nidx = nidx + 1 + call psb_ensure_size(nidx,idxs,info,addsz=psb_heap_resize) + if (info /= psb_success_) return + idxs(nidx) = k + irwt(k) = 0 + end do + + irwt(i) = 0 +end subroutine psb_c_invt_inv diff --git a/prec/impl/psb_c_prec_type_impl.f90 b/prec/impl/psb_c_prec_type_impl.f90 index 81a139ea..e82d7dca 100644 --- a/prec/impl/psb_c_prec_type_impl.f90 +++ b/prec/impl/psb_c_prec_type_impl.f90 @@ -1,9 +1,9 @@ -! +! ! Parallel Sparse BLAS version 3.5 ! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! +! Salvatore Filippone +! Alfredo Buttari +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -15,7 +15,7 @@ ! 3. The name of the PSBLAS group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. -! +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -27,14 +27,14 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! -! -! +! +! +! ! Parallel Sparse BLAS version 3.5 ! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! +! Salvatore Filippone +! Alfredo Buttari +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -46,7 +46,7 @@ !!$ 3. The name of the PSBLAS group or the names of its contributors may !!$ not be used to endorse or promote products derived from this !!$ software without specific written permission. -!!$ +!!$ !!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS !!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED !!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -58,14 +58,14 @@ !!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) !!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE !!$ POSSIBILITY OF SUCH DAMAGE. -!!$ -!!$ +!!$ +!!$ !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! subroutine psb_c_apply2_vect(prec,x,y,desc_data,info,trans,work) use psb_base_mod use psb_c_prec_type, psb_protect_name => psb_c_apply2_vect - implicit none + implicit none type(psb_desc_type),intent(in) :: desc_data class(psb_cprec_type), intent(inout) :: prec type(psb_c_vect_type),intent(inout) :: x @@ -74,7 +74,7 @@ subroutine psb_c_apply2_vect(prec,x,y,desc_data,info,trans,work) character(len=1), optional :: trans complex(psb_spk_),intent(inout), optional, target :: work(:) - character :: trans_ + character :: trans_ complex(psb_spk_), pointer :: work_(:) type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np,me @@ -88,25 +88,25 @@ subroutine psb_c_apply2_vect(prec,x,y,desc_data,info,trans,work) ctxt = desc_data%get_context() call psb_info(ctxt, me, np) - if (present(trans)) then + if (present(trans)) then trans_=psb_toupper(trans) else trans_='N' end if - if (present(work)) then + if (present(work)) then work_ => work else allocate(work_(4*desc_data%get_local_cols()),stat=info) - if (info /= psb_success_) then + if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err='Allocate') - goto 9999 + goto 9999 end if end if - if (.not.allocated(prec%prec)) then + if (.not.allocated(prec%prec)) then info = 1124 call psb_errpush(info,name,a_err="preconditioner") goto 9999 @@ -115,13 +115,13 @@ subroutine psb_c_apply2_vect(prec,x,y,desc_data,info,trans,work) call prec%prec%apply(cone,x,czero,y,desc_data,info,& & trans=trans_,work=work_) - if (present(work)) then + if (present(work)) then else deallocate(work_,stat=info) - if (info /= psb_success_) then + if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err='DeAllocate') - goto 9999 + goto 9999 end if end if @@ -136,7 +136,7 @@ end subroutine psb_c_apply2_vect subroutine psb_c_apply1_vect(prec,x,desc_data,info,trans,work) use psb_base_mod use psb_c_prec_type, psb_protect_name => psb_c_apply1_vect - implicit none + implicit none type(psb_desc_type),intent(in) :: desc_data class(psb_cprec_type), intent(inout) :: prec type(psb_c_vect_type),intent(inout) :: x @@ -145,7 +145,7 @@ subroutine psb_c_apply1_vect(prec,x,desc_data,info,trans,work) complex(psb_spk_),intent(inout), optional, target :: work(:) type(psb_c_vect_type) :: ww - character :: trans_ + character :: trans_ complex(psb_spk_), pointer :: work_(:) type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np,me @@ -159,25 +159,25 @@ subroutine psb_c_apply1_vect(prec,x,desc_data,info,trans,work) ctxt = desc_data%get_context() call psb_info(ctxt, me, np) - if (present(trans)) then + if (present(trans)) then trans_=psb_toupper(trans) else trans_='N' end if - if (present(work)) then + if (present(work)) then work_ => work else allocate(work_(4*desc_data%get_local_cols()),stat=info) - if (info /= psb_success_) then + if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err='Allocate') - goto 9999 + goto 9999 end if end if - if (.not.allocated(prec%prec)) then + if (.not.allocated(prec%prec)) then info = 1124 call psb_errpush(info,name,a_err="preconditioner") goto 9999 @@ -188,13 +188,13 @@ subroutine psb_c_apply1_vect(prec,x,desc_data,info,trans,work) & trans=trans_,work=work_) if (info == 0) call psb_geaxpby(cone,ww,czero,x,desc_data,info) call psb_gefree(ww,desc_data,info) - if (present(work)) then + if (present(work)) then else deallocate(work_,stat=info) - if (info /= psb_success_) then + if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err='DeAllocate') - goto 9999 + goto 9999 end if end if @@ -209,7 +209,7 @@ end subroutine psb_c_apply1_vect subroutine psb_c_apply2v(prec,x,y,desc_data,info,trans,work) use psb_base_mod use psb_c_prec_type, psb_protect_name => psb_c_apply2v - implicit none + implicit none type(psb_desc_type),intent(in) :: desc_data class(psb_cprec_type), intent(inout) :: prec complex(psb_spk_),intent(inout) :: x(:) @@ -218,7 +218,7 @@ subroutine psb_c_apply2v(prec,x,y,desc_data,info,trans,work) character(len=1), optional :: trans complex(psb_spk_),intent(inout), optional, target :: work(:) - character :: trans_ + character :: trans_ complex(psb_spk_), pointer :: work_(:) type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np,me @@ -232,37 +232,37 @@ subroutine psb_c_apply2v(prec,x,y,desc_data,info,trans,work) ctxt = desc_data%get_context() call psb_info(ctxt, me, np) - if (present(trans)) then + if (present(trans)) then trans_=trans else trans_='N' end if - if (present(work)) then + if (present(work)) then work_ => work else allocate(work_(4*desc_data%get_local_cols()),stat=info) - if (info /= psb_success_) then + if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err='Allocate') - goto 9999 + goto 9999 end if end if - if (.not.allocated(prec%prec)) then + if (.not.allocated(prec%prec)) then info = 1124 call psb_errpush(info,name,a_err="preconditioner") goto 9999 end if call prec%prec%apply(cone,x,czero,y,desc_data,info,trans_,work=work_) - if (present(work)) then + if (present(work)) then else deallocate(work_,stat=info) - if (info /= psb_success_) then + if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err='DeAllocate') - goto 9999 + goto 9999 end if end if @@ -277,7 +277,7 @@ end subroutine psb_c_apply2v subroutine psb_c_apply1v(prec,x,desc_data,info,trans) use psb_base_mod use psb_c_prec_type, psb_protect_name => psb_c_apply1v - implicit none + implicit none type(psb_desc_type),intent(in) :: desc_data class(psb_cprec_type), intent(inout) :: prec complex(psb_spk_),intent(inout) :: x(:) @@ -293,36 +293,34 @@ subroutine psb_c_apply1v(prec,x,desc_data,info,trans) name='psb_c_apply1v' info = psb_success_ call psb_erractionsave(err_act) - - ctxt = desc_data%get_context() call psb_info(ctxt, me, np) - if (present(trans)) then + if (present(trans)) then trans_=psb_toupper(trans) else trans_='N' end if - if (.not.allocated(prec%prec)) then + if (.not.allocated(prec%prec)) then info = 1124 call psb_errpush(info,name,a_err="preconditioner") goto 9999 end if allocate(ww(size(x)),w1(size(x)),stat=info) - if (info /= psb_success_) then + if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err='Allocate') - goto 9999 + goto 9999 end if call prec%prec%apply(cone,x,czero,ww,desc_data,info,& & trans_,work=w1) if(info /= psb_success_) goto 9999 x(:) = ww(:) deallocate(ww,W1,stat=info) - if (info /= psb_success_) then + if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err='DeAllocate') - goto 9999 + goto 9999 end if @@ -336,3 +334,159 @@ subroutine psb_c_apply1v(prec,x,desc_data,info,trans) end subroutine psb_c_apply1v +subroutine psb_ccprecseti(prec,what,val,info,ilev,ilmax,pos,idx) + use psb_base_mod + use psb_c_prec_type, psb_protect_name => psb_ccprecseti + implicit none + + class(psb_cprec_type), intent(inout) :: prec + character(len=*), intent(in) :: what + integer(psb_ipk_), intent(in) :: val + integer(psb_ipk_), intent(out) :: info + ! This optional inputs are backport from the inputs available in AMG4PSBLAS, + ! they are of no actual use here a part from compatibility reasons. + integer(psb_ipk_), optional, intent(in) :: ilev,ilmax,idx + character(len=*), optional, intent(in) :: pos + + ! Local variables + character(len=*), parameter :: name='psb_precseti' + + info = psb_success_ + + ! We need to convert from the 'what' string to the corresponding integer + ! value befor passing the call to the set of the inner method. + select case (psb_toupper(trim(what))) + case ('SUB_FILLIN') + call prec%prec%precset(psb_ilu_fill_in_,val,info) + case ('INV_FILLIN') + call prec%prec%precset(psb_inv_fillin_,val,info) + case default + info = psb_err_invalid_args_combination_ + write(psb_err_unit,*) name,& + & ': Error: uninitialized preconditioner,',& + &' should call prec%init' + return + end select + +end subroutine psb_ccprecseti + +subroutine psb_ccprecsetr(prec,what,val,info,ilev,ilmax,pos,idx) + use psb_base_mod + use psb_c_prec_type, psb_protect_name => psb_ccprecsetr + implicit none + + class(psb_cprec_type), intent(inout) :: prec + character(len=*), intent(in) :: what + real(psb_spk_), intent(in) :: val + integer(psb_ipk_), intent(out) :: info + ! This optional inputs are backport from the inputs available in AMG4PSBLAS, + ! they are of no actual use here a part from compatibility reasons. + integer(psb_ipk_), optional, intent(in) :: ilev,ilmax,idx + character(len=*), optional, intent(in) :: pos + + ! Local variables + character(len=*), parameter :: name='psb_precsetr' + + info = psb_success_ + + ! We need to convert from the 'what' string to the corresponding integer + ! value befor passing the call to the set of the inner method. + select case (psb_toupper(trim(what))) + case('SUB_ILUTHRS') + call prec%prec%precset(psb_fact_eps_,val,info) + case('INV_THRESH') + call prec%prec%precset(psb_inv_thresh_,val,info) + case default + info = psb_err_invalid_args_combination_ + write(psb_err_unit,*) name,& + & ': Error: uninitialized preconditioner,',& + &' should call prec%init' + return + end select + +end subroutine psb_ccprecsetr + +subroutine psb_ccprecsetc(prec,what,string,info,ilev,ilmax,pos,idx) + use psb_base_mod + use psb_c_prec_type, psb_protect_name => psb_ccprecsetc + implicit none + + class(psb_cprec_type), intent(inout) :: prec + character(len=*), intent(in) :: what + character(len=*), intent(in) :: string + integer(psb_ipk_), intent(out) :: info + ! This optional inputs are backport from the inputs available in AMG4PSBLAS, + ! they are of no actual use here a part from compatibility reasons. + integer(psb_ipk_), optional, intent(in) :: ilev,ilmax,idx + character(len=*), optional, intent(in) :: pos + + ! Local variables + character(len=*), parameter :: name='psb_precsetc' + + info = psb_success_ + + ! We need to convert from the 'what' string to the corresponding integer + ! value befor passing the call to the set of the inner method. + select case (psb_toupper(trim(what))) + case ('SUB_SOLVE') + ! We select here the type of solver on the block + select case (psb_toupper(trim(string))) + case("ILU") + call prec%prec%precset(psb_f_type_,psb_f_ilu_k_,info) + call prec%prec%precset(psb_ilu_ialg_,psb_ilu_n_,info) + case("ILUT") + call prec%prec%precset(psb_f_type_,psb_f_ilu_t_,info) + call prec%prec%precset(psb_ilu_ialg_,psb_ilu_t_,info) + case("AINV") + call prec%prec%precset(psb_f_type_,psb_f_ainv_,info) + case("INVK") + call prec%prec%precset(psb_f_type_,psb_f_invk_,info) + case("INVT") + call prec%prec%precset(psb_f_type_,psb_f_invt_,info) + case default + ! Default to ILU(0) factorization + call prec%prec%precset(psb_f_type_,psb_f_ilu_n_,info) + call prec%prec%precset(psb_ilu_ialg_,psb_ilu_n_,info) + end select + case ("ILU_ALG") + select case (psb_toupper(trim(string))) + case ("MILU") + call prec%prec%precset(psb_ilu_ialg_,psb_milu_n_,info) + case default + ! Do nothing + end select + case ("ILUT_SCALE") + select case (psb_toupper(trim(string))) + case ("MAXVAL") + call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_maxval_,info) + case ("DIAG") + call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_diag_,info) + case ("ARWSUM") + call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_arwsum_,info) + case ("ARCSUM") + call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_arcsum_,info) + case ("ACLSUM") + call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_aclsum_,info) + case ("NONE") + call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_none_,info) + case default + call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_none_,info) + end select + case ("AINV_ALG") + select case (psb_toupper(trim(string))) + case("LLK") + call prec%prec%precset(psb_ainv_alg_,psb_ainv_llk_,info) + case("SYM-LLK") + call prec%prec%precset(psb_ainv_alg_,psb_ainv_s_llk_,info) + case("STAB-LLK") + call prec%prec%precset(psb_ainv_alg_,psb_ainv_s_ft_llk_,info) + case("MLK","LMX") + call prec%prec%precset(psb_ainv_alg_,psb_ainv_mlk_,info) + case default + call prec%prec%precset(psb_ainv_alg_,psb_ainv_llk_,info) + end select + case default + + end select + +end subroutine psb_ccprecsetc diff --git a/prec/impl/psb_c_sp_drop.f90 b/prec/impl/psb_c_sp_drop.f90 new file mode 100644 index 00000000..fda59cc7 --- /dev/null +++ b/prec/impl/psb_c_sp_drop.f90 @@ -0,0 +1,168 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! Moved here from AMG-AINV, original copyright below. +! +! +! AMG-AINV: Approximate Inverse plugin for +! AMG4PSBLAS version 1.0 +! +! (C) Copyright 2020 +! +! Salvatore Filippone University of Rome Tor Vergata +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +subroutine psb_c_sp_drop(idiag,nzrmax,sp_thresh,nz,iz,valz,info) + use psb_base_mod + implicit none + real(psb_spk_), intent(in) :: sp_thresh + integer(psb_ipk_), intent(in) :: idiag, nzrmax + integer(psb_ipk_), intent(inout) :: nz + integer(psb_ipk_), intent(inout) :: iz(:) + complex(psb_spk_), intent(inout) :: valz(:) + integer(psb_ipk_), intent(out) :: info + ! + integer(psb_ipk_) :: i, j, idf, nw + complex(psb_spk_) :: witem + integer(psb_ipk_) :: widx + complex(psb_spk_), allocatable :: xw(:) + integer(psb_ipk_), allocatable :: xwid(:), indx(:) + + + info = psb_success_ + + if (nz > min(size(iz),size(valz))) then + write(0,*) 'Serious size problem ',nz,size(iz),size(valz) + info = -2 + return + end if + allocate(xw(nz),xwid(nz),indx(nz),stat=info) + if (info /= psb_success_) then + write(psb_err_unit,*) ' Memory allocation failure in sp_drop',nz,info + return + endif + + ! Always keep the diagonal element + idf = -1 + do i=1, nz + if (iz(i) == idiag) then + idf = i + witem = valz(i) + widx = iz(i) + valz(i) = valz(1) + iz(i) = iz(1) + valz(1) = witem + iz(1) = widx + exit + end if + end do + + if (idf == -1) then + + xw(1:nz) = valz(1:nz) + call psb_qsort(xw(1:nz),indx(1:nz),dir=psb_asort_down_) + do i=1, nz + xwid(i) = iz(indx(i)) + end do + nw = min(nz,nzrmax) + do + if (nw <= 1) exit + if (abs(xw(nw)) < sp_thresh) then + nw = nw - 1 + else + exit + end if + end do + nw = max(nw, 1) + + else + + nw = nz-1 + + xw(1:nw) = valz(2:nz) + + call psb_qsort(xw(1:nw),indx(1:nw),dir=psb_asort_down_) + nw = min(nw,nzrmax-1) + do + if (nw <= 1) exit + if (abs(xw(nw)) < sp_thresh) then + nw = nw - 1 + else + exit + end if + end do + + do i=1, nw + xwid(i) = iz(1+indx(i)) + end do + nw = nw + 1 + xw(nw) = valz(1) + xwid(nw) = iz(1) + end if + + call psb_msort(xwid(1:nw),indx(1:nw),dir=psb_sort_up_) + + do i=1, nw + valz(i) = xw(indx(i)) + iz(i) = xwid(i) + end do + nz = nw + if (nz>nzrmax) write(0,*) 'in sp_drop: ',nw,nzrmax,nz + deallocate(xw,xwid,indx,stat=info) + if (info /= psb_success_) then + write(psb_err_unit,*) ' Memory deallocation failure in sp_drop',info + return + endif + return +end subroutine psb_c_sp_drop diff --git a/prec/impl/psb_c_sparsify.f90 b/prec/impl/psb_c_sparsify.f90 new file mode 100644 index 00000000..4962f337 --- /dev/null +++ b/prec/impl/psb_c_sparsify.f90 @@ -0,0 +1,260 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! Moved here from AMG-AINV, original copyright below. +! +! +! AMG-AINV: Approximate Inverse plugin for +! AMG4PSBLAS version 1.0 +! +! (C) Copyright 2020 +! +! Salvatore Filippone University of Rome Tor Vergata +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +subroutine psb_c_sparsify(idiag,nzrmax,sp_thresh,n,zw,nz,iz,valz,info,istart,iheap,ikr) + use psb_base_mod + implicit none + + real(psb_spk_), intent(in) :: sp_thresh + integer(psb_ipk_), intent(in) :: idiag, n, nzrmax + complex(psb_spk_), intent(inout) :: zw(:) + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), intent(out) :: iz(:) + complex(psb_spk_), intent(out) :: valz(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: istart + type(psb_i_heap), optional :: iheap + integer(psb_ipk_), optional :: ikr(:) + ! + integer(psb_ipk_) :: i, istart_, last_i, iret,k + complex(psb_spk_) :: witem + integer(psb_ipk_) :: widx + complex(psb_spk_), allocatable :: xw(:) + integer(psb_ipk_), allocatable :: xwid(:), indx(:) + type(psb_c_idx_heap) :: heap + + + info = psb_success_ + istart_ = 1 + if (present(istart)) istart_ = max(1,istart) + if (.false.) then + nz = 0 + do i=istart_, n + if ((i == idiag).or.(abs(zw(i)) >= sp_thresh)) then + nz = nz + 1 + iz(nz) = i + valz(nz) = zw(i) + end if + end do + + else + allocate(xw(nzrmax),xwid(nzrmax),indx(nzrmax),stat=info) + if (info /= psb_success_) then + return + end if + + call heap%init(info,dir=psb_asort_down_) + + ! Keep at least the diagonal + nz = 0 + + if (present(iheap)) then + if (.not.(present(ikr))) then + write(psb_err_unit,*) 'Error: if IHEAP then also IKR' + info = -1 + return + end if + last_i = -1 + do + call iheap%get_first(i,iret) + if (iret < 0) exit + ! An index may have been put on the heap more than once. + if (i == last_i) cycle + last_i = i + if (i == idiag) then + xw(1) = zw(i) + xwid(1) = i + else if (abs(zw(i)) >= sp_thresh) then + call heap%insert(zw(i),i,info) + end if + zw(i) = dzero + ikr(i) = 0 + end do + + else + + do i=istart_, n + if (i == idiag) then + xw(1) = zw(i) + xwid(1) = i + else if (abs(zw(i)) >= sp_thresh) then + call heap%insert(zw(i),i,info) + end if + zw(i) = dzero + end do + end if + + k = 1 + do + if (k == nzrmax) exit + call heap%get_first(witem,widx,info) + if (info == -1) then + info = psb_success_ + exit + endif + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + return + end if + k = k + 1 + xw(k) = witem + xwid(k) = widx + end do + call heap%free(info) + nz = k + call psb_msort(xwid(1:nz),indx(1:nz),dir=psb_sort_up_) +!!$ write(0,*) 'sparsify output for idiag ',idiag,' :',nz,sp_thresh + do i=1, nz + valz(i) = xw(indx(i)) + iz(i) = xwid(i) +!!$ write(0,*) ' ',iz(i),valz(i) + end do + + end if + + return + +end subroutine psb_c_sparsify + + +subroutine psb_c_sparsify_list(idiag,nzrmax,sp_thresh,n,zw,nz,iz,valz,lhead,listv,ikr,info) + use psb_base_mod + implicit none + + real(psb_spk_), intent(in) :: sp_thresh + integer(psb_ipk_), intent(in) :: idiag, n, nzrmax + complex(psb_spk_), intent(inout) :: zw(:) + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), intent(out) :: iz(:) + complex(psb_spk_), intent(out) :: valz(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(inout) :: lhead, listv(:) + integer(psb_ipk_) :: ikr(:) + ! + integer(psb_ipk_) :: i, istart_, last_i, iret,k,current, next + complex(psb_spk_) :: witem + integer(psb_ipk_) :: widx + complex(psb_spk_), allocatable :: xw(:) + integer(psb_ipk_), allocatable :: xwid(:), indx(:) + + + info = psb_success_ + istart_ = 1 + allocate(xw(n),xwid(n),indx(n),stat=info) + + current = lhead + lhead = -1 + i = 0 + do while (current >0) + i = i + 1 + xw(i) = zw(current) + xwid(i) = current + + if (current == idiag) then + ! Bring the diagona into first position + witem = xw(1) + widx = xwid(1) + xw(1) = xw(i) + xwid(1) = xwid(i) + xw(i) = witem + xwid(i) = widx + end if + + zw(current) = dzero + ikr(current) = 0 + + next = listv(current) + listv(current) = -1 + current = next + end do + nz = i + if (nz > 2) call psb_hsort(xw(2:nz),ix=xwid(2:nz),& + & dir=psb_asort_down_,flag=psb_sort_keep_idx_) +!!$ write(0,*) 'Done first msort ' +!!$ write(0,*) ' after first msort for idiag ',idiag,' :',nz,sp_thresh +!!$ do i=1, nz +!!$ write(0,*) ' ',xwid(i),xw(i) +!!$ end do + + i = 2 + do while (i<=nz) + if (abs(xw(i)) < sp_thresh) exit + i = i + 1 + end do +!!$ write(0,*) 'NZ ',nz, i, nzrmax + nz = max(1,min(i-1,nzrmax)) + call psb_msort(xwid(1:nz),ix=indx(1:nz),dir=psb_sort_up_) +!!$ write(0,*) 'Done second msort ' + +!!$ write(0,*) 'sparsify output for idiag ',idiag,' :',nz,i,sp_thresh + do i=1, nz + valz(i) = xw(indx(i)) + iz(i) = xwid(i) +!!$ write(0,*) ' ',iz(i),valz(i) + end do + + return + +end subroutine psb_c_sparsify_list diff --git a/prec/impl/psb_crwclip.f90 b/prec/impl/psb_crwclip.f90 new file mode 100644 index 00000000..ade1171f --- /dev/null +++ b/prec/impl/psb_crwclip.f90 @@ -0,0 +1,90 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! Moved here from AMG-AINV, original copyright below. +! +! +! AMG-AINV: Approximate Inverse plugin for +! AMG4PSBLAS version 1.0 +! +! (C) Copyright 2020 +! +! Salvatore Filippone University of Rome Tor Vergata +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +subroutine psb_c_rwclip(nz,ia,ja,val,imin,imax,jmin,jmax) + use psb_base_mod + + implicit none + integer(psb_ipk_), intent(inout) :: nz + integer(psb_ipk_), intent(inout) :: ia(*), ja(*) + complex(psb_spk_), intent(inout) :: val(*) + integer(psb_ipk_), intent(in) :: imin,imax,jmin,jmax + + integer(psb_ipk_) :: i,j + + j = 0 + do i=1, nz + if ((imin <= ia(i)).and.& + & (ia(i) <= imax).and.& + & (jmin <= ja(i)).and.& + & (ja(i) <= jmax) ) then + j = j + 1 + ia(j) = ia(i) + ja(j) = ja(i) + val(j) = val(i) + end if + end do + nz = j +end subroutine psb_c_rwclip diff --git a/prec/impl/psb_csparse_biconjg_llk.F90 b/prec/impl/psb_csparse_biconjg_llk.F90 new file mode 100644 index 00000000..98110cd1 --- /dev/null +++ b/prec/impl/psb_csparse_biconjg_llk.F90 @@ -0,0 +1,366 @@ +! +! +! AMG-AINV: Approximate Inverse plugin for +! AMG4PSBLAS version 1.0 +! +! (C) Copyright 2020 +! +! Salvatore Filippone University of Rome Tor Vergata +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +subroutine psb_csparse_biconjg_llk(n,a,p,z,w,nzrmax,sp_thresh,info) + use psb_base_mod + use psb_c_ainv_tools_mod + use psb_c_biconjg_mod, psb_protect_name => psb_csparse_biconjg_llk + ! + ! Left-looking variant + ! + ! + implicit none + integer(psb_ipk_), intent(in) :: n + type(psb_c_csr_sparse_mat), intent(in) :: a + type(psb_c_csc_sparse_mat), intent(inout) :: z,w + integer(psb_ipk_), intent(in) :: nzrmax + real(psb_spk_), intent(in) :: sp_thresh + complex(psb_spk_), intent(out) :: p(:) + integer(psb_ipk_), intent(out) :: info + + ! Locals + integer(psb_ipk_), allocatable :: ia(:), ja(:), izkr(:), izcr(:) + complex(psb_spk_), allocatable :: zval(:),val(:), q(:) + integer(psb_ipk_) :: i,j,k, kc, kr, err_act, nz, nzra, nzrz, ipzi,ipzj,& + & nzzi,nzzj, nzz, ip1, ip2, ipza,ipzz, ipzn, nzzn, ipz1, ipz2,& + & ipj, lastj, nextj, nzw + type(psb_i_heap) :: heap, rheap + type(psb_c_csc_sparse_mat) :: ac + complex(psb_spk_) :: alpha + character(len=20) :: name='psb_orth_llk' + logical, parameter :: debug=.false. + + allocate(zval(n),ia(n),val(n),izkr(n),izcr(n),q(n),stat=info) + if (info == psb_success_) call ac%cp_from_fmt(a,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + return + end if + ! + ! izkr(i): flag nonzeros in ZVAL. To minimize traffic into heap. + ! izcr(i): flag rows to be used for the dot products. Used to minimize + ! traffic in rheap. + ! + do i=1,n + izkr(i) = 0 + izcr(i) = 0 + zval(i) = czero + end do + + ! Init z_1=e_1 and p_1=a_11 + p(1) = czero + i = 1 + nz = a%irp(i+1) - a%irp(i) + do j=1,nz + if (a%ja(j) == 1) then + p(1) = a%val(j) + exit + end if + end do + if (abs(p(1)) < s_epstol) & + & p(1) = 1.d-3 + + q(1) = p(1) + ! + ! + call z%allocate(n,n,n*nzrmax) + + z%icp(1) = 1 + z%icp(2) = 2 + z%ia(1) = 1 + z%val(1) = cone + nzz = 1 + + call w%allocate(n,n,n*nzrmax) + w%icp(1) = 1 + w%icp(2) = 2 + w%ia(1) = 1 + w%val(1) = cone + nzw = 1 + + do i = 2, n + if (debug) write(0,*) 'Main loop iteration ',i,n + + ! + ! Update loop on Z. + ! Must be separated from update loop of W because of + ! the conflict on J that would result. + ! + + ! ZVAL = e_i + ! !$ do j=1, i-1 + ! !$ zval(j) = czero + ! !$ end do + zval(i) = cone + izkr(i) = 1 + call heap%init(info) + if (info == psb_success_) call heap%insert(i,info) + + if (info == psb_success_) call rheap%init(info) + + do j = ac%icp(i), ac%icp(i+1)-1 + if (ac%ia(j) < i) then +!!$ write(0,*) i, ' Outer inserting ',ac%ia(j) + if (info == psb_success_) call rheap%insert(ac%ia(j),info) + izcr(ac%ia(j)) = 1 + end if + end do + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_init_heap') + return + end if + + ! Update loop + ! The idea is to keep track of the indices of the nonzeros in zval, + ! so as to only do the dot products on the rows which have nonzeros + ! in their positions; to do this we keep an extra + ! copy of A in CSC, and the row indices to be considered are in rheap. + lastj = -1 + outer: do + inner: do + call rheap%get_first(j,info) + if (debug) write(0,*) 'from get_first: ',j,info + if (info == -1) exit outer ! Empty heap + if (j > lastj) then + lastj = j + exit inner + end if + end do inner + + izcr(j) = 0 + if (j>=i) cycle outer + if (debug) write(0,*) 'update loop, using row: ',j,i + ip1 = a%irp(j) + ip2 = a%irp(j+1) - 1 + do + if (ip2 < ip1 ) exit + if (a%ja(ip2) <= n) exit + ip2 = ip2 -1 + end do + nzra = max(0,ip2 - ip1 + 1) + p(i) = psb_spge_dot(nzra,a%ja(ip1:ip2),a%val(ip1:ip2),zval) + ! ! write(psb_err_unit,*) j,i,p(i) + + alpha = (-p(i)/p(j)) +!!$ write(0,*) 'At step ',i,j,' p(i) ',p(i),alpha + if (.false..or.(abs(alpha) > sp_thresh)) then +!!$ write(0,*) 'At step ',i,j,' range ',z%icp(j), z%icp(j+1)-1, & +!!$ & ' vals ',z%ia(z%icp(j):z%icp(j+1)-1) + do k=z%icp(j), z%icp(j+1)-1 + kr = z%ia(k) + zval(kr) = zval(kr) + alpha*z%val(k) + if (izkr(kr) == 0) then +!!$ write(0,*) ' main inner Inserting ',kr + call heap%insert(kr,info) + if (info /= psb_success_) exit + izkr(kr) = 1 + ! We have just added a new nonzero in KR. Thus, we will + ! need to explicitly compute the dot products on all + ! rows jj).and.(nextj lastj) then + lastj = j + exit innerw + end if + end do innerw + izcr(j) = 0 + if (j>=i) cycle outerw + if (debug) write(0,*) 'update loop, using row: ',j + ip1 = ac%icp(j) + ip2 = ac%icp(j+1) - 1 + do + if (ip2 < ip1 ) exit + if (ac%ia(ip2) <= n) exit + ip2 = ip2 -1 + end do + nzra = max(0,ip2 - ip1 + 1) + q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval) + ! !$ write(psb_err_unit,*) j,i,p(i) + + alpha = (-q(i)/q(j)) + if (.false..or.(abs(alpha) > sp_thresh)) then + + do k=w%icp(j), w%icp(j+1)-1 + kr = w%ia(k) + zval(kr) = zval(kr) + alpha*w%val(k) + if (izkr(kr) == 0) then + call heap%insert(kr,info) + if (info /= psb_success_) exit + izkr(kr) = 1 + ! We have just added a new nonzero in KR. Thus, we will + ! need to explicitly compute the dot products on all + ! rows jj).and.(nextj psb_csparse_biconjg_llk_noth + + ! + ! Left-looking variant, with NO drop rule on p(i)/p(j) + ! + ! + implicit none + integer(psb_ipk_), intent(in) :: n + type(psb_c_csr_sparse_mat), intent(in) :: a + type(psb_c_csc_sparse_mat), intent(inout) :: z,w + integer(psb_ipk_), intent(in) :: nzrmax + real(psb_spk_), intent(in) :: sp_thresh + complex(psb_spk_), intent(out) :: p(:) + integer(psb_ipk_), intent(out) :: info + + ! Locals + integer(psb_ipk_), allocatable :: ia(:), ja(:), izkr(:), izcr(:) + complex(psb_spk_), allocatable :: zval(:),val(:), q(:) + integer(psb_ipk_) :: i,j,k, kc, kr, err_act, nz, nzra, nzrz, ipzi,ipzj,& + & nzzi,nzzj, nzz, ip1, ip2, ipza,ipzz, ipzn, nzzn, ipz1, ipz2,& + & ipj, lastj, nextj, nzw + type(psb_i_heap) :: heap, rheap + type(psb_c_csc_sparse_mat) :: ac + real(psb_dpk_) :: alpha + character(len=20) :: name='psb_orth_llk_noth' + logical, parameter :: debug=.false. + + allocate(zval(n),ia(n),val(n),izkr(n),izcr(n),q(n),stat=info) + if (info == psb_success_) call ac%cp_from_fmt(a,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + return + end if + ! + ! izkr(i): flag nonzeros in ZVAL. To minimize traffic into heap. + ! izcr(i): flag rows to be used for the dot products. Used to minimize + ! traffic in rheap. + ! + do i=1,n + izkr(i) = 0 + izcr(i) = 0 + zval(i) = czero + end do + + ! Init z_1=e_1 and p_1=a_11 + p(1) = czero + i = 1 + nz = a%irp(i+1) - a%irp(i) + do j=1,nz + if (a%ja(j) == 1) then + p(1) = a%val(j) + exit + end if + end do + if (abs(p(1)) < s_epstol) & + & p(1) = 1.d-3 + + q(1) = p(1) + ! + ! + call z%allocate(n,n,n*nzrmax) + + z%icp(1) = 1 + z%icp(2) = 2 + z%ia(1) = 1 + z%val(1) = done + nzz = 1 + + call w%allocate(n,n,n*nzrmax) + w%icp(1) = 1 + w%icp(2) = 2 + w%ia(1) = 1 + w%val(1) = done + nzw = 1 + + do i = 2, n + if (debug) write(0,*) 'Main loop iteration ',i,n + + ! + ! Update loop on Z. + ! Must be separated from update loop of W because of + ! the conflict on J that would result. + ! + + ! ZVAL = e_i + ! !$ do j=1, i-1 + ! !$ zval(j) = czero + ! !$ end do + zval(i) = done + izkr(i) = 1 + call heap%init(info) + if (info == psb_success_) call heap%insert(i,info) + + if (info == psb_success_) call rheap%init(info) + do j = ac%icp(i), ac%icp(i+1)-1 + if (ac%ia(j) < i) then + if (info == psb_success_) call rheap%insert(ac%ia(j),info) + izcr(ac%ia(j)) = 1 + end if + end do + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_init_heap') + return + end if + + ! Update loop + ! The idea is to keep track of the indices of the nonzeros in zval, + ! so as to only do the dot products on the rows which have nonzeros + ! in their positions; to do this we keep an extra + ! copy of A in CSC, and the row indices to be considered are in rheap. + lastj = -1 + outer: do + inner: do + call rheap%get_first(j,info) + if (debug) write(0,*) 'from get_first: ',j,info + if (info == -1) exit outer ! Empty heap + if (j > lastj) then + lastj = j + exit inner + end if + end do inner + + izcr(j) = 0 + if (j>=i) cycle outer + if (debug) write(0,*) 'update loop, using row: ',j,i + ip1 = a%irp(j) + ip2 = a%irp(j+1) - 1 + do + if (ip2 < ip1 ) exit + if (a%ja(ip2) <= n) exit + ip2 = ip2 -1 + end do + nzra = max(0,ip2 - ip1 + 1) + p(i) = psb_spge_dot(nzra,a%ja(ip1:ip2),a%val(ip1:ip2),zval) + ! !$ write(psb_err_unit,*) j,i,p(i) + + alpha = (-p(i)/p(j)) + + if (.true.) then + do k=z%icp(j), z%icp(j+1)-1 + kr = z%ia(k) + zval(kr) = zval(kr) + alpha*z%val(k) + if (izkr(kr) == 0) then + + call heap%insert(kr,info) + if (info /= psb_success_) exit + izkr(kr) = 1 + ! We have just added a new nonzero in KR. Thus, we will + ! need to explicitly compute the dot products on all + ! rows jj).and.(nextj lastj) then + lastj = j + exit innerw + end if + end do innerw + izcr(j) = 0 + if (j>=i) cycle outerw + if (debug) write(0,*) 'update loop, using row: ',j + ip1 = ac%icp(j) + ip2 = ac%icp(j+1) - 1 + do + if (ip2 < ip1 ) exit + if (ac%ia(ip2) <= n) exit + ip2 = ip2 -1 + end do + nzra = max(0,ip2 - ip1 + 1) + q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval) + ! !$ write(psb_err_unit,*) j,i,p(i) + + alpha = (-q(i)/q(j)) + if (.true.) then + + do k=w%icp(j), w%icp(j+1)-1 + kr = w%ia(k) + zval(kr) = zval(kr) + alpha*w%val(k) + if (izkr(kr) == 0) then + call heap%insert(kr,info) + if (info /= psb_success_) exit + izkr(kr) = 1 + ! We have just added a new nonzero in KR. Thus, we will + ! need to explicitly compute the dot products on all + ! rows jj).and.(nextj psb_csparse_biconjg_mlk + ! + ! Left-looking variant + ! + ! + implicit none + integer(psb_ipk_), intent(in) :: n + type(psb_c_csr_sparse_mat), intent(in) :: a + type(psb_c_csc_sparse_mat), intent(inout) :: z,w + integer(psb_ipk_), intent(in) :: nzrmax + real(psb_spk_), intent(in) :: sp_thresh + complex(psb_spk_), intent(out) :: p(:) + integer(psb_ipk_), intent(out) :: info + + ! Locals + integer(psb_ipk_), allocatable :: ia(:), ja(:), izkr(:), izcr(:), hlist(:), bfr(:), rwlist(:) + complex(psb_spk_), allocatable :: zval(:),val(:), q(:) + integer(psb_ipk_) :: i,j,k, kc, kr, err_act, nz, nzra, nzrz, ipzi,ipzj,& + & nzzi,nzzj, nzz, ip1, ip2, ipza,ipzz, ipzn, nzzn, ipz1, ipz2,& + & ipj, lastj, nextj, nzw, hlhead, li, mj, kkc, ifrst, ilst, rwhead + type(psb_i_heap) :: heap, rheap + type(psb_c_csc_sparse_mat) :: ac + complex(psb_spk_) :: alpha + character(len=20) :: name='psb_biconjg_mlk' + logical, parameter :: debug=.false., test_merge=.true. + + allocate(zval(n),ia(n),val(n),izkr(n),izcr(n),q(n), & + & hlist(n),rwlist(n),bfr(n),stat=info) + if (info == psb_success_) call ac%cp_from_fmt(a,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + return + end if + ! + ! izkr(i): flag nonzeros in ZVAL. To minimize traffic into heap. + ! izcr(i): flag rows to be used for the dot products. Used to minimize + ! traffic in rheap. + ! + do i=1,n + izkr(i) = 0 + izcr(i) = 0 + zval(i) = czero + hlist(i) = -1 + rwlist(i) = -1 + end do + + ! Init z_1=e_1 and p_1=a_11 + p(1) = czero + i = 1 + nz = a%irp(i+1) - a%irp(i) + do j=1,nz + if (a%ja(j) == 1) then + p(1) = a%val(j) + exit + end if + end do + if (abs(p(1)) < s_epstol) & + & p(1) = 1.d-3 + + q(1) = p(1) + ! + ! + call z%allocate(n,n,n*nzrmax) + + z%icp(1) = 1 + z%icp(2) = 2 + z%ia(1) = 1 + z%val(1) = cone + nzz = 1 + + call w%allocate(n,n,n*nzrmax) + w%icp(1) = 1 + w%icp(2) = 2 + w%ia(1) = 1 + w%val(1) = cone + nzw = 1 + + + do i = 2, n + if (debug) write(0,*) 'Main loop iteration ',i,n + + ! + ! Update loop on Z. + ! Must be separated from update loop of W because of + ! the conflict on J that would result. + ! + + ! ZVAL = e_i + ! !$ do j=1, i-1 + ! !$ zval(j) = czero + ! !$ end do + zval(i) = cone + izkr(i) = 1 + rwhead = i + + hlhead = -1 + + kkc = 0 + ilst = ac%icp(i)-1 + ifrst = ac%icp(i) + do j = ac%icp(i+1)-1, ac%icp(i), -1 + if (ac%ia(j) < i) then + ilst = j + exit + end if + end do + kkc = ilst-ifrst+1 + + if (.true..or.debug) then +!!$ write(0,*) 'Outer Before insertion : ',hlhead + call printlist(hlhead,hlist) + end if + if (kkc > 0) then +!!$ write(0,*) i,' Outer Inserting : ',kkc,':',ac%ia(ifrst:ilst) + + !call hlmerge(hlhead,hlist,bfr(1:kkc)) + call hlmerge(hlhead,hlist,ac%ia(ifrst:ilst)) + end if + if (debug) then + write(0,*) 'Outer After insertion: ',hlhead + call printlist(hlhead,hlist) + end if + + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='init_lists') + return + end if + + ! Update loop + ! The idea is to keep track of the indices of the nonzeros in zval, + ! so as to only do the dot products on the rows which have nonzeros + ! in their positions; to do this we keep an extra + ! copy of A in CSC, and the row indices to be considered are in rheap. + lastj = -1 + outer: do + mj = hlhead + if (mj > 0) then + hlhead = hlist(mj) + hlist(mj) = -1 + end if + j = mj + if (j < 0) exit outer + + izcr(j) = 0 + if (j>=i) cycle outer + + if (debug) write(0,*) 'update loop, using row: ',j,i,mj + ip1 = a%irp(j) + ip2 = a%irp(j+1) - 1 + do + if (ip2 < ip1 ) exit + if (a%ja(ip2) <= n) exit + ip2 = ip2 -1 + end do + nzra = max(0,ip2 - ip1 + 1) + p(i) = psb_spge_dot(nzra,a%ja(ip1:ip2),a%val(ip1:ip2),zval) + ! !$ write(psb_err_unit,*) j,i,p(i) + + alpha = (-p(i)/p(j)) +!!$ write(0,*) 'At step ',i,j,' p(i) ',p(i),alpha +!!$ write(0,*) ' Current list is : ',hlhead +!!$ call printlist(hlhead,hlist) +!!$ + + + if (.false..or.(abs(alpha) > sp_thresh)) then + ifrst=z%icp(j) + ilst=z%icp(j+1)-1 + call hlmerge(rwhead,rwlist,z%ia(ifrst:ilst)) +!!$ write(0,*) 'At step ',i,j,' range ',z%icp(j), z%icp(j+1)-1, & +!!$ & ' vals ',z%ia(z%icp(j):z%icp(j+1)-1) + + do k=z%icp(j), z%icp(j+1)-1 + kr = z%ia(k) + zval(kr) = zval(kr) + alpha*z%val(k) + + if (izkr(kr) == 0) then +!!$ write(0,*) ' main inner Inserting ',kr +!!$ call hlmerge(rwhead,rwlist,(/kr/)) + izkr(kr) = 1 + ! We have just added a new nonzero in KR. Thus, we will + ! need to explicitly compute the dot products on all + ! rows jj)) then + ifrst = min(ifrst,kc ) + ilst = max(ilst,kc) + end if + end do + kkc = ilst-ifrst+1 + if (debug) then + write(0,*) 'Inner Before insertion: ' + call printlist(hlhead,hlist) + write(0,*) 'Inner Inserting : ',kkc,':',ac%ia(ifrst:ilst) + end if + if (ilst >= ifrst) then +!!$ write(0,*) j,i,' Inner inserting ',ac%ia(ifrst:ilst) + call hlmerge(hlhead,hlist,ac%ia(ifrst:ilst)) + end if + + if (debug) then + write(0,*) 'Inner After insertion: ',hlhead + call printlist(hlhead,hlist) + end if + + end if + if (info /= psb_success_) exit + end do + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_insert_heap') + return + end if + end if + end do outer + call a%csget(i,i,nzra,ia,ja,val,info) + call rwclip(nzra,ia,ja,val,ione,n,ione,n) + p(i) = psb_spge_dot(nzra,ja,val,zval) + if (abs(p(i)) < s_epstol) & + & p(i) = 1.d-3 + +!!$ write(0,*) 'Dropping from a column with: ',i,psb_howmany_heap(heap),sp_thresh + + ! + ! Sparsify current ZVAL and put into ZMAT + ! + call sparsify(i,nzrmax,sp_thresh,n,zval,nzrz,ia,val,rwhead,rwlist,izkr,info) + + if (info /= psb_success_) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='sparsify') + return + end if + call psb_ensure_size(nzz+nzrz, z%ia, info) + call psb_ensure_size(nzz+nzrz, z%val, info) + ipz1 = z%icp(i) + do j=1, nzrz + z%ia(ipz1 + j -1) = ia(j) + z%val(ipz1 + j -1) = val(j) + end do + z%icp(i+1) = ipz1 + nzrz + nzz = nzz + nzrz + + + ! WVAL = e_i + ! !$ do j=1, i-1 + ! !$ zval(j) = czero + ! !$ end do + zval(i) = cone + izkr(i) = 1 + rwhead = i + hlhead = -1 + + kkc = 0 + ilst = a%irp(i)-1 + ifrst = a%irp(i) + do j = a%irp(i+1)-1, a%irp(i), -1 + if (a%ja(j) < i) then + ilst = j + exit + end if + end do + kkc = ilst-ifrst+1 + + if (debug) then + write(0,*) 'Outer Before insertion: ' + call printlist(hlhead,hlist) + write(0,*) 'Outer Inserting : ',kkc,':',a%ja(ifrst:ilst) + end if + if (kkc > 0 ) then + !call hlmerge(hlhead,hlist,bfr(1:kkc)) + call hlmerge(hlhead,hlist,a%ja(ifrst:ilst)) + end if + if (debug) then + write(0,*) 'Outer After insertion: ',hlhead + call printlist(hlhead,hlist) + end if + + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='init_lists') + return + end if + + ! Update loop + ! The idea is to keep track of the indices of the nonzeros in zval, + ! so as to only do the dot products on the rows which have nonzeros + ! in their positions; to do this we keep an extra + ! copy of A in CSC, and the row indices to be considered are in rheap. + lastj = -1 + outerw: do + mj = hlhead + if (hlhead > 0) then + hlhead = hlist(mj) + hlist(mj) = -1 + end if + j = mj + if (j < 0) exit outerw + + izcr(j) = 0 + if (j>=i) cycle outerw + if (debug) write(0,*) 'update loop, using row: ',j + ip1 = ac%icp(j) + ip2 = ac%icp(j+1) - 1 + do + if (ip2 < ip1 ) exit + if (ac%ia(ip2) <= n) exit + ip2 = ip2 -1 + end do + nzra = max(0,ip2 - ip1 + 1) + q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval) + ! !$ write(psb_err_unit,*) j,i,p(i) + + alpha = (-q(i)/q(j)) + if (.false..or.(abs(alpha) > sp_thresh)) then + ifrst=w%icp(j) + ilst=w%icp(j+1)-1 + call hlmerge(rwhead,rwlist,w%ia(ifrst:ilst)) + + do k=w%icp(j), w%icp(j+1)-1 + kr = w%ia(k) + zval(kr) = zval(kr) + alpha*w%val(k) + if (izkr(kr) == 0) then + izkr(kr) = 1 + ! We have just added a new nonzero in KR. Thus, we will + ! need to explicitly compute the dot products on all + ! rows jj)) then + ifrst = min(ifrst,kc ) + ilst = max(ilst,kc) + end if + end do + kkc = ilst-ifrst+1 + if (debug) then + write(0,*) 'Inner Before insertion: ' + call printlist(hlhead,hlist) + write(0,*) 'Inner Inserting : ',kkc,':',a%ja(ifrst:ilst) + end if + + call hlmerge(hlhead,hlist,a%ja(ifrst:ilst)) + + if (debug) then + write(0,*) 'Inner After insertion: ',hlhead + call printlist(hlhead,hlist) + end if + if (debug) write(0,*) 'update loop, adding indices: ',& + & a%ja(a%irp(kr):a%irp(kr+1)-1) + + end if + if (info /= psb_success_) exit + end do + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_insert_heap') + return + end if + end if + end do outerw + ip1 = ac%icp(i) + ip2 = ac%icp(i+1) - 1 + do + if (ip2 < ip1 ) exit + if (ac%ia(ip2) <= n) exit + ip2 = ip2 -1 + end do + nzra = max(0,ip2 - ip1 + 1) + q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval) + if (abs(q(i)) < s_epstol) & + & q(i) = 1.d-3 + +!!$ write(0,*) 'Dropping from a column with: ',i,psb_howmany_heap(heap),sp_thresh + ! + ! Sparsify current ZVAL and put into ZMAT + ! + call sparsify(i,nzrmax,sp_thresh,n,zval,nzrz,ia,val,rwhead,rwlist,izkr,info) + if (info /= psb_success_) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='sparsify') + return + end if + call psb_ensure_size(nzw+nzrz, w%ia, info) + call psb_ensure_size(nzw+nzrz, w%val, info) + ipz1 = w%icp(i) + do j=1, nzrz + w%ia(ipz1 + j -1) = ia(j) + w%val(ipz1 + j -1) = val(j) + end do + w%icp(i+1) = ipz1 + nzrz + nzw = nzw + nzrz + + end do + +contains + + subroutine hlmerge(head,listv,vals) + integer(psb_ipk_), intent(inout) :: head, listv(:) + integer(psb_ipk_), intent(in) :: vals(:) + integer(psb_ipk_) :: i,j,k, lh, lv, nv, vv, flh, ph + + nv = size(vals) + lh = head + flh = -1 + lv = 1 + if ((head < 0).and.(nv > 0)) then + ! Adjust head if empty + head = vals(lv) + lv = lv + 1 + else if ((head > 0) .and. (nv >0)) then + ! Adjust head if first item less than it + if (head > vals(lv)) then + listv(vals(lv)) = head + head = vals(lv) + lv = lv + 1 + end if + end if + + lh = head + ph = lh + do while ((lh > 0) .and. (lv <= nv)) + if (lh == vals(lv)) then + lv = lv + 1 + else if (lh > vals(lv)) then + listv(vals(lv)) = lh + listv(ph) = vals(lv) + lh = vals(lv) + lv = lv + 1 + else + ph = lh + lh = listv(lh) + end if + end do + lh = ph + do while (lv <= nv) + listv(lh) = vals(lv) + lh = listv(lh) + lv = lv + 1 + end do + end subroutine hlmerge + + + subroutine printlist(head,listv) + integer(psb_ipk_), intent(in) :: head, listv(:) + integer(psb_ipk_) :: li + + li = head + do while (li > 0) + write(0,*) 'Item: ', li + li = listv(li) + end do + end subroutine printlist + +end subroutine psb_csparse_biconjg_mlk diff --git a/prec/impl/psb_csparse_biconjg_s_ft_llk.F90 b/prec/impl/psb_csparse_biconjg_s_ft_llk.F90 new file mode 100644 index 00000000..64af2ed5 --- /dev/null +++ b/prec/impl/psb_csparse_biconjg_s_ft_llk.F90 @@ -0,0 +1,414 @@ +! +! +! AMG-AINV: Approximate Inverse plugin for +! AMG4PSBLAS version 1.0 +! +! (C) Copyright 2020 +! +! Salvatore Filippone University of Rome Tor Vergata +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +subroutine psb_csparse_biconjg_s_ft_llk(n,a,p,z,w,nzrmax,sp_thresh,info) + use psb_base_mod + use psb_c_ainv_tools_mod + use psb_c_biconjg_mod, psb_protect_name => psb_csparse_biconjg_s_ft_llk + + ! + ! Left-looking variant, stabilized i.e. product by A is applied + ! to compute the diagonal elements. + ! + ! + implicit none + integer(psb_ipk_), intent(in) :: n + type(psb_c_csr_sparse_mat), intent(in) :: a + type(psb_c_csc_sparse_mat), intent(inout) :: z,w + integer(psb_ipk_), intent(in) :: nzrmax + real(psb_spk_), intent(in) :: sp_thresh + complex(psb_spk_), intent(out) :: p(:) + integer(psb_ipk_), intent(out) :: info + + ! Locals + integer(psb_ipk_), allocatable :: ia(:), ja(:), izkr(:), izcr(:),iww(:) + complex(psb_spk_), allocatable :: zval(:),val(:), q(:), ww(:) + integer(psb_ipk_) :: i,j,k, kc, kr, err_act, nz, nzra, nzrz, ipzi,ipzj, nzww,& + & nzzi,nzzj, nzz, ip1, ip2, ipza,ipzz, ipzn, nzzn, ipz1, ipz2,& + & ipj, lastj, nextj, nzw, nzrw + type(psb_i_heap) :: heap, rheap + type(psb_c_csc_sparse_mat) :: ac + complex(psb_spk_) :: alpha, tmpq,tmpq2 + character(len=20) :: name='psb_orth_llk' + logical, parameter :: debug=.false. + + allocate(zval(n),ia(n),val(n),izkr(n),izcr(n),q(n),iww(n),ww(n),stat=info) + if (info == psb_success_) call ac%cp_from_fmt(a,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + return + end if + ! + ! Init pointers to: + ! ljr(i): last occupied column index within row I + ! izcr(i): first occupied row index within column I + ! + do i=1,n + izkr(i) = 0 + izcr(i) = 0 + zval(i) = czero + end do + + ! Init z_1=e_1 and p_1=a_11 + p(1) = czero + i = 1 + nz = a%irp(i+1) - a%irp(i) + do j=1,nz + if (a%ja(j) == 1) then + p(1) = a%val(j) + exit + end if + end do + if (abs(p(1)) < s_epstol) & + & p(1) = 1.d-3 + + q(1) = p(1) + ! + ! + call z%allocate(n,n,n*nzrmax) + + z%icp(1) = 1 + z%icp(2) = 2 + z%ia(1) = 1 + z%val(1) = cone + nzz = 1 + + call w%allocate(n,n,n*nzrmax) + w%icp(1) = 1 + w%icp(2) = 2 + w%ia(1) = 1 + w%val(1) = cone + nzw = 1 + + do i = 2, n + if (debug) write(0,*) 'Main loop iteration ',i,n + + ! + ! Update loop on Z. + ! Must be separated from update loop of W because of + ! the conflict on J that would result. + ! + + ! ZVAL = e_i + ! !$ do j=1, i-1 + ! !$ zval(j) = czero + ! !$ end do + zval(i) = cone + izkr(i) = 1 + call heap%init(info) + if (info == psb_success_) call heap%insert(i,info) + if (info == psb_success_) call rheap%init(info) + do j = ac%icp(i), ac%icp(i+1)-1 + if (ac%ia(j) lastj) then + lastj = j + exit inner + end if + end do inner + izcr(j) = 0 + if (j>=i) exit outer + if (debug) write(0,*) 'update loop, using row: ',j + ip1 = w%icp(j) + ip2 = w%icp(j+1) - 1 + nzra = max(0,ip2 - ip1 + 1) + nzww = 0 + call psb_c_spvspm(cone,a,nzra,w%ia(ip1:ip2),w%val(ip1:ip2),& + & czero,nzww,iww,ww,info) + + p(i) = psb_spge_dot(nzww,iww,ww,zval) + + ipz1 = z%icp(j) + ipz2 = z%icp(j+1) + nzrz = ipz2-ipz1 + alpha = (-p(i)/p(j)) +!!$ write(0,*) ' p(i)/p(j) ',i,j,alpha,p(i),p(j) + if (.false..or.(abs(alpha) > sp_thresh)) then + + do k=ipz1, ipz2-1 + kr = z%ia(k) + zval(kr) = zval(kr) + alpha*z%val(k) + if (izkr(kr) == 0) then + call heap%insert(kr,info) + if (info /= psb_success_) exit + izkr(kr) = 1 + ! We have just added a new nonzero in KR. Thus, we will + ! need to explicitly compute the dot products on all + ! rows jj).and.(nextj lastj) then + lastj = j + exit innerw + end if + end do innerw + izcr(j) = 0 + if (j>=i) exit outerw + if (debug) write(0,*) 'update loop, using row: ',j + if (.false.) then + ip1 = ac%icp(j) + ip2 = ac%icp(j+1) - 1 + do + if (ip2 < ip1 ) exit + if (ac%ia(ip2) <= n) exit + ip2 = ip2 -1 + end do + nzra = max(0,ip2 - ip1 + 1) + q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval) + ! !$ write(psb_err_unit,*) j,i,p(i) + else + ip1 = z%icp(j) + ip2 = z%icp(j+1) - 1 + nzra = max(0,ip2 - ip1 + 1) + nzww = 0 + call psb_c_spmspv(cone,ac,nzra,z%ia(ip1:ip2),z%val(ip1:ip2),& + & czero,nzww,iww,ww,info) + + q(i) = psb_spge_dot(nzww,iww,ww,zval) + end if + + ipz1 = w%icp(j) + ipz2 = w%icp(j+1) + nzrz = ipz2-ipz1 + alpha = (-q(i)/q(j)) +!!$ write(0,*) ' q(i)/q(j) ',i,j,alpha,q(i),q(j) + if (.false..or.(abs(alpha) > sp_thresh)) then + + do k=ipz1, ipz2-1 + kr = w%ia(k) + zval(kr) = zval(kr) + alpha*w%val(k) + if (izkr(kr) == 0) then + call heap%insert(kr,info) + if (info /= psb_success_) exit + izkr(kr) = 1 + ! We have just added a new nonzero in KR. Thus, we will + ! need to explicitly compute the dot products on all + ! rows jj).and.(nextj psb_csparse_biconjg_s_llk + + ! + ! Left-looking variant SYMMETRIC/HERMITIAN A. You have been warned! + ! + ! + implicit none + integer(psb_ipk_), intent(in) :: n + type(psb_c_csr_sparse_mat), intent(in) :: a + type(psb_c_csc_sparse_mat), intent(inout) :: z,w + integer(psb_ipk_), intent(in) :: nzrmax + real(psb_spk_), intent(in) :: sp_thresh + complex(psb_spk_), intent(out) :: p(:) + integer(psb_ipk_), intent(out) :: info + + ! Locals + integer(psb_ipk_), allocatable :: ia(:), ja(:), izkr(:), izcr(:) + complex(psb_spk_), allocatable :: zval(:),val(:), q(:) + integer(psb_ipk_) :: i,j,k, kc, kr, err_act, nz, nzra, nzrz, ipzi,ipzj,& + & nzzi,nzzj, nzz, ip1, ip2, ipza,ipzz, ipzn, nzzn, ipz1, ipz2,& + & ipj, lastj, nextj, nzw,kk + type(psb_i_heap) :: heap, rheap + type(psb_c_csc_sparse_mat) :: ac + complex(psb_spk_) :: alpha, zvalmax + character(len=20) :: name='psb_orth_llk' + logical, parameter :: debug=.false. + + allocate(zval(n),ia(n),val(n),izkr(n),izcr(n),stat=info) + if (info == psb_success_) call ac%cp_from_fmt(a,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + return + end if + ! + ! izkr(i): flag nonzeros in ZVAL. To minimize traffic into heap. + ! izcr(i): flag rows to be used for the dot products. Used to minimize + ! traffic in rheap. + ! + do i=1,n + izkr(i) = 0 + izcr(i) = 0 + zval(i) = czero + end do + + ! Init z_1=e_1 and p_1=a_11 + p(1) = czero + i = 1 + nz = a%irp(i+1) - a%irp(i) + do j=1,nz + if (a%ja(j) == 1) then + p(1) = a%val(j) + exit + end if + end do + if (abs(p(1)) < s_epstol) & + & p(1) = 1.d-3 + + ! + ! + call z%allocate(n,n,n*nzrmax) + + z%icp(1) = 1 + z%icp(2) = 2 + z%ia(1) = 1 + z%val(1) = cone + nzz = 1 + zvalmax = cone + + do i = 2, n + if (debug) write(0,*) 'Main loop iteration ',i,n + + ! + ! Update loop on Z. + ! Must be separated from update loop of W because of + ! the conflict on J that would result. + ! + + ! ZVAL = e_i + ! !$ do j=1, i-1 + ! !$ zval(j) = czero + ! !$ end do + zval(i) = cone + izkr(i) = 1 + call heap%init(info) + if (info == psb_success_) call heap%insert(i,info) + + if (info == psb_success_) call rheap%init(info) + do j = ac%icp(i), ac%icp(i+1)-1 + if (ac%ia(j) < i) then + if (info == psb_success_) call rheap%insert(ac%ia(j),info) + izcr(ac%ia(j)) = 1 + end if + end do + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_init_heap') + return + end if + + ! Update loop + ! The idea is to keep track of the indices of the nonzeros in zval, + ! so as to only do the dot products on the rows which have nonzeros + ! in their positions; to do this we keep an extra + ! copy of A in CSC, and the row indices to be considered are in rheap. + lastj = -1 + outer: do + inner: do + call rheap%get_first(j,info) + if (debug) write(0,*) 'from get_first: ',j,info + if (info == -1) exit outer ! Empty heap + if (j > lastj) then + lastj = j + exit inner + end if + end do inner + + izcr(j) = 0 + if (j>=i) cycle outer + if (debug) write(0,*) 'update loop, using row: ',j,i + ip1 = a%irp(j) + ip2 = a%irp(j+1) - 1 + do + if (ip2 < ip1 ) exit + if (a%ja(ip2) <= n) exit + ip2 = ip2 -1 + end do + nzra = max(0,ip2 - ip1 + 1) + p(i) = psb_spge_dot(nzra,a%ja(ip1:ip2),a%val(ip1:ip2),zval) + ! !$ write(psb_err_unit,*) j,i,p(i) + + alpha = (-p(i)/p(j)) + + if (.false..or.(abs(alpha) > sp_thresh)) then + do k=z%icp(j), z%icp(j+1)-1 + kr = z%ia(k) + zval(kr) = zval(kr) + alpha*z%val(k) +!!$ if (abs(zval(kr)) > 1e16) then +!!$ write(0,*) i,j,p(i),p(j),alpha,z%val(k),alpha*z%val(k),kr,zval(kr) +!!$ end if + if (izkr(kr) == 0) then + + call heap%insert(kr,info) + if (info /= psb_success_) exit + izkr(kr) = 1 + ! We have just added a new nonzero in KR. Thus, we will + ! need to explicitly compute the dot products on all + ! rows jj).and.(nextj= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),' start' + + iscale_ = psb_ilu_scale_none_ + if (present(iscale)) iscale_ = iscale + weight = done + ! + ! Check the memory available to hold the W and Z factors + ! and allocate it if needed + ! + nrow_a = a%get_nrows() + nztota = a%get_nzeros() + + if (present(blck)) then + nztota = nztota + blck%get_nzeros() + end if + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & ': out get_nnzeros',nrow_a,nztota,& + & a%get_nrows(),a%get_ncols(),a%get_nzeros() + + + n_row = desc%get_local_rows() + allocate(pq(n_row),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + end if + + nzrmax = fillin + sp_thresh = thresh + + ! + ! Ok, let's start first with Z (i.e. Upper) + ! + call a%csclip(acoo,info,imax=n_row,jmax=n_row) + call acsr%mv_from_coo(acoo,info) + select case(iscale_) + case(psb_ilu_scale_none_) + ! Ok, do nothing. + + case(psb_ilu_scale_maxval_) + weight = acsr%maxval() + weight = done/weight + call acsr%scal(weight,info) + + case(psb_ilu_scale_arcsum_) + allocate(arws(n_row),acls(n_row),ad(n_row),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + end if + call acsr%arwsum(arws) + call acsr%aclsum(acls) + ad(1:n_row) = sqrt(sqrt(arws(1:n_row)*acls(1:n_row))) + ad(1:n_row) = done/ad(1:n_row) + call acsr%scal(ad,info,side='L') + call acsr%scal(ad,info,side='R') + case default + call psb_errpush(psb_err_from_subroutine_,name,a_err='wrong iscale') + goto 9999 + end select + + ! + ! Here for the actual workhorses. + ! Only biconjg is surviving for now.... + ! + call psb_sparse_biconjg(alg,n_row,acsr,pq,& + & zmat,wmat,nzrmax,sp_thresh,info) + ! Done. Hopefully.... + + if (info /= psb_success_) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='sparse_biconjg') + goto 9999 + end if + call atmp%mv_from(acsr) + + ! + ! Is this right??? + ! + do i=1, n_row + if (abs(pq(i)) < d_epstol) then + pq(i) = done + else + pq(i) = done/pq(i) + end if + end do + + select case(iscale_) + case(psb_ilu_scale_none_) + ! Ok, do nothing. + case(psb_ilu_scale_maxval_) + pq(:) = pq(:)*weight + + case(psb_ilu_scale_arcsum_) + call zmat%scal(ad,info,side='L') + call wmat%scal(ad,info,side='R') + + case default + call psb_errpush(psb_err_from_subroutine_,name,a_err='wrong iscale') + goto 9999 + end select + + call psb_move_alloc(pq,d,info) + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),' end' + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return +end subroutine psb_d_ainv_bld diff --git a/prec/impl/psb_d_bjacprec_impl.f90 b/prec/impl/psb_d_bjacprec_impl.f90 index 08409346..ec866dbe 100644 --- a/prec/impl/psb_d_bjacprec_impl.f90 +++ b/prec/impl/psb_d_bjacprec_impl.f90 @@ -1,9 +1,9 @@ -! +! ! Parallel Sparse BLAS version 3.5 ! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! +! Salvatore Filippone +! Alfredo Buttari +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -15,7 +15,7 @@ ! 3. The name of the PSBLAS group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. -! +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -27,13 +27,13 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! -! +! +! subroutine psb_d_bjac_dump(prec,info,prefix,head) use psb_base_mod use psb_d_bjacprec, psb_protect_name => psb_d_bjac_dump - implicit none + implicit none class(psb_d_bjac_prec_type), intent(in) :: prec integer(psb_ipk_), intent(out) :: info character(len=*), intent(in), optional :: prefix,head @@ -43,13 +43,13 @@ subroutine psb_d_bjac_dump(prec,info,prefix,head) character(len=80) :: prefix_ character(len=120) :: fname ! len should be at least 20 more than - ! len of prefix_ + ! len of prefix_ info = 0 ctxt = prec%get_ctxt() call psb_info(ctxt,iam,np) - if (present(prefix)) then + if (present(prefix)) then prefix_ = trim(prefix(1:min(len(prefix),len(prefix_)))) else prefix_ = "dump_fact_d" @@ -74,7 +74,7 @@ end subroutine psb_d_bjac_dump subroutine psb_d_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work) use psb_base_mod use psb_d_bjacprec, psb_protect_name => psb_d_bjac_apply_vect - implicit none + implicit none type(psb_desc_type),intent(in) :: desc_data class(psb_d_bjac_prec_type), intent(inout) :: prec real(psb_dpk_),intent(in) :: alpha,beta @@ -118,12 +118,12 @@ subroutine psb_d_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work) n_row = desc_data%get_local_rows() n_col = desc_data%get_local_cols() - if (x%get_nrows() < n_row) then + if (x%get_nrows() < n_row) then info = 36; ierr(1) = 2; ierr(2) = n_row; call psb_errpush(info,name,i_err=ierr) goto 9999 end if - if (y%get_nrows() < n_row) then + if (y%get_nrows() < n_row) then info = 36; ierr(1) = 3; ierr(2) = n_row; call psb_errpush(info,name,i_err=ierr) goto 9999 @@ -140,9 +140,9 @@ subroutine psb_d_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work) end if - if (n_col <= size(work)) then + if (n_col <= size(work)) then ww => work(1:n_col) - if ((4*n_col+n_col) <= size(work)) then + if ((4*n_col+n_col) <= size(work)) then aux => work(n_col+1:) else allocate(aux(4*n_col),stat=info) @@ -152,19 +152,19 @@ subroutine psb_d_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work) allocate(ww(n_col),aux(4*n_col),stat=info) endif - if (info /= psb_success_) then + if (info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 + goto 9999 end if do_alloc_wrk = .not.prec%is_allocated_wrk() if (do_alloc_wrk) call prec%allocate_wrk(info,vmold=x%v) associate (wv => prec%wrk(1), wv1 => prec%wrk(2)) - + select case(prec%iprcparm(psb_f_type_)) - case(psb_f_ilu_n_) - + case(psb_f_ilu_n_,psb_f_ilu_k_,psb_f_ilu_t_) + select case(trans_) case('N') call psb_spsm(done,prec%av(psb_l_pr_),x,dzero,wv,desc_data,info,& @@ -172,31 +172,58 @@ subroutine psb_d_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work) if(info == psb_success_) call psb_spsm(alpha,prec%av(psb_u_pr_),wv,& & beta,y,desc_data,info,& & trans=trans_,scale='U',choice=psb_none_, work=aux) - + case('T') call psb_spsm(done,prec%av(psb_u_pr_),x,dzero,wv,desc_data,info,& & trans=trans_,scale='L',diag=prec%dv,choice=psb_none_, work=aux) if(info == psb_success_) call psb_spsm(alpha,prec%av(psb_l_pr_),wv,& & beta,y,desc_data,info,& & trans=trans_,scale='U',choice=psb_none_,work=aux) - + case('C') - + call psb_spsm(done,prec%av(psb_u_pr_),x,dzero,wv,desc_data,info,& & trans=trans_,scale='U',choice=psb_none_, work=aux) - + call wv1%mlt(done,prec%dv,wv,dzero,info,conjgx=trans_) - + if(info == psb_success_) call psb_spsm(alpha,prec%av(psb_l_pr_),wv1,& & beta,y,desc_data,info,& & trans=trans_,scale='U',choice=psb_none_,work=aux) - + + end select + if (info /= psb_success_) then + ch_err="psb_spsm" + goto 9999 + end if + + case(psb_f_ainv_,psb_f_invt_,psb_f_invk_) + ! Application of approximate inverse preconditioner, just some spmm + + select case(trans_) + case('N') + call psb_spmm(done,prec%av(psb_l_pr_),x,dzero,wv,desc_data,info,& + & trans=trans_,work=aux,doswap=.false.) + + if (info == psb_success_) call wv1%mlt(done,prec%dv,wv,dzero,info) + if(info == psb_success_) & + & call psb_spmm(alpha,prec%av(psb_u_pr_),wv1,& + & beta,y,desc_data,info, trans=trans_, work=aux,doswap=.false.) + + case('T','C') + call psb_spmm(done,prec%av(psb_l_pr_),x,dzero,wv,desc_data,info,& + & trans=trans_,work=aux,doswap=.false.) + if (info == psb_success_) call wv1%mlt(done,prec%dv,wv,dzero,info) + if (info == psb_success_) & + & call psb_spmm(alpha,prec%av(psb_u_pr_),wv1, & + & beta,y,desc_data,info,trans=trans_,work=aux,doswap=.false.) + end select - if (info /= psb_success_) then + if (info /= psb_success_) then ch_err="psb_spsm" goto 9999 end if - + case default info = psb_err_internal_error_ call psb_errpush(info,name,a_err='Invalid factorization') @@ -204,12 +231,12 @@ subroutine psb_d_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work) end select end associate - + call psb_halo(y,desc_data,info,data=psb_comm_mov_) if (do_alloc_wrk) call prec%free_wrk(info) - if (n_col <= size(work)) then - if ((4*n_col+n_col) <= size(work)) then + if (n_col <= size(work)) then + if ((4*n_col+n_col) <= size(work)) then else deallocate(aux) endif @@ -231,7 +258,7 @@ end subroutine psb_d_bjac_apply_vect subroutine psb_d_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work) use psb_base_mod use psb_d_bjacprec, psb_protect_name => psb_d_bjac_apply - implicit none + implicit none type(psb_desc_type),intent(in) :: desc_data class(psb_d_bjac_prec_type), intent(inout) :: prec real(psb_dpk_),intent(in) :: alpha,beta @@ -244,6 +271,7 @@ subroutine psb_d_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work) ! Local variables integer(psb_ipk_) :: n_row,n_col real(psb_dpk_), pointer :: ww(:), aux(:) + type(psb_d_vect_type) :: tx,ty type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np,me integer(psb_ipk_) :: err_act, ierr(5) @@ -273,12 +301,12 @@ subroutine psb_d_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work) n_row = desc_data%get_local_rows() n_col = desc_data%get_local_cols() - if (size(x) < n_row) then + if (size(x) < n_row) then info = 36; ierr(1) = 2; ierr(2) = n_row; call psb_errpush(info,name,i_err=ierr) goto 9999 end if - if (size(y) < n_row) then + if (size(y) < n_row) then info = 36; ierr(1) = 3; ierr(2) = n_row; call psb_errpush(info,name,i_err=ierr) goto 9999 @@ -295,29 +323,29 @@ subroutine psb_d_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work) end if - if (n_col <= size(work)) then + if (n_col <= size(work)) then ww => work(1:n_col) - if ((4*n_col+n_col) <= size(work)) then + if ((4*n_col+n_col) <= size(work)) then aux => work(n_col+1:) else allocate(aux(4*n_col),stat=info) - if (info /= psb_success_) then + if (info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 + goto 9999 end if endif else allocate(ww(n_col),aux(4*n_col),stat=info) - if (info /= psb_success_) then + if (info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 + goto 9999 end if endif select case(prec%iprcparm(psb_f_type_)) - case(psb_f_ilu_n_) + case(psb_f_ilu_n_,psb_f_ilu_k_,psb_f_ilu_t_) select case(trans_) case('N') @@ -344,11 +372,42 @@ subroutine psb_d_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work) & trans=trans_,scale='U',choice=psb_none_,work=aux) end select - if (info /= psb_success_) then + if (info /= psb_success_) then ch_err="psb_spsm" goto 9999 end if + case(psb_f_ainv_,psb_f_invt_,psb_f_invk_) + ! Application of approximate inverse preconditioner, just some spmm + + select case(trans_) + + case('N') + call psb_spmm(done,prec%av(psb_l_pr_),x,dzero,ww,desc_data,info,& + & trans=trans_,work=aux,doswap=.false.) + ww(1:n_row) = ww(1:n_row) * prec%dv%v%v(1:n_row) + if (info == psb_success_) & + & call psb_spmm(alpha,prec%av(psb_u_pr_),ww,beta,y,desc_data,info,& + & trans=trans_,work=aux,doswap=.false.) + + case('T') + call psb_spmm(done,prec%av(psb_u_pr_),x,dzero,ww,desc_data,info,& + & trans=trans_,work=aux,doswap=.false.) + ww(1:n_row) = ww(1:n_row) * prec%dv%v%v(1:n_row) + if (info == psb_success_) & + & call psb_spmm(alpha,prec%av(psb_l_pr_),ww,beta,y,desc_data,info,& + & trans=trans_,work=aux,doswap=.false.) + + case('C') + call psb_spmm(done,prec%av(psb_u_pr_),x,dzero,ww,desc_data,info,& + & trans=trans_,work=aux,doswap=.false.) + ww(1:n_row) = ww(1:n_row) * (prec%dv%v%v(1:n_row)) + if (info == psb_success_) & + & call psb_spmm(alpha,prec%av(psb_l_pr_),ww,beta,y,desc_data,info,& + & trans=trans_,work=aux,doswap=.false.) + + end select + case default info = psb_err_internal_error_ @@ -358,8 +417,8 @@ subroutine psb_d_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work) call psb_halo(y,desc_data,info,data=psb_comm_mov_) - if (n_col <= size(work)) then - if ((4*n_col+n_col) <= size(work)) then + if (n_col <= size(work)) then + if ((4*n_col+n_col) <= size(work)) then else deallocate(aux) endif @@ -397,11 +456,26 @@ subroutine psb_d_bjac_precinit(prec,info) call psb_Errpush(info,name) goto 9999 end if + call psb_realloc(psb_rfpsz,prec%rprcparm,info) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_Errpush(info,name) + goto 9999 + end if prec%iprcparm(:) = 0 prec%iprcparm(psb_p_type_) = psb_bjac_ prec%iprcparm(psb_f_type_) = psb_f_ilu_n_ prec%iprcparm(psb_ilu_fill_in_) = 0 + prec%iprcparm(psb_ilu_ialg_) = psb_ilu_n_ + prec%iprcparm(psb_ilu_scale_) = psb_ilu_scale_none_ + prec%iprcparm(psb_inv_fillin_) = 0 + prec%iprcparm(psb_ainv_alg_) = psb_ainv_llk_ + + + prec%rprcparm(:) = 0 + prec%rprcparm(psb_fact_eps_) = 1E-1_psb_dpk_ + prec%rprcparm(psb_inv_thresh_) = 1E-1_psb_dpk_ call psb_erractionrestore(err_act) @@ -416,7 +490,8 @@ end subroutine psb_d_bjac_precinit subroutine psb_d_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold) use psb_base_mod - use psb_prec_mod, only : psb_ilu_fct + use psb_d_ilu_fact_mod + use psb_d_ainv_fact_mod use psb_d_bjacprec, psb_protect_name => psb_d_bjac_precbld Implicit None @@ -428,12 +503,13 @@ subroutine psb_d_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold) class(psb_d_base_vect_type), intent(in), optional :: vmold class(psb_i_base_vect_type), intent(in), optional :: imold - ! .. Local Scalars .. - integer(psb_ipk_) :: i, m + ! .. Local Scalars .. + integer(psb_ipk_) :: i, m, ialg, fill_in, iscale, inv_fill, iinvalg integer(psb_ipk_) :: ierr(5) character :: trans, unitd - type(psb_d_csr_sparse_mat), allocatable :: lf, uf + type(psb_dspmat_type), allocatable :: lf, uf real(psb_dpk_), allocatable :: dd(:) + real(psb_dpk_) :: fact_eps, inv_thresh integer(psb_ipk_) :: nztota, err_act, n_row, nrow_a,n_col, nhalo type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np,me @@ -462,19 +538,114 @@ subroutine psb_d_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold) trans = 'N' unitd = 'U' + ! We check if all the information contained in the preconditioner structure + ! are meaningful, otherwise we give an error and get out of the build + ! procedure + ialg = prec%iprcparm(psb_ilu_ialg_) ! Integer for ILU type algorithm + iinvalg = prec%iprcparm(psb_ainv_alg_) ! Integer for AINV type algorithm + iscale = prec%iprcparm(psb_ilu_scale_) ! Integer for scaling of matrix + fact_eps = prec%rprcparm(psb_fact_eps_) ! Drop-tolerance for factorization + inv_thresh = prec%rprcparm(psb_inv_thresh_) ! Drop-tolerance for inverse + fill_in = prec%iprcparm(psb_ilu_fill_in_) ! Fill-In for factorization + inv_fill = prec%iprcparm(psb_inv_fillin_) ! Fill-In for inverse factorization + + ! Check if the type of scaling is known, pay attention that not all the + ! scalings make sense for all the factorization, if something that does not + ! make sense is required the factorization routine will fail in an + ! unnrecoverable way. + if ((iscale == psb_ilu_scale_none_).or.& + (iscale == psb_ilu_scale_maxval_).or.& + (iscale == psb_ilu_scale_diag_).or.& + (iscale == psb_ilu_scale_arwsum_).or.& + (iscale == psb_ilu_scale_aclsum_).or.& + (iscale == psb_ilu_scale_arcsum_)) then + ! Do nothing: admissible request + else + info=psb_err_from_subroutine_ + ch_err='psb_ilu_scale_' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + select case (prec%iprcparm(psb_f_type_)) + case (psb_f_ainv_) + ! Check if the variant for the AINV is known to the library + select case (iinvalg) + case(psb_ainv_llk_,psb_ainv_s_llk_,psb_ainv_s_ft_llk_,psb_ainv_llk_noth_,& + & psb_ainv_mlk_) + ! Do nothing these are okay + case default + info=psb_err_from_subroutine_ + ch_err='psb_ainv_alg_' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end select ! AINV Variant + ! Check if the drop-tolerance make sense + if( inv_thresh > 1) then + info=psb_err_from_subroutine_ + ch_err='psb_inv_thresh_' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + case (psb_f_ilu_t_) + if (fact_eps > 1) then + ! Check if the drop-tolerance make sense + info=psb_err_from_subroutine_ + ch_err='psb_fact_eps_' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + case (psb_f_invt_) + ! Check both tolerances + if (fact_eps > 1) then + info=psb_err_from_subroutine_ + ch_err='psb_fact_eps_' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + if( inv_thresh > 1) then + info=psb_err_from_subroutine_ + ch_err='psb_inv_thresh_' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + case default + end select + + + ! Checks relative to the fill-in parameters + if (prec%iprcparm(psb_f_type_) == psb_f_ilu_n_) then + if(fill_in < 0) then + info=psb_err_from_subroutine_ + ch_err='psb_ilu_fill_in_' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + else if (fill_in == 0) then + ! If the requested level of fill is equal to zero, we default to the + ! specialized ILU(0) routine + prec%iprcparm(psb_f_type_) = psb_f_ilu_n_ + end if + end if + ! If no limit on the fill_in is required we allow every fill, this is needed + ! since this quantity is used to allocate the auxiliary vectors for the + ! factorization + if (inv_fill <= 0) inv_fill = m + + ! Select on the type of factorization to be used select case(prec%iprcparm(psb_f_type_)) - case(psb_f_ilu_n_) + case(psb_f_ilu_n_) + ! ILU(0) Factorization: the total number of nonzeros of the factorized matrix + ! is equal to the one of the input matrix - if (allocated(prec%av)) then - if (size(prec%av) < psb_bp_ilu_avsz) then - do i=1,size(prec%av) + if (allocated(prec%av)) then + if (size(prec%av) < psb_bp_ilu_avsz) then + do i=1,size(prec%av) call prec%av(i)%free() enddo deallocate(prec%av,stat=info) endif end if - if (.not.allocated(prec%av)) then + if (.not.allocated(prec%av)) then allocate(prec%av(psb_max_avsz),stat=info) if (info /= psb_success_) then call psb_errpush(psb_err_alloc_dealloc_,name) @@ -501,27 +672,27 @@ subroutine psb_d_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold) end if allocate(dd(n_row),stat=info) - if (info == psb_success_) then + if (info == psb_success_) then allocate(prec%dv, stat=info) - if (info == 0) then - if (present(vmold)) then - allocate(prec%dv%v,mold=vmold,stat=info) - else - allocate(psb_d_base_vect_type :: prec%dv%v,stat=info) + if (info == 0) then + if (present(vmold)) then + allocate(prec%dv%v,mold=vmold,stat=info) + else + allocate(psb_d_base_vect_type :: prec%dv%v,stat=info) end if end if end if - if (info /= psb_success_) then + if (info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 + goto 9999 endif - ! This is where we have no renumbering, thus no need - call psb_ilu_fct(a,lf,uf,dd,info) + ! This is where we have no renumbering, thus no need + call psb_ilu0_fact(ialg,a,lf,uf,dd,info) if(info == psb_success_) then - call prec%av(psb_l_pr_)%mv_from(lf) - call prec%av(psb_u_pr_)%mv_from(uf) + call prec%av(psb_l_pr_)%mv_from(lf%a) + call prec%av(psb_u_pr_)%mv_from(uf%a) call prec%av(psb_l_pr_)%set_asb() call prec%av(psb_u_pr_)%set_asb() call prec%av(psb_l_pr_)%trim() @@ -530,12 +701,375 @@ subroutine psb_d_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold) ! call move_alloc(dd,prec%d) else info=psb_err_from_subroutine_ - ch_err='psb_ilu_fct' + ch_err='psb_ilu0_fact' call psb_errpush(info,name,a_err=ch_err) goto 9999 end if - case(psb_f_none_) + case(psb_f_ilu_k_) + ! ILU(N) Incomplete LU-factorization with N levels of fill-in. Depending on + ! the type of the variant of the algorithm the may be forgotten or added to + ! the diagonal (MILU) + + if (allocated(prec%av)) then + if (size(prec%av) < psb_bp_ilu_avsz) then + do i=1,size(prec%av) + call prec%av(i)%free() + enddo + deallocate(prec%av,stat=info) + endif + end if + if (.not.allocated(prec%av)) then + allocate(prec%av(psb_max_avsz),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + endif + + nrow_a = desc_a%get_local_rows() + nztota = a%get_nzeros() + + n_col = desc_a%get_local_cols() + nhalo = n_col-nrow_a + n_row = nrow_a + + allocate(lf,uf,stat=info) + if (info == psb_success_) call lf%allocate(n_row,n_row,nztota) + if (info == psb_success_) call uf%allocate(n_row,n_row,nztota) + + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_sp_all' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + allocate(dd(n_row),stat=info) + if (info == psb_success_) then + allocate(prec%dv, stat=info) + if (info == 0) then + if (present(vmold)) then + allocate(prec%dv%v,mold=vmold,stat=info) + else + allocate(psb_d_base_vect_type :: prec%dv%v,stat=info) + end if + end if + end if + + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + endif + ! This is where we have no renumbering, thus no need + call psb_iluk_fact(fill_in,ialg,a,lf,uf,dd,info) + + if(info == psb_success_) then + call prec%av(psb_l_pr_)%mv_from(lf%a) + call prec%av(psb_u_pr_)%mv_from(uf%a) + call prec%av(psb_l_pr_)%set_asb() + call prec%av(psb_u_pr_)%set_asb() + call prec%av(psb_l_pr_)%trim() + call prec%av(psb_u_pr_)%trim() + call prec%dv%bld(dd) + ! call move_alloc(dd,prec%d) + else + info=psb_err_from_subroutine_ + ch_err='psb_iluk_fact' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + case(psb_f_ilu_t_) + ! ILU(N,E) Incomplete LU factorization with thresholding and level of fill + + if (allocated(prec%av)) then + if (size(prec%av) < psb_bp_ilu_avsz) then + do i=1,size(prec%av) + call prec%av(i)%free() + enddo + deallocate(prec%av,stat=info) + endif + end if + if (.not.allocated(prec%av)) then + allocate(prec%av(psb_max_avsz),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + endif + + nrow_a = desc_a%get_local_rows() + nztota = a%get_nzeros() + + n_col = desc_a%get_local_cols() + nhalo = n_col-nrow_a + n_row = nrow_a + + allocate(lf,uf,stat=info) + if (info == psb_success_) call lf%allocate(n_row,n_row,nztota) + if (info == psb_success_) call uf%allocate(n_row,n_row,nztota) + + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_sp_all' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + allocate(dd(n_row),stat=info) + if (info == psb_success_) then + allocate(prec%dv, stat=info) + if (info == 0) then + if (present(vmold)) then + allocate(prec%dv%v,mold=vmold,stat=info) + else + allocate(psb_d_base_vect_type :: prec%dv%v,stat=info) + end if + end if + end if + + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + endif + ! This is where we have no renumbering, thus no need + call psb_ilut_fact(fill_in,fact_eps,a,lf,uf,dd,info,iscale=iscale) + + if(info == psb_success_) then + call prec%av(psb_l_pr_)%mv_from(lf%a) + call prec%av(psb_u_pr_)%mv_from(uf%a) + call prec%av(psb_l_pr_)%set_asb() + call prec%av(psb_u_pr_)%set_asb() + call prec%av(psb_l_pr_)%trim() + call prec%av(psb_u_pr_)%trim() + call prec%dv%bld(dd) + ! call move_alloc(dd,prec%d) + else + info=psb_err_from_subroutine_ + ch_err='psb_ilut_fact' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + case(psb_f_ainv_) + ! Approximate Inverse Factorizations based on variants of the incomplete + ! biconjugation algorithms + if (allocated(prec%av)) then + if (size(prec%av) < psb_bp_ilu_avsz) then + do i=1,size(prec%av) + call prec%av(i)%free() + enddo + deallocate(prec%av,stat=info) + endif + end if + if (.not.allocated(prec%av)) then + allocate(prec%av(psb_max_avsz),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + endif + + nrow_a = desc_a%get_local_rows() + nztota = a%get_nzeros() + + n_col = desc_a%get_local_cols() + nhalo = n_col-nrow_a + n_row = nrow_a + + allocate(lf,uf,stat=info) + if (info == psb_success_) call lf%allocate(n_row,n_row,nztota) + if (info == psb_success_) call uf%allocate(n_row,n_row,nztota) + + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_sp_all' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + allocate(dd(n_row),stat=info) + if (info == psb_success_) then + allocate(prec%dv, stat=info) + if (info == 0) then + if (present(vmold)) then + allocate(prec%dv%v,mold=vmold,stat=info) + else + allocate(psb_d_base_vect_type :: prec%dv%v,stat=info) + end if + end if + end if + + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + endif + ! Computing the factorization + call psb_ainv_fact(a,iinvalg,inv_fill,inv_thresh,lf,dd,uf,desc_a,info,iscale=iscale) + + if(info == psb_success_) then + call prec%av(psb_l_pr_)%mv_from(lf%a) + call prec%av(psb_u_pr_)%mv_from(uf%a) + call prec%av(psb_l_pr_)%set_asb() + call prec%av(psb_u_pr_)%set_asb() + call prec%av(psb_l_pr_)%trim() + call prec%av(psb_u_pr_)%trim() + call prec%dv%bld(dd) + ! call move_alloc(dd,prec%d) + else + info=psb_err_from_subroutine_ + ch_err='psb_ilut_fact' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + case(psb_f_invk_) + ! Approximate Inverse Factorizations based on the sparse inversion of + ! triangular factors of an ILU(n) factorization + if (allocated(prec%av)) then + if (size(prec%av) < psb_bp_ilu_avsz) then + do i=1,size(prec%av) + call prec%av(i)%free() + enddo + deallocate(prec%av,stat=info) + endif + end if + if (.not.allocated(prec%av)) then + allocate(prec%av(psb_max_avsz),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + endif + + nrow_a = desc_a%get_local_rows() + nztota = a%get_nzeros() + + n_col = desc_a%get_local_cols() + nhalo = n_col-nrow_a + n_row = nrow_a + + allocate(lf,uf,stat=info) + if (info == psb_success_) call lf%allocate(n_row,n_row,nztota) + if (info == psb_success_) call uf%allocate(n_row,n_row,nztota) + + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_sp_all' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + allocate(dd(n_row),stat=info) + if (info == psb_success_) then + allocate(prec%dv, stat=info) + if (info == 0) then + if (present(vmold)) then + allocate(prec%dv%v,mold=vmold,stat=info) + else + allocate(psb_d_base_vect_type :: prec%dv%v,stat=info) + end if + end if + end if + + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + endif + ! Computing the factorization + call psb_invk_fact(a,fill_in, inv_fill,lf,dd,uf,desc_a,info) + + if(info == psb_success_) then + call prec%av(psb_l_pr_)%mv_from(lf%a) + call prec%av(psb_u_pr_)%mv_from(uf%a) + call prec%av(psb_l_pr_)%set_asb() + call prec%av(psb_u_pr_)%set_asb() + call prec%av(psb_l_pr_)%trim() + call prec%av(psb_u_pr_)%trim() + call prec%dv%bld(dd) + ! call move_alloc(dd,prec%d) + else + info=psb_err_from_subroutine_ + ch_err='psb_ilut_fact' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + case(psb_f_invt_) + ! Approximate Inverse Factorizations based on the sparse inversion of + ! triangular factors of an ILU(eps) factorization + if (allocated(prec%av)) then + if (size(prec%av) < psb_bp_ilu_avsz) then + do i=1,size(prec%av) + call prec%av(i)%free() + enddo + deallocate(prec%av,stat=info) + endif + end if + if (.not.allocated(prec%av)) then + allocate(prec%av(psb_max_avsz),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + endif + + nrow_a = desc_a%get_local_rows() + nztota = a%get_nzeros() + + n_col = desc_a%get_local_cols() + nhalo = n_col-nrow_a + n_row = nrow_a + + allocate(lf,uf,stat=info) + if (info == psb_success_) call lf%allocate(n_row,n_row,nztota) + if (info == psb_success_) call uf%allocate(n_row,n_row,nztota) + + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_sp_all' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + allocate(dd(n_row),stat=info) + if (info == psb_success_) then + allocate(prec%dv, stat=info) + if (info == 0) then + if (present(vmold)) then + allocate(prec%dv%v,mold=vmold,stat=info) + else + allocate(psb_d_base_vect_type :: prec%dv%v,stat=info) + end if + end if + end if + + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + endif + ! Computing the factorization + call psb_invt_fact(a,fill_in,inv_fill,fact_eps,inv_thresh,lf,dd,uf,& + & desc_a,info) + + if(info == psb_success_) then + call prec%av(psb_l_pr_)%mv_from(lf%a) + call prec%av(psb_u_pr_)%mv_from(uf%a) + call prec%av(psb_l_pr_)%set_asb() + call prec%av(psb_u_pr_)%set_asb() + call prec%av(psb_l_pr_)%trim() + call prec%av(psb_u_pr_)%trim() + call prec%dv%bld(dd) + ! call move_alloc(dd,prec%d) + else + info=psb_err_from_subroutine_ + ch_err='psb_ilut_fact' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + case(psb_f_none_) info=psb_err_from_subroutine_ ch_err='Inconsistent prec psb_f_none_' call psb_errpush(info,name,a_err=ch_err) @@ -548,7 +1082,7 @@ subroutine psb_d_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold) goto 9999 end select - if (present(amold)) then + if (present(amold)) then call prec%av(psb_l_pr_)%cscnv(info,mold=amold) call prec%av(psb_u_pr_)%cscnv(info,mold=amold) end if @@ -567,8 +1101,8 @@ subroutine psb_d_bjac_precseti(prec,what,val,info) Implicit None class(psb_d_bjac_prec_type),intent(inout) :: prec - integer(psb_ipk_), intent(in) :: what - integer(psb_ipk_), intent(in) :: val + integer(psb_ipk_), intent(in) :: what + integer(psb_ipk_), intent(in) :: val integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: err_act, nrow character(len=20) :: name='d_bjac_precset' @@ -576,34 +1110,33 @@ subroutine psb_d_bjac_precseti(prec,what,val,info) call psb_erractionsave(err_act) info = psb_success_ - if (.not.allocated(prec%iprcparm)) then + if (.not.allocated(prec%iprcparm)) then info = 1124 call psb_errpush(info,name,a_err="preconditioner") goto 9999 end if select case(what) - case (psb_f_type_) - if (prec%iprcparm(psb_p_type_) /= psb_bjac_) then - write(psb_err_unit,*) 'WHAT is invalid for current preconditioner ',& - & prec%iprcparm(psb_p_type_),& - & 'ignoring user specification' - return - endif + case (psb_f_type_) prec%iprcparm(psb_f_type_) = val - case (psb_ilu_fill_in_) - if ((prec%iprcparm(psb_p_type_) /= psb_bjac_).or.& - & (prec%iprcparm(psb_f_type_) /= psb_f_ilu_n_)) then - write(psb_err_unit,*) 'WHAT is invalid for current preconditioner ',& - & prec%iprcparm(psb_p_type_),& - & 'ignoring user specification' - return - endif + case (psb_ilu_fill_in_) prec%iprcparm(psb_ilu_fill_in_) = val + case (psb_ilu_ialg_) + prec%iprcparm(psb_ilu_ialg_) = val + + case (psb_ilu_scale_) + prec%iprcparm(psb_ilu_scale_) = val + + case (psb_ainv_alg_) + prec%iprcparm(psb_ainv_alg_) = val + + case(psb_inv_fillin_) + prec%iprcparm(psb_inv_fillin_) = val + case default - write(psb_err_unit,*) 'WHAT is invalid, ignoring user specification' + write(psb_err_unit,'(i0," is invalid, ignoring user specification")') what end select @@ -613,3 +1146,45 @@ subroutine psb_d_bjac_precseti(prec,what,val,info) 9999 call psb_error_handler(err_act) return end subroutine psb_d_bjac_precseti + +subroutine psb_d_bjac_precsetr(prec,what,val,info) + + use psb_base_mod + use psb_d_bjacprec, psb_protect_name => psb_d_bjac_precsetr + Implicit None + + class(psb_d_bjac_prec_type),intent(inout) :: prec + integer(psb_ipk_), intent(in) :: what + real(psb_dpk_), intent(in) :: val + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act, nrow + character(len=20) :: name='d_bjac_precset' + + call psb_erractionsave(err_act) + + info = psb_success_ + if (.not.allocated(prec%rprcparm)) then + info = 1124 + call psb_errpush(info,name,a_err="preconditioner") + goto 9999 + end if + + select case(what) + + case (psb_fact_eps_) + prec%rprcparm(psb_fact_eps_) = val + + case (psb_inv_thresh_) + prec%rprcparm(psb_inv_thresh_) = val + + case default + write(psb_err_unit,'(i0," is invalid, ignoring user specification")') what + + end select + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return +end subroutine psb_d_bjac_precsetr diff --git a/prec/impl/psb_d_ilu0_fact.f90 b/prec/impl/psb_d_ilu0_fact.f90 index 4ec9ffb1..478eedfa 100644 --- a/prec/impl/psb_d_ilu0_fact.f90 +++ b/prec/impl/psb_d_ilu0_fact.f90 @@ -1,9 +1,9 @@ -! +! ! Parallel Sparse BLAS version 3.5 ! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! +! Salvatore Filippone +! Alfredo Buttari +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -15,7 +15,7 @@ ! 3. The name of the PSBLAS group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. -! +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -27,21 +27,21 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! +! ! Moved here from MLD2P4, original copyright below. -! -! -! +! +! +! ! MLD2P4 version 2.2 ! MultiLevel Domain Decomposition Parallel Preconditioners Package ! based on PSBLAS (Parallel Sparse BLAS version 3.5) -! -! (C) Copyright 2008-2018 -! -! Salvatore Filippone -! Pasqua D'Ambra -! Daniela di Serafino -! +! +! (C) Copyright 2008-2018 +! +! Salvatore Filippone +! Pasqua D'Ambra +! Daniela di Serafino +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -53,7 +53,7 @@ ! 3. The name of the MLD2P4 group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. -! +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -65,8 +65,8 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! -! +! +! ! File: psb_dilu0_fact.f90 ! ! Subroutine: psb_dilu0_fact @@ -75,7 +75,7 @@ ! ! This routine computes either the ILU(0) or the MILU(0) factorization of ! the diagonal blocks of a distributed matrix. These factorizations are used -! to build the 'base preconditioner' (block-Jacobi preconditioner/solver, +! to build the 'base preconditioner' (block-Jacobi preconditioner/solver, ! Additive Schwarz preconditioner) corresponding to a given level of a ! multilevel preconditioner. ! @@ -83,10 +83,10 @@ ! Y. Saad, Iterative Methods for Sparse Linear Systems, Second Edition, ! SIAM, 2003, Chapter 10. ! -! The local matrix is stored into a and blck, as specified in the description -! of the arguments below. The storage format for both the L and U factors is CSR. -! The diagonal of the U factor is stored separately (actually, the inverse of the -! diagonal entries is stored; this is then managed in the solve stage associated +! The local matrix is stored into a and blck, as specified in the description +! of the arguments below. The storage format for both the L and U factors is CSR. +! The diagonal of the U factor is stored separately (actually, the inverse of the +! diagonal entries is stored; this is then managed in the solve stage associated ! to the ILU(0)/MILU(0) factorization). ! ! The routine copies and factors "on the fly" from a and blck into l (L factor), @@ -94,7 +94,7 @@ ! ! This implementation of ILU(0)/MILU(0) is faster than the implementation in ! psb_ziluk_fct (the latter routine performs the more general ILU(k)/MILU(k)). -! +! ! ! Arguments: ! ialg - integer, input. @@ -121,7 +121,7 @@ ! factorization. ! Note: its allocation is managed by the calling routine psb_ilu_bld, ! hence it cannot be only intent(out). -! info - integer, output. +! info - integer, output. ! Error code. ! blck - type(psb_dspmat_type), input, optional, target. ! The sparse matrix structure containing the remote rows of the @@ -129,7 +129,7 @@ ! to build an Additive Schwarz base preconditioner with overlap ! greater than 0. If the overlap is 0 or the matrix has been reordered ! (see psb_fact_bld), then blck is empty. -! +! subroutine psb_dilu0_fact(ialg,a,l,u,d,info,blck, upd) use psb_base_mod @@ -157,30 +157,30 @@ subroutine psb_dilu0_fact(ialg,a,l,u,d,info,blck, upd) info = psb_success_ call psb_erractionsave(err_act) - ! + ! ! Point to / allocate memory for the incomplete factorization ! - if (present(blck)) then + if (present(blck)) then blck_ => blck else - allocate(blck_,stat=info) - if (info == psb_success_) call blck_%csall(izero,izero,info,ione) + allocate(blck_,stat=info) + if (info == psb_success_) call blck_%allocate(izero,izero,info,ione,type='CSR') if (info /= psb_success_) then info=psb_err_from_subroutine_ - ch_err='csall' + ch_err='allocate' call psb_errpush(info,name,a_err=ch_err) goto 9999 end if endif - if (present(upd)) then - upd_ = psb_toupper(upd) + if (present(upd)) then + upd_ = psb_toupper(upd) else upd_ = 'F' end if - + m = a%get_nrows() + blck_%get_nrows() if ((m /= l%get_nrows()).or.(m /= u%get_nrows()).or.& - & (m > size(d)) ) then + & (m > size(d)) ) then write(0,*) 'Wrong allocation status for L,D,U? ',& & l%get_nrows(),size(d),u%get_nrows() info = -1 @@ -212,12 +212,12 @@ subroutine psb_dilu0_fact(ialg,a,l,u,d,info,blck, upd) call u%set_triangle() call u%set_unit() call u%set_upper() - + ! ! Nullify pointer / deallocate memory ! - if (present(blck)) then - blck_ => null() + if (present(blck)) then + blck_ => null() else call blck_%free() if(info.ne.0) then @@ -226,7 +226,7 @@ subroutine psb_dilu0_fact(ialg,a,l,u,d,info,blck, upd) call psb_errpush(info,name,a_err=ch_err) goto 9999 end if - deallocate(blck_) + deallocate(blck_) endif call psb_erractionrestore(err_act) @@ -243,9 +243,9 @@ contains ! Version: real ! Note: internal subroutine of psb_dilu0_fact. ! - ! This routine computes either the ILU(0) or the MILU(0) factorization of the - ! diagonal blocks of a distributed matrix. - ! These factorizations are used to build the 'base preconditioner' + ! This routine computes either the ILU(0) or the MILU(0) factorization of the + ! diagonal blocks of a distributed matrix. + ! These factorizations are used to build the 'base preconditioner' ! (block-Jacobi preconditioner/solver, Additive Schwarz ! preconditioner) corresponding to a given level of a multilevel preconditioner. ! @@ -257,7 +257,7 @@ contains ! ! The routine copies and factors "on the fly" from the sparse matrix structures a ! and b into the arrays lval, uval, d (L, U without its diagonal, diagonal of U). - ! + ! ! ! Arguments: ! ialg - integer, input. @@ -296,7 +296,7 @@ contains ! according to the CSR storage format. ! lirp - integer, dimension(:), input/output. ! The indices identifying the first nonzero entry of each row - ! of the L factor in lval, according to the CSR storage format. + ! of the L factor in lval, according to the CSR storage format. ! uval - real(psb_dpk_), dimension(:), input/output. ! The U factor in the incomplete factorization. ! The entries of U are stored according to the CSR format. @@ -305,18 +305,18 @@ contains ! according to the CSR storage format. ! uirp - integer, dimension(:), input/output. ! The indices identifying the first nonzero entry of each row - ! of the U factor in uval, according to the CSR storage format. + ! of the U factor in uval, according to the CSR storage format. ! l1 - integer, output. ! The number of nonzero entries in lval. ! l2 - integer, output. ! The number of nonzero entries in uval. - ! info - integer, output. + ! info - integer, output. ! Error code. ! subroutine psb_dilu0_factint(ialg,a,b,& & d,lval,lja,lirp,uval,uja,uirp,l1,l2,upd,info) - implicit none + implicit none ! Arguments integer(psb_ipk_), intent(in) :: ialg @@ -332,7 +332,7 @@ contains real(psb_dpk_) :: dia,temp integer(psb_ipk_), parameter :: nrb=16 type(psb_d_coo_sparse_mat) :: trw - integer(psb_ipk_) :: int_err(5) + integer(psb_ipk_) :: int_err(5) character(len=20) :: name, ch_err name='psb_dilu0_factint' @@ -346,10 +346,10 @@ contains select case(ialg) case(psb_ilu_n_,psb_milu_n_) - ! Ok + ! Ok case default info=psb_err_input_asize_invalid_i_ - call psb_errpush(info,name,& + call psb_errpush(info,name,& & i_err=(/ione,ialg,izero,izero,izero/)) goto 9999 end select @@ -363,7 +363,7 @@ contains end if m = ma+mb - if (psb_toupper(upd) == 'F' ) then + if (psb_toupper(upd) == 'F' ) then lirp(1) = 1 uirp(1) = 1 l1 = 0 @@ -379,14 +379,14 @@ contains if (i <= ma) then ! ! Copy the i-th local row of the matrix, stored in a, - ! into lval/d(i)/uval + ! into lval/d(i)/uval ! call ilu_copyin(i,ma,a,i,ione,m,l1,lja,lval,& & d(i),l2,uja,uval,ktrw,trw,upd) else ! ! Copy the i-th local row of the matrix, stored in b - ! (as (i-ma)-th row), into lval/d(i)/uval + ! (as (i-ma)-th row), into lval/d(i)/uval ! call ilu_copyin(i-ma,mb,b,i,ione,m,l1,lja,lval,& & d(i),l2,uja,uval,ktrw,trw,upd) @@ -437,7 +437,7 @@ contains ! dia = dia - temp*uval(jj) cycle updateloop - ! + ! else if (j > i) then ! ! search u(i,*) (i-th row of U) for a matching index j @@ -454,23 +454,23 @@ contains end if enddo end if - ! - ! If we get here we missed the cycle updateloop, which means + ! + ! If we get here we missed the cycle updateloop, which means ! that this entry does not match; thus we accumulate on the ! diagonal for MILU(0). ! - if (ialg == psb_milu_n_) then + if (ialg == psb_milu_n_) then dia = dia - temp*uval(jj) end if enddo updateloop enddo - ! + ! ! Check the pivot size - ! + ! if (abs(dia) < d_epstol) then ! ! Too small pivot: unstable factorization - ! + ! info = psb_err_pivot_too_small_ int_err(1) = i write(ch_err,'(g20.10)') abs(dia) @@ -493,7 +493,7 @@ contains else write(0,*) 'Update not implemented ' info = 31 - call psb_errpush(info,name,& + call psb_errpush(info,name,& & i_err=(/ione*13,izero,izero,izero,izero/),a_err=upd) goto 9999 @@ -515,7 +515,7 @@ contains ! Version: real ! Note: internal subroutine of psb_dilu0_fact ! - ! This routine copies a row of a sparse matrix A, stored in the psb_dspmat_type + ! This routine copies a row of a sparse matrix A, stored in the psb_dspmat_type ! data structure a, into the arrays lval and uval and into the scalar variable ! dia, corresponding to the lower and upper triangles of A and to the diagonal ! entry of the row, respectively. The entries in lval and uval are stored @@ -529,14 +529,14 @@ contains ! ! The routine is used by psb_dilu0_factint in the computation of the ILU(0)/MILU(0) ! factorization of a local sparse matrix. - ! + ! ! TODO: modify the routine to allow copying into output L and U that are ! already filled with indices; this would allow computing an ILU(k) pattern, - ! then use the ILU(0) internal for subsequent calls with the same pattern. + ! then use the ILU(0) internal for subsequent calls with the same pattern. ! ! Arguments: ! i - integer, input. - ! The local index of the row to be extracted from the + ! The local index of the row to be extracted from the ! sparse matrix structure a. ! m - integer, input. ! The number of rows of the local matrix stored into a. @@ -570,13 +570,13 @@ contains ! to the CSR storage format. ! uval - real(psb_dpk_), dimension(:), input/output. ! The array where the entries of the row corresponding to the - ! upper triangle are copied. + ! upper triangle are copied. ! ktrw - integer, input/output. ! The index identifying the last entry taken from the ! staging buffer trw. See below. ! trw - type(psb_dspmat_type), input/output. ! A staging buffer. If the matrix A is not in CSR format, we use - ! the psb_sp_getblk routine and store its output in trw; when we + ! the psb_sp_getblk routine and store its output in trw; when we ! need to call psb_sp_getblk we do it for a block of rows, and then ! we consume them from trw in successive calls to this routine, ! until we empty the buffer. Thus we will make a call to psb_sp_getblk @@ -608,10 +608,10 @@ contains if (psb_errstatus_fatal()) then info = psb_err_internal_error_; goto 9999 end if - if (psb_toupper(upd) == 'F') then + if (psb_toupper(upd) == 'F') then - select type(aa => a%a) - type is (psb_d_csr_sparse_mat) + select type(aa => a%a) + type is (psb_d_csr_sparse_mat) ! ! Take a fast shortcut if the matrix is stored in CSR format @@ -633,19 +633,19 @@ contains end if enddo - class default + class default ! - ! Otherwise use psb_sp_getblk, slower but able (in principle) of + ! Otherwise use psb_sp_getblk, slower but able (in principle) of ! handling any format. In this case, a block of rows is extracted ! instead of a single row, for performance reasons, and these ! rows are copied one by one into lval, dia, uval, through ! successive calls to ilu_copyin. ! - if ((mod(i,nrb) == 1).or.(nrb == 1)) then + if ((mod(i,nrb) == 1).or.(nrb == 1)) then irb = min(m-i+1,nrb) - call aa%csget(i,i+irb-1,trw,info) + call aa%csget(i,i+irb-1,trw,info) if(info /= psb_success_) then info=psb_err_from_subroutine_ ch_err='csget' @@ -656,7 +656,7 @@ contains end if nz = trw%get_nzeros() - do + do if (ktrw > nz) exit if (trw%ia(ktrw) > i) exit k = trw%ja(ktrw) @@ -680,7 +680,7 @@ contains write(0,*) 'Update not implemented ' info = 31 - call psb_errpush(info,name,& + call psb_errpush(info,name,& & i_err=(/ione*13,izero,izero,izero,izero/),a_err=upd) goto 9999 diff --git a/prec/impl/psb_d_iluk_fact.f90 b/prec/impl/psb_d_iluk_fact.f90 index 6d644e42..544ec987 100644 --- a/prec/impl/psb_d_iluk_fact.f90 +++ b/prec/impl/psb_d_iluk_fact.f90 @@ -1,9 +1,9 @@ -! +! ! Parallel Sparse BLAS version 3.5 ! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! +! Salvatore Filippone +! Alfredo Buttari +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -15,7 +15,7 @@ ! 3. The name of the PSBLAS group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. -! +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -27,21 +27,21 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! +! ! Moved here from MLD2P4, original copyright below. -! -! -! +! +! +! ! MLD2P4 version 2.2 ! MultiLevel Domain Decomposition Parallel Preconditioners Package ! based on PSBLAS (Parallel Sparse BLAS version 3.5) -! -! (C) Copyright 2008-2018 -! -! Salvatore Filippone -! Pasqua D'Ambra -! Daniela di Serafino -! +! +! (C) Copyright 2008-2018 +! +! Salvatore Filippone +! Pasqua D'Ambra +! Daniela di Serafino +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -53,7 +53,7 @@ ! 3. The name of the MLD2P4 group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. -! +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -65,8 +65,8 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! -! +! +! ! File: psb_diluk_fact.f90 ! ! Subroutine: psb_diluk_fact @@ -74,7 +74,7 @@ ! Contains: psb_diluk_factint, iluk_copyin, iluk_fact, iluk_copyout. ! ! This routine computes either the ILU(k) or the MILU(k) factorization of the -! diagonal blocks of a distributed matrix. These factorizations are used to +! diagonal blocks of a distributed matrix. These factorizations are used to ! build the 'base preconditioner' (block-Jacobi preconditioner/solver, ! Additive Schwarz preconditioner) corresponding to a certain level of a ! multilevel preconditioner. @@ -88,7 +88,7 @@ ! U factors is CSR. The diagonal of the U factor is stored separately (actually, ! the inverse of the diagonal entries is stored; this is then managed in the solve ! stage associated to the ILU(k)/MILU(k) factorization). -! +! ! ! Arguments: ! fill_in - integer, input. @@ -118,7 +118,7 @@ ! factorization. ! Note: its allocation is managed by the calling routine psb_ilu_bld, ! hence it cannot be only intent(out). -! info - integer, output. +! info - integer, output. ! Error code. ! blck - type(psb_dspmat_type), input, optional, target. ! The sparse matrix structure containing the remote rows of the @@ -126,7 +126,7 @@ ! to build an Additive Schwarz base preconditioner with overlap ! greater than 0. If the overlap is 0 or the matrix has been reordered ! (see psb_fact_bld), then blck does not contain any row. -! +! subroutine psb_diluk_fact(fill_in,ialg,a,l,u,d,info,blck) use psb_base_mod @@ -143,7 +143,7 @@ subroutine psb_diluk_fact(fill_in,ialg,a,l,u,d,info,blck) real(psb_dpk_), intent(inout) :: d(:) ! Local Variables integer(psb_ipk_) :: l1, l2, m, err_act - + type(psb_dspmat_type), pointer :: blck_ type(psb_d_csr_sparse_mat) :: ll, uu character(len=20) :: name, ch_err @@ -152,17 +152,17 @@ subroutine psb_diluk_fact(fill_in,ialg,a,l,u,d,info,blck) info = psb_success_ call psb_erractionsave(err_act) - ! + ! ! Point to / allocate memory for the incomplete factorization ! - if (present(blck)) then + if (present(blck)) then blck_ => blck else - allocate(blck_,stat=info) - if (info == psb_success_) call blck_%csall(izero,izero,info,ione) + allocate(blck_,stat=info) + if (info == psb_success_) call blck_%allocate(izero,izero,info,ione,type='CSR') if (info /= psb_success_) then info=psb_err_from_subroutine_ - ch_err='csall' + ch_err='allocate' call psb_errpush(info,name,a_err=ch_err) goto 9999 end if @@ -170,7 +170,7 @@ subroutine psb_diluk_fact(fill_in,ialg,a,l,u,d,info,blck) m = a%get_nrows() + blck_%get_nrows() if ((m /= l%get_nrows()).or.(m /= u%get_nrows()).or.& - & (m > size(d)) ) then + & (m > size(d)) ) then write(0,*) 'Wrong allocation status for L,D,U? ',& & l%get_nrows(),size(d),u%get_nrows() info = -1 @@ -203,15 +203,15 @@ subroutine psb_diluk_fact(fill_in,ialg,a,l,u,d,info,blck) call u%set_triangle() call u%set_unit() call u%set_upper() - + ! ! Nullify pointer / deallocate memory ! - if (present(blck)) then - blck_ => null() + if (present(blck)) then + blck_ => null() else call blck_%free() - deallocate(blck_,stat=info) + deallocate(blck_,stat=info) if(info.ne.0) then info=psb_err_from_subroutine_ ch_err='psb_sp_free' @@ -280,7 +280,7 @@ contains ! according to the CSR storage format. ! lia2 - integer, dimension(:), input/output. ! The indices identifying the first nonzero entry of each row - ! of the L factor in laspk, according to the CSR storage format. + ! of the L factor in laspk, according to the CSR storage format. ! uval - real(psb_dpk_), dimension(:), input/output. ! The U factor in the incomplete factorization. ! The entries of U are stored according to the CSR format. @@ -289,12 +289,12 @@ contains ! according to the CSR storage format. ! uirp - integer, dimension(:), input/output. ! The indices identifying the first nonzero entry of each row - ! of the U factor in uval, according to the CSR storage format. + ! of the U factor in uval, according to the CSR storage format. ! l1 - integer, output ! The number of nonzero entries in laspk. ! l2 - integer, output ! The number of nonzero entries in uval. - ! info - integer, output. + ! info - integer, output. ! Error code. ! subroutine psb_diluk_factint(fill_in,ialg,a,b,& @@ -304,7 +304,7 @@ contains implicit none - ! Arguments + ! Arguments integer(psb_ipk_), intent(in) :: fill_in, ialg type(psb_dspmat_type),intent(in) :: a,b integer(psb_ipk_),intent(inout) :: l1,l2,info @@ -330,14 +330,14 @@ contains select case(ialg) case(psb_ilu_n_,psb_milu_n_) - ! Ok + ! Ok case default info=psb_err_input_asize_invalid_i_ call psb_errpush(info,name,& & i_err=(/itwo,ialg,izero,izero,izero/)) goto 9999 end select - if (fill_in < 0) then + if (fill_in < 0) then info=psb_err_input_asize_invalid_i_ call psb_errpush(info,name, & & i_err=(/ione,fill_in,izero,izero,izero/)) @@ -349,7 +349,7 @@ contains m = ma+mb ! - ! Allocate a temporary buffer for the iluk_copyin function + ! Allocate a temporary buffer for the iluk_copyin function ! call trw%allocate(izero,izero,ione) @@ -361,7 +361,7 @@ contains call psb_errpush(info,name,a_err='psb_sp_all') goto 9999 end if - + l1=0 l2=0 lirp(1) = 1 @@ -381,34 +381,34 @@ contains uplevs(:) = m+1 row(:) = dzero rowlevs(:) = -(m+1) - + ! ! Cycle over the matrix rows ! do i = 1, m - + ! ! At each iteration of the loop we keep in a heap the column indices ! affected by the factorization. The heap is initialized and filled ! in the iluk_copyin routine, and updated during the elimination, in ! the iluk_fact routine. The heap is ideal because at each step we need ! the lowest index, but we also need to insert new items, and the heap - ! allows to do both in log time. + ! allows to do both in log time. ! d(i) = dzero if (i<=ma) then ! - ! Copy into trw the i-th local row of the matrix, stored in a - ! + ! Copy into trw the i-th local row of the matrix, stored in a + ! call iluk_copyin(i,ma,a,ione,m,row,rowlevs,heap,ktrw,trw,info) else ! ! Copy into trw the i-th local row of the matrix, stored in b - ! (as (i-ma)-th row) - ! + ! (as (i-ma)-th row) + ! call iluk_copyin(i-ma,mb,b,ione,m,row,rowlevs,heap,ktrw,trw,info) endif - + ! Do an elimination step on the current row. It turns out we only ! need to keep track of fill levels for the upper triangle, hence we ! do not have a lowlevs variable. @@ -417,7 +417,7 @@ contains & d,uja,uirp,uval,uplevs,nidx,idxs,info) ! ! Copy the row into lval/d(i)/uval - ! + ! if (info == psb_success_) call iluk_copyout(fill_in,ialg,i,m,row,rowlevs,nidx,idxs,& & l1,l2,lja,lirp,lval,d,uja,uirp,uval,uplevs,info) if (info /= psb_success_) then @@ -474,11 +474,11 @@ contains ! ! This routine is used by psb_diluk_factint in the computation of the ! ILU(k)/MILU(k) factorization of a local sparse matrix. - ! + ! ! ! Arguments: ! i - integer, input. - ! The local index of the row to be extracted from the + ! The local index of the row to be extracted from the ! sparse matrix structure a. ! m - integer, input. ! The number of rows of the local matrix stored into a. @@ -510,7 +510,7 @@ contains ! staging buffer trw. See below. ! trw - type(psb_dspmat_type), input/output. ! A staging buffer. If the matrix A is not in CSR format, we use - ! the psb_sp_getblk routine and store its output in trw; when we + ! the psb_sp_getblk routine and store its output in trw; when we ! need to call psb_sp_getblk we do it for a block of rows, and then ! we consume them from trw in successive calls to this routine, ! until we empty the buffer. Thus we will make a call to psb_sp_getblk @@ -521,8 +521,8 @@ contains use psb_base_mod implicit none - - ! Arguments + + ! Arguments type(psb_dspmat_type), intent(in) :: a type(psb_d_coo_sparse_mat), intent(inout) :: trw integer(psb_ipk_), intent(in) :: i,m,jmin,jmax @@ -542,17 +542,17 @@ contains if (psb_errstatus_fatal()) then info = psb_err_internal_error_; goto 9999 end if - call heap%init(info) + call heap%init(info) - select type (aa=> a%a) - type is (psb_d_csr_sparse_mat) + select type (aa=> a%a) + type is (psb_d_csr_sparse_mat) ! ! Take a fast shortcut if the matrix is stored in CSR format ! - + do j = aa%irp(i), aa%irp(i+1) - 1 k = aa%ja(j) - if ((jmin<=k).and.(k<=jmax)) then + if ((jmin<=k).and.(k<=jmax)) then row(k) = aa%val(j) rowlevs(k) = 0 call heap%insert(k,info) @@ -562,14 +562,14 @@ contains class default ! - ! Otherwise use psb_sp_getblk, slower but able (in principle) of + ! Otherwise use psb_sp_getblk, slower but able (in principle) of ! handling any format. In this case, a block of rows is extracted ! instead of a single row, for performance reasons, and these ! rows are copied one by one into the array row, through successive ! calls to iluk_copyin. ! - if ((mod(i,nrb) == 1).or.(nrb == 1)) then + if ((mod(i,nrb) == 1).or.(nrb == 1)) then irb = min(m-i+1,nrb) call aa%csget(i,i+irb-1,trw,info) if (info /= psb_success_) then @@ -581,11 +581,11 @@ contains ktrw=1 end if nz = trw%get_nzeros() - do + do if (ktrw > nz) exit if (trw%ia(ktrw) > i) exit k = trw%ja(ktrw) - if ((jmin<=k).and.(k<=jmax)) then + if ((jmin<=k).and.(k<=jmax)) then row(k) = trw%val(ktrw) rowlevs(k) = 0 call heap%insert(k,info) @@ -674,10 +674,10 @@ contains use psb_base_mod - implicit none + implicit none ! Arguments - type(psb_i_heap), intent(inout) :: heap + type(psb_i_heap), intent(inout) :: heap integer(psb_ipk_), intent(in) :: i, fill_in integer(psb_ipk_), intent(inout) :: nidx,info integer(psb_ipk_), intent(inout) :: rowlevs(:) @@ -690,7 +690,7 @@ contains real(psb_dpk_) :: rwk info = psb_success_ - if (.not.allocated(idxs)) then + if (.not.allocated(idxs)) then allocate(idxs(200),stat=info) if (info /= psb_success_) return endif @@ -702,34 +702,34 @@ contains ! do ! Beware: (iret < 0) means that the heap is empty, not an error. - call heap%get_first(k,iret) + call heap%get_first(k,iret) if (iret < 0) return - ! + ! ! Just in case an index has been put on the heap more than once. ! if (k == lastk) cycle - lastk = k + lastk = k nidx = nidx + 1 - if (nidx>size(idxs)) then + if (nidx>size(idxs)) then call psb_realloc(nidx+psb_heap_resize,idxs,info) if (info /= psb_success_) return end if idxs(nidx) = k - - if ((row(k) /= dzero).and.(rowlevs(k) <= fill_in).and.(ki) then + else if (j>i) then ! ! Copy the upper part of the row - ! - if (rowlevs(j) <= fill_in) then - l2 = l2 + 1 - if (size(uval) < l2) then - ! + ! + if (rowlevs(j) <= fill_in) then + l2 = l2 + 1 + if (size(uval) < l2) then + ! ! Figure out a good reallocation size! ! isz = max((l2/i)*m,int(1.2*l2),l2+100) - call psb_realloc(isz,uval,info) - if (info == psb_success_) call psb_realloc(isz,uja,info) + call psb_realloc(isz,uval,info) + if (info == psb_success_) call psb_realloc(isz,uja,info) if (info == psb_success_) call psb_realloc(isz,uplevs,info,pad=(m+1)) - if (info /= psb_success_) then + if (info /= psb_success_) then info=psb_err_from_subroutine_ call psb_errpush(info,name,a_err='Allocate') goto 9999 @@ -941,11 +941,11 @@ contains end if uja(l2) = j uval(l2) = row(j) - uplevs(l2) = rowlevs(j) + uplevs(l2) = rowlevs(j) else if (ialg == psb_milu_n_) then ! ! MILU(k): add discarded entries to the diagonal one - ! + ! d(i) = d(i) + row(j) end if ! @@ -963,13 +963,13 @@ contains lirp(i+1) = l1 + 1 uirp(i+1) = l2 + 1 - ! + ! ! Check the pivot size ! if (abs(d(i)) < d_epstol) then ! ! Too small pivot: unstable factorization - ! + ! info = psb_err_pivot_too_small_ int_err(1) = i write(ch_err,'(g20.10)') d(i) diff --git a/prec/impl/psb_d_ilut_fact.f90 b/prec/impl/psb_d_ilut_fact.f90 index bcd26396..6c2dc698 100644 --- a/prec/impl/psb_d_ilut_fact.f90 +++ b/prec/impl/psb_d_ilut_fact.f90 @@ -1,9 +1,9 @@ -! +! ! Parallel Sparse BLAS version 3.5 ! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! +! Salvatore Filippone +! Alfredo Buttari +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -15,7 +15,7 @@ ! 3. The name of the PSBLAS group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. -! +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -27,21 +27,21 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! +! ! Moved here from MLD2P4, original copyright below. -! -! -! +! +! +! ! MLD2P4 version 2.2 ! MultiLevel Domain Decomposition Parallel Preconditioners Package ! based on PSBLAS (Parallel Sparse BLAS version 3.5) -! -! (C) Copyright 2008-2018 -! -! Salvatore Filippone -! Pasqua D'Ambra -! Daniela di Serafino -! +! +! (C) Copyright 2008-2018 +! +! Salvatore Filippone +! Pasqua D'Ambra +! Daniela di Serafino +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -53,7 +53,7 @@ ! 3. The name of the MLD2P4 group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. -! +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -65,8 +65,8 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! -! +! +! ! File: psb_dilut_fact.f90 ! ! Subroutine: psb_dilut_fact @@ -87,7 +87,7 @@ ! CSR. The diagonal of the U factor is stored separately (actually, the ! inverse of the diagonal entries is stored; this is then managed in the ! solve stage associated to the ILU(k,t) factorization). -! +! ! ! Arguments: ! fill_in - integer, input. @@ -114,7 +114,7 @@ ! factorization. ! Note: its allocation is managed by the calling routine psb_ilu_bld, ! hence it cannot be only intent(out). -! info - integer, output. +! info - integer, output. ! Error code. ! blck - type(psb_dspmat_type), input, optional, target. ! The sparse matrix structure containing the remote rows of the @@ -122,9 +122,9 @@ ! to build an Additive Schwarz base preconditioner with overlap ! greater than 0. If the overlap is 0 or the matrix has been reordered ! (see psb_fact_bld), then blck does not contain any row. -! +! subroutine psb_dilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale) - + use psb_base_mod use psb_d_ilu_fact_mod, psb_protect_name => psb_dilut_fact @@ -141,7 +141,7 @@ subroutine psb_dilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale) integer(psb_ipk_), intent(in), optional :: iscale ! Local Variables integer(psb_ipk_) :: l1, l2, m, err_act, iscale_ - + type(psb_dspmat_type), pointer :: blck_ type(psb_d_csr_sparse_mat) :: ll, uu real(psb_dpk_) :: scale @@ -151,34 +151,34 @@ subroutine psb_dilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale) info = psb_success_ call psb_erractionsave(err_act) - if (fill_in < 0) then + if (fill_in < 0) then info=psb_err_input_asize_invalid_i_ call psb_errpush(info,name, & & i_err=(/ione,fill_in,izero,izero,izero/)) goto 9999 end if - ! + ! ! Point to / allocate memory for the incomplete factorization ! - if (present(blck)) then + if (present(blck)) then blck_ => blck else - allocate(blck_,stat=info) - if (info == psb_success_) call blck_%csall(izero,izero,info,ione) + allocate(blck_,stat=info) + if (info == psb_success_) call blck_%allocate(izero,izero,info,ione,type='CSR') if (info /= psb_success_) then info=psb_err_from_subroutine_ - ch_err='csall' + ch_err='allocate' call psb_errpush(info,name,a_err=ch_err) goto 9999 end if endif - if (present(iscale)) then + if (present(iscale)) then iscale_ = iscale else iscale_ = psb_ilu_scale_none_ end if - - select case(iscale_) + + select case(iscale_) case(psb_ilu_scale_none_) scale = sone case(psb_ilu_scale_maxval_) @@ -189,10 +189,10 @@ subroutine psb_dilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale) call psb_errpush(info,name,i_err=(/ione*9,iscale_,izero,izero,izero/)) goto 9999 end select - + m = a%get_nrows() + blck_%get_nrows() if ((m /= l%get_nrows()).or.(m /= u%get_nrows()).or.& - & (m > size(d)) ) then + & (m > size(d)) ) then write(0,*) 'Wrong allocation status for L,D,U? ',& & l%get_nrows(),size(d),u%get_nrows() info = -1 @@ -225,15 +225,15 @@ subroutine psb_dilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale) call u%set_triangle() call u%set_unit() call u%set_upper() - + ! ! Nullify pointer / deallocate memory ! - if (present(blck)) then - blck_ => null() + if (present(blck)) then + blck_ => null() else call blck_%free() - deallocate(blck_,stat=info) + deallocate(blck_,stat=info) if(info.ne.0) then info=psb_err_from_subroutine_ ch_err='psb_sp_free' @@ -298,7 +298,7 @@ contains ! according to the CSR storage format. ! lirp - integer, dimension(:), input/output. ! The indices identifying the first nonzero entry of each row - ! of the L factor in lval, according to the CSR storage format. + ! of the L factor in lval, according to the CSR storage format. ! uval - real(psb_dpk_), dimension(:), input/output. ! The U factor in the incomplete factorization. ! The entries of U are stored according to the CSR format. @@ -307,12 +307,12 @@ contains ! according to the CSR storage format. ! uirp - integer, dimension(:), input/output. ! The indices identifying the first nonzero entry of each row - ! of the U factor in uval, according to the CSR storage format. + ! of the U factor in uval, according to the CSR storage format. ! l1 - integer, output ! The number of nonzero entries in lval. ! l2 - integer, output ! The number of nonzero entries in uval. - ! info - integer, output. + ! info - integer, output. ! Error code. ! subroutine psb_dilut_factint(fill_in,thres,a,b,& @@ -320,7 +320,7 @@ contains use psb_base_mod - implicit none + implicit none ! Arguments integer(psb_ipk_), intent(in) :: fill_in @@ -355,7 +355,7 @@ contains m = ma+mb ! - ! Allocate a temporary buffer for the ilut_copyin function + ! Allocate a temporary buffer for the ilut_copyin function ! call trw%allocate(izero,izero,ione) if (info == psb_success_) call psb_ensure_size(m+1,lirp,info) @@ -366,7 +366,7 @@ contains call psb_errpush(info,name,a_err='psb_sp_all') goto 9999 end if - + l1=0 l2=0 lirp(1) = 1 @@ -396,10 +396,10 @@ contains ! in the ilut_copyin function, and updated during the elimination, in ! the ilut_fact routine. The heap is ideal because at each step we need ! the lowest index, but we also need to insert new items, and the heap - ! allows to do both in log time. + ! allows to do both in log time. ! d(i) = czero - if (i<=ma) then + if (i<=ma) then call ilut_copyin(i,ma,a,i,ione,m,nlw,nup,jmaxup,nrmi,weight,& & row,heap,ktrw,trw,info) else @@ -414,7 +414,7 @@ contains & d,uja,uirp,uval,nidx,idxs,info) ! ! Copy the row into lval/d(i)/uval - ! + ! if (info == psb_success_) call ilut_copyout(fill_in,thres,i,m,& & nlw,nup,jmaxup,nrmi,row,nidx,idxs,& & l1,l2,lja,lirp,lval,d,uja,uirp,uval,info) @@ -429,7 +429,7 @@ contains ! ! Adjust diagonal accounting for scale factor ! - if (weight /= sone) then + if (weight /= sone) then d(1:m) = d(1:m)*weight end if @@ -469,7 +469,7 @@ contains ! - storing into a heap the column indices of the nonzero entries of the copied ! row; ! - computing the column index of the first entry with maximum absolute value - ! in the part of the row belonging to the upper triangle; + ! in the part of the row belonging to the upper triangle; ! - computing the 2-norm of the row. ! The output array row is such that it contains a full row of A, i.e. it contains ! also the zero entries of the row. This is useful for the elimination step @@ -482,11 +482,11 @@ contains ! ! This routine is used by psb_dilut_factint in the computation of the ILU(k,t) ! factorization of a local sparse matrix. - ! + ! ! ! Arguments: ! i - integer, input. - ! The local index of the row to be extracted from the + ! The local index of the row to be extracted from the ! sparse matrix structure a. ! m - integer, input. ! The number of rows of the local matrix stored into a. @@ -533,7 +533,7 @@ contains ! staging buffer trw. See below. ! trw - type(psb_dspmat_type), input/output. ! A staging buffer. If the matrix A is not in CSR format, we use - ! the psb_sp_getblk routine and store its output in trw; when we + ! the psb_sp_getblk routine and store its output in trw; when we ! need to call psb_sp_getblk we do it for a block of rows, and then ! we consume them from trw in successive calls to this routine, ! until we empty the buffer. Thus we will make a call to psb_sp_getblk @@ -542,7 +542,7 @@ contains subroutine ilut_copyin(i,m,a,jd,jmin,jmax,nlw,nup,jmaxup,& & nrmi,weight,row,heap,ktrw,trw,info) use psb_base_mod - implicit none + implicit none type(psb_dspmat_type), intent(in) :: a type(psb_d_coo_sparse_mat), intent(inout) :: trw integer(psb_ipk_), intent(in) :: i, m,jmin,jmax,jd @@ -584,29 +584,29 @@ contains dmaxup = szero nrmi = szero - select type (aa=> a%a) - type is (psb_d_csr_sparse_mat) + select type (aa=> a%a) + type is (psb_d_csr_sparse_mat) ! ! Take a fast shortcut if the matrix is stored in CSR format ! do j = aa%irp(i), aa%irp(i+1) - 1 k = aa%ja(j) - if ((jmin<=k).and.(k<=jmax)) then - row(k) = aa%val(j)*weight + if ((jmin<=k).and.(k<=jmax)) then + row(k) = aa%val(j)*weight call heap%insert(k,info) if (info /= psb_success_) exit - if (kjd) then + if (kjd) then nup = nup + 1 - if (abs(row(k))>dmaxup) then + if (abs(row(k))>dmaxup) then jmaxup = k dmaxup = abs(row(k)) end if end if end if end do - if (info /= psb_success_) then + if (info /= psb_success_) then info=psb_err_from_subroutine_ call psb_errpush(info,name,a_err='psb_insert_heap') goto 9999 @@ -617,16 +617,16 @@ contains class default - + ! - ! Otherwise use psb_sp_getblk, slower but able (in principle) of + ! Otherwise use psb_sp_getblk, slower but able (in principle) of ! handling any format. In this case, a block of rows is extracted ! instead of a single row, for performance reasons, and these ! rows are copied one by one into the array row, through successive ! calls to ilut_copyin. ! - if ((mod(i,nrb) == 1).or.(nrb == 1)) then + if ((mod(i,nrb) == 1).or.(nrb == 1)) then irb = min(m-i+1,nrb) call aa%csget(i,i+irb-1,trw,info) if (info /= psb_success_) then @@ -639,18 +639,18 @@ contains kin = ktrw nz = trw%get_nzeros() - do + do if (ktrw > nz) exit if (trw%ia(ktrw) > i) exit k = trw%ja(ktrw) - if ((jmin<=k).and.(k<=jmax)) then + if ((jmin<=k).and.(k<=jmax)) then row(k) = trw%val(ktrw)*weight call heap%insert(k,info) if (info /= psb_success_) exit - if (kjd) then + if (kjd) then nup = nup + 1 - if (abs(row(k))>dmaxup) then + if (abs(row(k))>dmaxup) then jmaxup = k dmaxup = abs(row(k)) end if @@ -734,10 +734,10 @@ contains use psb_base_mod - implicit none + implicit none ! Arguments - type(psb_i_heap), intent(inout) :: heap + type(psb_i_heap), intent(inout) :: heap integer(psb_ipk_), intent(in) :: i integer(psb_ipk_), intent(inout) :: nidx,info real(psb_dpk_), intent(in) :: thres,nrmi @@ -753,23 +753,23 @@ contains call psb_ensure_size(200*ione,idxs,info) if (info /= psb_success_) return nidx = 0 - lastk = -1 + lastk = -1 ! ! Do while there are indices to be processed ! do - call heap%get_first(k,iret) + call heap%get_first(k,iret) if (iret < 0) exit - ! + ! ! An index may have been put on the heap more than once. ! if (k == lastk) cycle - lastk = k + lastk = k lowert: if (k nidx) exit if (idxs(idxp) >= i) exit @@ -987,8 +987,8 @@ contains ! if (abs(witem) < thres*nrmi) cycle - nz = nz + 1 - xw(nz) = witem + nz = nz + 1 + xw(nz) = witem xwid(nz) = widx call heap%insert(witem,widx,info) if (info /= psb_success_) then @@ -1000,9 +1000,9 @@ contains ! ! Now we have to take out the first nlw+fill_in entries - ! + ! if (nz <= nlw+fill_in) then - ! + ! ! Just copy everything from xw, and it is already ordered ! else @@ -1014,7 +1014,7 @@ contains call psb_errpush(info,name,a_err='psb_heap_get_first') goto 9999 end if - + xw(k) = witem xwid(k) = widx end do @@ -1029,15 +1029,15 @@ contains ! Copy out the lower part of the row ! do k=1,nz - l1 = l1 + 1 + l1 = l1 + 1 if (size(lval) < l1) then - ! + ! ! Figure out a good reallocation size! - ! + ! isz = (max((l1/i)*m,int(1.2*l1),l1+100)) - call psb_realloc(isz,lval,info) - if (info == psb_success_) call psb_realloc(isz,lja,info) - if (info /= psb_success_) then + call psb_realloc(isz,lval,info) + if (info == psb_success_) call psb_realloc(isz,lja,info) + if (info /= psb_success_) then info=psb_err_from_subroutine_ call psb_errpush(info,name,a_err='Allocate') goto 9999 @@ -1046,25 +1046,25 @@ contains lja(l1) = xwid(k) lval(l1) = xw(indx(k)) end do - + ! ! Make sure idxp points to the diagonal entry ! - if (idxp <= size(idxs)) then - if (idxs(idxp) < i) then - do + if (idxp <= size(idxs)) then + if (idxs(idxp) < i) then + do idxp = idxp + 1 if (idxp > nidx) exit if (idxs(idxp) >= i) exit end do end if end if - if (idxp > size(idxs)) then + if (idxp > size(idxs)) then !!$ write(0,*) 'Warning: missing diagonal element in the row ' else - if (idxs(idxp) > i) then + if (idxs(idxp) > i) then !!$ write(0,*) 'Warning: missing diagonal element in the row ' - else if (idxs(idxp) /= i) then + else if (idxs(idxp) /= i) then !!$ write(0,*) 'Warning: impossible error: diagonal has vanished' else ! @@ -1076,7 +1076,7 @@ contains if (abs(d(i)) < d_epstol) then ! ! Too small pivot: unstable factorization - ! + ! info = psb_err_pivot_too_small_ int_err(1) = i write(ch_err,'(g20.10)') d(i) @@ -1092,7 +1092,7 @@ contains end if ! - ! Now the upper part + ! Now the upper part ! call heap%init(info,dir=psb_asort_down_) @@ -1104,15 +1104,15 @@ contains nz = 0 do - + idxp = idxp + 1 if (idxp > nidx) exit widx = idxs(idxp) - if (widx <= i) then + if (widx <= i) then !!$ write(0,*) 'Warning: lower triangle in upper copy',widx,i,idxp,idxs(idxp) cycle end if - if (widx > m) then + if (widx > m) then !!$ write(0,*) 'Warning: impossible value',widx,i,idxp,idxs(idxp) cycle end if @@ -1120,12 +1120,12 @@ contains ! ! Dropping rule based on the 2-norm. But keep the jmaxup-th entry anyway. ! - if ((widx /= jmaxup) .and. (abs(witem) < thres*nrmi)) then - cycle + if ((widx /= jmaxup) .and. (abs(witem) < thres*nrmi)) then + cycle end if nz = nz + 1 - xw(nz) = witem + xw(nz) = witem xwid(nz) = widx call heap%insert(witem,widx,info) if (info /= psb_success_) then @@ -1133,7 +1133,7 @@ contains call psb_errpush(info,name,a_err='psb_insert_heap') goto 9999 end if - + end do ! @@ -1141,7 +1141,7 @@ contains ! we include entry jmaxup. ! if (nz <= nup+fill_in) then - ! + ! ! Just copy everything from xw ! fndmaxup=.true. @@ -1155,13 +1155,13 @@ contains if (widx == jmaxup) fndmaxup=.true. end do end if - if ((i psb_d_invk_bld + use psb_d_ilu_fact_mod + implicit none + + ! Arguments + type(psb_dspmat_type), intent(in), target :: a + integer(psb_ipk_), intent(in) :: fill1, fill2 + type(psb_dspmat_type), intent(inout) :: lmat, umat + real(psb_dpk_), allocatable :: d(:) + Type(psb_desc_type), Intent(inout) :: desc + integer(psb_ipk_), intent(out) :: info + type(psb_dspmat_type), intent(in), optional :: blck + ! + integer(psb_ipk_) :: i, nztota, err_act, n_row, nrow_a, n_col + type(psb_dspmat_type) :: atmp + real(psb_dpk_), allocatable :: pq(:), pd(:) + integer(psb_ipk_), allocatable :: uplevs(:) + integer(psb_ipk_) :: debug_level, debug_unit + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: np,me + integer(psb_ipk_) :: nzrmax + character(len=20) :: name, ch_err + + + info = psb_success_ + name='psb_dinvk_bld' + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_; goto 9999 + end if + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + ctxt = psb_cd_get_context(desc) + call psb_info(ctxt, me, np) + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),' start' + + ! + ! Check the memory available to hold the incomplete L and U factors + ! and allocate it if needed + ! + nrow_a = a%get_nrows() + nztota = a%get_nzeros() + + if (present(blck)) then + nztota = nztota + blck%get_nzeros() + end if + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & ': out get_nnzeros',nrow_a,nztota,& + & a%get_nrows(),a%get_ncols(),a%get_nzeros() + + + n_row = psb_cd_get_local_rows(desc) + n_col = psb_cd_get_local_cols(desc) + allocate(pd(n_row),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + end if + + + call lmat%allocate(n_row,n_row,info,nz=nztota) + if (info == psb_success_) call umat%allocate(n_row,n_row,info,nz=nztota) + + + call psb_iluk_fact(fill1,psb_ilu_n_,a,lmat,umat,pd,info,blck=blck) + !,uplevs=uplevs) + !call psb_diluk_fact(fill1,psb_ilu_n_,a,lmat,umat,pd,info,blck=blck) + + if (info == psb_success_) call atmp%allocate(n_row,n_row,info,nz=nztota) + if(info/=0) then + info=psb_err_from_subroutine_ + ch_err='psb_sp_all' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + + ! + ! Compute the aprox U^-1 and L^-1 + ! + call psb_sparse_invk(n_row,umat,atmp,fill2,info) + if (info == psb_success_) call psb_move_alloc(atmp,umat,info) + if (info == psb_success_) call lmat%transp() + if (info == psb_success_) call psb_sparse_invk(n_row,lmat,atmp,fill2,info) + if (info == psb_success_) call psb_move_alloc(atmp,lmat,info) + if (info == psb_success_) call lmat%transp() + ! Done. Hopefully.... + + if (info /= psb_success_) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='invt') + goto 9999 + end if + + call psb_move_alloc(pd,d,info) + call lmat%set_asb() + call lmat%trim() + call umat%set_asb() + call umat%trim() + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),' end' + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return +end subroutine psb_d_invk_bld + +subroutine psb_dsparse_invk(n,a,z,fill_in,info,inlevs) + + use psb_base_mod + use psb_d_invk_fact_mod, psb_protect_name => psb_dsparse_invk + + integer(psb_ipk_), intent(in) :: n + type(psb_dspmat_type), intent(in) :: a + type(psb_dspmat_type), intent(inout) :: z + integer(psb_ipk_), intent(in) :: fill_in + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: inlevs(:) + ! + integer(psb_ipk_) :: i,j,k, err_act, nz, nzra, nzrz, ipz1,ipz2, nzz, ip1, ip2, l2 + integer(psb_ipk_), allocatable :: ia(:), ja(:), iz(:), jz(:) + real(psb_dpk_), allocatable :: zw(:), val(:), valz(:) + integer(psb_ipk_), allocatable :: uplevs(:), rowlevs(:), idxs(:) + real(psb_dpk_), allocatable :: row(:) + type(psb_d_coo_sparse_mat) :: trw + type(psb_d_csr_sparse_mat) :: acsr, zcsr + integer(psb_ipk_) :: ktrw, nidx + type(psb_i_heap) :: heap + + real(psb_dpk_) :: alpha + character(len=20) :: name='psb_sp_invk' + + info = psb_success_ + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_; goto 9999 + end if + + if (.not.(a%is_triangle().and.a%is_unit().and.a%is_upper())) then + write(psb_err_unit,*) 'Wrong A ' + info = psb_err_internal_error_ + call psb_errpush(psb_err_internal_error_,name,a_err='wrong A') + goto 9999 + end if + call a%cp_to(acsr) + call trw%allocate(izero,izero,ione) + if (info == psb_success_) allocate(zw(n),iz(n),valz(n),& + & row(n),rowlevs(n),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + end if + + allocate(uplevs(acsr%get_nzeros()),stat=info) + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='Allocate') + goto 9999 + end if + uplevs(:) = 0 + row(:) = dzero + rowlevs(:) = -(n+1) + + call zcsr%allocate(n,n,n*fill_in) + call zcsr%set_triangle() + call zcsr%set_unit(.false.) + call zcsr%set_upper() + call psb_ensure_size(n+1, idxs, info) + + + ! + ! + zcsr%irp(1) = 1 + nzz = 0 + + l2 = 0 + outer: do i = 1, n-1 + ! ZW = e_i + call psb_invk_copyin(i,n,acsr,ione,n,row,rowlevs,heap,ktrw,trw,info,& + & sign=-done,inlevs=inlevs) + row(i) = done + rowlevs(i) = 0 + + ! Update loop + call psb_invk_inv(fill_in,i,row,rowlevs,heap,& + & acsr%ja,acsr%irp,acsr%val,uplevs,nidx,idxs,info) + + call psb_invk_copyout(fill_in,i,n,row,rowlevs,nidx,idxs,& + & l2,zcsr%ja,zcsr%irp,zcsr%val,info) + + nzz = l2 + end do outer + if (info /= psb_success_) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='mainloop') + goto 9999 + end if + ipz1 = nzz+1 + call psb_ensure_size(ipz1,zcsr%val,info) + call psb_ensure_size(ipz1,zcsr%ja,info) + zcsr%val(ipz1) = done + zcsr%ja(ipz1) = n + zcsr%irp(n+1) = ipz1+1 + call zcsr%set_sorted() + call z%mv_from(zcsr) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return +end subroutine psb_dsparse_invk + +subroutine psb_d_invk_copyin(i,m,a,jmin,jmax,row,rowlevs,heap,ktrw,trw,info,sign,inlevs) + + use psb_base_mod + use psb_d_invk_fact_mod, psb_protect_name => psb_d_invk_copyin + + implicit none + + ! Arguments + type(psb_d_csr_sparse_mat), intent(in) :: a + type(psb_d_coo_sparse_mat), intent(inout) :: trw + integer(psb_ipk_), intent(in) :: i,m,jmin,jmax + integer(psb_ipk_), intent(inout) :: ktrw,info + integer(psb_ipk_), intent(inout) :: rowlevs(:) + real(psb_dpk_), intent(inout) :: row(:) + type(psb_i_heap), intent(inout) :: heap + real(psb_dpk_), optional, intent(in) :: sign + integer(psb_ipk_), intent(in), optional :: inlevs(:) + + ! Local variables + integer(psb_ipk_) :: k,j,irb,err_act, nz + integer(psb_ipk_), parameter :: nrb=16 + real(psb_dpk_) :: sign_ + character(len=20), parameter :: name='invk_copyin' + character(len=20) :: ch_err + + info = psb_success_ + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_; goto 9999 + end if + + call heap%init(info) + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_init_heap') + goto 9999 + end if + + if (present(sign)) then + sign_ = sign + else + sign_ = done + end if + + + ! + ! Take a fast shortcut if the matrix is stored in CSR format + ! + if (present(inlevs)) then + do j = a%irp(i), a%irp(i+1) - 1 + k = a%ja(j) + if ((jmin<=k).and.(k<=jmax)) then + row(k) = sign_ * a%val(j) + rowlevs(k) = inlevs(j) + call heap%insert(k,info) + end if + end do + else + do j = a%irp(i), a%irp(i+1) - 1 + k = a%ja(j) + if ((jmin<=k).and.(k<=jmax)) then + row(k) = sign_ * a%val(j) + rowlevs(k) = 0 + call heap%insert(k,info) + end if + end do + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return +end subroutine psb_d_invk_copyin + + +subroutine psb_d_invk_copyout(fill_in,i,m,row,rowlevs,nidx,idxs,& + & l2,uia1,uia2,uaspk,info) + + use psb_base_mod + use psb_d_invk_fact_mod, psb_protect_name => psb_d_invk_copyout + + implicit none + + ! Arguments + integer(psb_ipk_), intent(in) :: fill_in, i, m, nidx + integer(psb_ipk_), intent(inout) :: l2, info + integer(psb_ipk_), intent(inout) :: rowlevs(:), idxs(:) + integer(psb_ipk_), allocatable, intent(inout) :: uia1(:), uia2(:) + real(psb_dpk_), allocatable, intent(inout) :: uaspk(:) + real(psb_dpk_), intent(inout) :: row(:) + + ! Local variables + integer(psb_ipk_) :: j,isz,err_act,int_err(5),idxp + character(len=20), parameter :: name='psb_diluk_factint' + character(len=20) :: ch_err + + info = psb_success_ + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_; goto 9999 + end if + + do idxp=1,nidx + + j = idxs(idxp) + + if (j>=i) then + ! + ! Copy the upper part of the row + ! + if (rowlevs(j) <= fill_in) then + l2 = l2 + 1 + if (size(uaspk) < l2) then + ! + ! Figure out a good reallocation size! + ! + isz = max(int(1.2*l2),l2+100) + call psb_realloc(isz,uaspk,info) + if (info == psb_success_) call psb_realloc(isz,uia1,info) + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='Allocate') + goto 9999 + end if + end if + uia1(l2) = j + uaspk(l2) = row(j) + end if + ! + ! Re-initialize row(j) and rowlevs(j) + ! + row(j) = dzero + rowlevs(j) = -(m+1) + end if + end do + + uia2(i+1) = l2 + 1 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return +end subroutine psb_d_invk_copyout + +subroutine psb_dinvk_inv(fill_in,i,row,rowlevs,heap,ja,irp,val,uplevs,& + & nidx,idxs,info) + + use psb_base_mod + use psb_d_invk_fact_mod, psb_protect_name => psb_dinvk_inv + + implicit none + + ! Arguments + type(psb_i_heap), intent(inout) :: heap + integer(psb_ipk_), intent(in) :: i, fill_in + integer(psb_ipk_), intent(inout) :: nidx,info + integer(psb_ipk_), intent(inout) :: rowlevs(:) + integer(psb_ipk_), allocatable, intent(inout) :: idxs(:) + integer(psb_ipk_), intent(in) :: ja(:),irp(:),uplevs(:) + real(psb_dpk_), intent(in) :: val(:) + real(psb_dpk_), intent(inout) :: row(:) + + ! Local variables + integer(psb_ipk_) :: k,j,lrwk,jj,lastk, iret + real(psb_dpk_) :: rwk + + + info = psb_success_ + + call psb_ensure_size(200, idxs, info) + if (info /= psb_success_) return + nidx = 1 + idxs(1) = i + lastk = i + + ! + ! Do while there are indices to be processed + ! + do + ! Beware: (iret < 0) means that the heap is empty, not an error. + call heap%get_first(k,iret) + if (iret < 0) then +!!$ write(psb_err_unit,*) 'IINVK: ',i,' returning at ',lastk + return + end if + + ! + ! Just in case an index has been put on the heap more than once. + ! + if (k == lastk) cycle + + lastk = k + nidx = nidx + 1 + if (nidx>size(idxs)) then + call psb_realloc(nidx+psb_heap_resize,idxs,info) + if (info /= psb_success_) return + end if + idxs(nidx) = k + + if ((row(k) /= dzero).and.(rowlevs(k) <= fill_in)) then + ! + ! Note: since U is scaled while copying it out (see iluk_copyout), + ! we can use rwk in the update below + ! + rwk = row(k) + lrwk = rowlevs(k) + + do jj=irp(k),irp(k+1)-1 + j = ja(jj) + if (j<=k) then + info = -i + return + endif + ! + ! Insert the index into the heap for further processing. + ! The fill levels are initialized to a negative value. If we find + ! one, it means that it is an as yet untouched index, so we need + ! to insert it; otherwise it is already on the heap, there is no + ! need to insert it more than once. + ! + if (rowlevs(j)<0) then + call heap%insert(j,info) + if (info /= psb_success_) return + rowlevs(j) = abs(rowlevs(j)) + end if + ! + ! Update row(j) and the corresponding fill level + ! + row(j) = row(j) - rwk * val(jj) + rowlevs(j) = min(rowlevs(j),lrwk+uplevs(jj)+1) + end do + + end if + end do + +end subroutine psb_dinvk_inv diff --git a/prec/impl/psb_d_invt_fact.f90 b/prec/impl/psb_d_invt_fact.f90 new file mode 100644 index 00000000..83aa73ff --- /dev/null +++ b/prec/impl/psb_d_invt_fact.f90 @@ -0,0 +1,739 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! Moved here from AMG4PSBLAS, original copyright below. +! +! +! AMG-AINV: Approximate Inverse plugin for +! AMG4PSBLAS version 1.0 +! +! (C) Copyright 2020 +! +! Salvatore Filippone University of Rome Tor Vergata +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! + +subroutine psb_d_invt_bld(a,fillin,invfill,thresh,invthresh,& + & lmat,d,umat,desc,info,blck) + + use psb_base_mod + use psb_d_invt_fact_mod, psb_protect_name => psb_d_invt_bld + use psb_d_ilu_fact_mod + implicit none + + ! Arguments + type(psb_dspmat_type), intent(in), target :: a + integer(psb_ipk_), intent(in) :: fillin,invfill + real(psb_dpk_), intent(in) :: thresh + real(psb_dpk_), intent(in) :: invthresh + type(psb_dspmat_type), intent(inout) :: lmat, umat + real(psb_dpk_), allocatable :: d(:) + Type(psb_desc_type), Intent(inout) :: desc + integer(psb_ipk_), intent(out) :: info + type(psb_dspmat_type), intent(in), optional :: blck + ! + integer(psb_ipk_) :: i, nztota, err_act, n_row, nrow_a, n_col + type(psb_dspmat_type) :: atmp + real(psb_dpk_), allocatable :: pq(:), pd(:), w(:) + integer(psb_ipk_) :: debug_level, debug_unit + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: np,me + integer(psb_ipk_) :: nzrmax + real(psb_dpk_) :: sp_thresh + character(len=20) :: name, ch_err, fname + + + info = psb_success_ + name='psb_dinvt_fact' + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_; goto 9999 + end if + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + ctxt = psb_cd_get_context(desc) + call psb_info(ctxt, me, np) + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),' start' + + ! + ! Check the memory available to hold the incomplete L and U factors + ! and allocate it if needed + ! + nrow_a = a%get_nrows() + nztota = a%get_nzeros() + + if (present(blck)) then + nztota = nztota + blck%get_nzeros() + end if + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & ': out get_nnzeros',nrow_a,nztota,& + & a%get_nrows(),a%get_ncols(),a%get_nzeros() + + + n_row = psb_cd_get_local_rows(desc) + n_col = psb_cd_get_local_cols(desc) + allocate(pd(n_row),w(n_row),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + end if + + nzrmax = fillin + sp_thresh = thresh + + call lmat%allocate(n_row,n_row,info,nz=nztota) + if (info == psb_success_) call umat%allocate(n_row,n_row,info,nz=nztota) + + if (info == 0) call psb_ilut_fact(nzrmax,sp_thresh,& + & a,lmat,umat,pd,info,blck=blck,iscale=psb_ilu_scale_maxval_) + + if (info == psb_success_) call atmp%allocate(n_row,n_row,info,nz=nztota) + if(info/=0) then + info=psb_err_from_subroutine_ + ch_err='psb_sp_all' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if (.false.) then +!!$ if (debug_level >= psb_debug_inner_) then + write(fname,'(a,i0,a)') 'invt-lo-',me,'.mtx' + call lmat%print(fname,head="INVTLOW") + write(fname,'(a,i0,a)') 'invt-up-',me,'.mtx' + call umat%print(fname,head="INVTUPP") + end if + + ! + ! Compute the approx U^-1 and L^-1 + ! + nzrmax = invfill + call psb_dsparse_invt(n_row,umat,atmp,nzrmax,invthresh,info) + if (info == psb_success_) call psb_move_alloc(atmp,umat,info) + if (info == psb_success_) call lmat%transp() + if (info == psb_success_) call psb_dsparse_invt(n_row,lmat,atmp,nzrmax,invthresh,info) + if (info == psb_success_) call psb_move_alloc(atmp,lmat,info) + if (info == psb_success_) call lmat%transp() + ! Done. Hopefully.... + + if (info /= psb_success_) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='invt') + goto 9999 + end if + + call psb_move_alloc(pd,d,info) + call lmat%set_asb() + call lmat%trim() + call umat%set_asb() + call umat%trim() + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),' end' + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return +end subroutine psb_d_invt_bld + +subroutine psb_dsparse_invt(n,a,z,nzrmax,sp_thresh,info) + + use psb_base_mod + use psb_d_invt_fact_mod, psb_protect_name => psb_dsparse_invt + + implicit none + integer(psb_ipk_), intent(in) :: n + type(psb_dspmat_type), intent(in) :: a + type(psb_dspmat_type), intent(inout) :: z + integer(psb_ipk_), intent(in) :: nzrmax + real(psb_dpk_), intent(in) :: sp_thresh + integer(psb_ipk_), intent(out) :: info + ! + integer(psb_ipk_) :: i,j,k, err_act, nz, nzra, nzrz, ipz1,ipz2, nzz, ip1, ip2, l2 + integer(psb_ipk_), allocatable :: ia(:), ja(:), iz(:),jz(:) + real(psb_dpk_), allocatable :: zw(:), val(:), valz(:) + integer(psb_ipk_), allocatable :: uplevs(:), rowlevs(:),idxs(:) + real(psb_dpk_), allocatable :: row(:) + type(psb_d_coo_sparse_mat) :: trw + type(psb_d_csr_sparse_mat) :: acsr, zcsr + integer(psb_ipk_) :: ktrw, nidx, nlw,nup,jmaxup + type(psb_i_heap) :: heap + real(psb_dpk_) :: alpha, nrmi + character(len=20) :: name='psb_sp_invt' + + info = psb_success_ + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_; goto 9999 + end if + + if (.not.(a%is_triangle().and.a%is_unit().and.a%is_upper())) then + write(psb_err_unit,*) 'Wrong A ' + info = psb_err_internal_error_ + call psb_errpush(psb_err_internal_error_,name,a_err='wrong A') + goto 9999 + end if + call a%cp_to(acsr) + call trw%allocate(izero,izero,ione) + if (info == psb_success_) allocate(zw(n),iz(n),valz(n),& + & row(n),rowlevs(n),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + end if + + call zcsr%allocate(n,n,n*nzrmax) + call zcsr%set_triangle() + call zcsr%set_unit(.false.) + call zcsr%set_upper() + ! + ! + nzz = 0 + row(:) = dzero + rowlevs(:) = 0 + l2 = 0 + zcsr%irp(1) = 1 + + outer: do i = 1, n-1 + ! ZW = e_i + call psb_d_invt_copyin(i,n,acsr,i,ione,n,nlw,nup,jmaxup,nrmi,row,& + & heap,rowlevs,ktrw,trw,info,sign=-done) + if (info /= 0) exit + row(i) = done + ! Adjust norm + if (nrmi < done) then + nrmi = sqrt(done + nrmi**2) + else + nrmi = nrmi*sqrt(done+done/(nrmi**2)) + end if + + call psb_invt_inv(sp_thresh,i,nrmi,row,heap,rowlevs,& + & acsr%ja,acsr%irp,acsr%val,nidx,idxs,info) + if (info /= 0) exit +!!$ write(0,*) 'Calling copyout ',nzrmax,nlw,nup,nidx,l2 + call psb_d_invt_copyout(nzrmax,sp_thresh,i,n,nlw,nup,jmaxup,nrmi,row,& + & nidx,idxs,l2,zcsr%ja,zcsr%irp,zcsr%val,info) + if (info /= 0) exit + nzz = l2 + end do outer + if (info /= psb_success_) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='mainloop') + goto 9999 + end if + + ipz1 = nzz+1 + call psb_ensure_size(ipz1,zcsr%val,info) + call psb_ensure_size(ipz1,zcsr%ja,info) + zcsr%val(ipz1) = done + zcsr%ja(ipz1) = n + zcsr%irp(n+1) = ipz1+1 + + call z%mv_from(zcsr) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return +end subroutine psb_dsparse_invt + +subroutine psb_d_invt_copyin(i,m,a,jd,jmin,jmax,nlw,nup,jmaxup,nrmi,row,heap,& + & irwt,ktrw,trw,info,sign) + use psb_base_mod + use psb_d_invt_fact_mod, psb_protect_name => psb_d_invt_copyin + implicit none + type(psb_d_csr_sparse_mat), intent(in) :: a + type(psb_d_coo_sparse_mat), intent(inout) :: trw + integer(psb_ipk_), intent(in) :: i, m,jmin,jmax,jd + integer(psb_ipk_), intent(inout) :: ktrw,nlw,nup,jmaxup,info + integer(psb_ipk_), intent(inout) :: irwt(:) + real(psb_dpk_), intent(inout) :: nrmi + real(psb_dpk_), intent(inout) :: row(:) + type(psb_i_heap), intent(inout) :: heap + real(psb_dpk_), intent(in), optional :: sign + ! + integer(psb_ipk_) :: k,j,irb,kin,nz, err_act + integer(psb_ipk_), parameter :: nrb=16 + real(psb_dpk_) :: dmaxup, sign_ + real(psb_dpk_), external :: dnrm2 + character(len=20), parameter :: name='invt_copyin' + + info = psb_success_ + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_; goto 9999 + end if + + call heap%init(info) + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_init_heap') + goto 9999 + end if + sign_ = done + if (present(sign)) sign_ = sign + ! + ! nrmi is the norm of the current sparse row (for the time being, + ! we use the 2-norm). + ! NOTE: the 2-norm below includes also elements that are outside + ! [jmin:jmax] strictly. Is this really important? TO BE CHECKED. + ! + + nlw = 0 + nup = 0 + jmaxup = 0 + dmaxup = dzero + nrmi = dzero + + do j = a%irp(i), a%irp(i+1) - 1 + k = a%ja(j) + if ((jmin<=k).and.(k<=jmax)) then + row(k) = sign_ * a%val(j) + call heap%insert(k,info) + irwt(k) = 1 + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_insert_heap') + goto 9999 + end if + end if + if (kjd) then + nup = nup + 1 + if (abs(row(k))>dmaxup) then + jmaxup = k + dmaxup = abs(row(k)) + end if + end if + end do + nz = a%irp(i+1) - a%irp(i) + nrmi = dnrm2(nz,a%val(a%irp(i):),ione) + + return + +9999 call psb_error_handler(err_act) + return +end subroutine psb_d_invt_copyin + +subroutine psb_d_invt_copyout(fill_in,thres,i,m,nlw,nup,jmaxup,nrmi,row, & + & nidx,idxs,l2,ja,irp,val,info) + + use psb_base_mod + use psb_d_invt_fact_mod, psb_protect_name => psb_d_invt_copyout + + implicit none + + ! Arguments + integer(psb_ipk_), intent(in) :: fill_in,i,m,nidx,nlw,nup,jmaxup + integer(psb_ipk_), intent(in) :: idxs(:) + integer(psb_ipk_), intent(inout) :: l2, info + integer(psb_ipk_), allocatable, intent(inout) :: ja(:),irp(:) + real(psb_dpk_), intent(in) :: thres,nrmi + real(psb_dpk_),allocatable, intent(inout) :: val(:) + real(psb_dpk_), intent(inout) :: row(:) + + ! Local variables + real(psb_dpk_),allocatable :: xw(:) + integer(psb_ipk_), allocatable :: xwid(:), indx(:) + real(psb_dpk_) :: witem, wmin + integer(psb_ipk_) :: widx + integer(psb_ipk_) :: k,isz,err_act,int_err(5),idxp, nz + type(psb_d_idx_heap) :: heap + character(len=20), parameter :: name='invt_copyout' + character(len=20) :: ch_err + logical :: fndmaxup + + info = psb_success_ + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_; goto 9999 + end if + + ! + ! Here we need to apply also the dropping rule base on the fill-in. + ! We do it by putting into a heap the elements that are not dropped + ! by using the 2-norm rule, and then copying them out. + ! + ! The heap goes down on the entry absolute value, so the first item + ! is the largest absolute value. + ! +!!$ write(0,*) 'invt_copyout ',nidx,nup+fill_in + call heap%init(info,dir=psb_asort_down_) + + if (info == psb_success_) allocate(xwid(nidx),xw(nidx),indx(nidx),stat=info) + if (info /= psb_success_) then + info=psb_err_alloc_request_ + call psb_errpush(info,name,i_err=(/3*nidx/),& + & a_err='real(psb_dpk_)') + goto 9999 + end if + + ! + ! First the lower part + ! + + nz = 0 + idxp = 0 + + do + + idxp = idxp + 1 + if (idxp > nidx) exit + if (idxs(idxp) >= i) exit + widx = idxs(idxp) + witem = row(widx) + ! + ! Dropping rule based on the 2-norm + ! + if (abs(witem) < thres*nrmi) cycle + + nz = nz + 1 + xw(nz) = witem + xwid(nz) = widx + call heap%insert(witem,widx,info) + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_insert_heap') + goto 9999 + end if + end do + + if (nz > 1) then + write(psb_err_unit,*) 'Warning: lower triangle from invt???? ' + end if + + + if (idxp <= size(idxs)) then + if (idxs(idxp) < i) then + do + idxp = idxp + 1 + if (idxp > nidx) exit + if (idxs(idxp) >= i) exit + end do + end if + end if + idxp = idxp - 1 + nz = 0 + wmin=HUGE(wmin) + if (.false.) then + do + + idxp = idxp + 1 + if (idxp > nidx) exit + widx = idxs(idxp) + if (widx < i) then + write(psb_err_unit,*) 'Warning: lower triangle in upper copy',widx,i,idxp,idxs(idxp) + cycle + end if + if (widx > m) then + cycle + end if + witem = row(widx) + ! + ! Dropping rule based on the 2-norm. But keep the jmaxup-th entry anyway. + ! + if ((widx /= jmaxup) .and. (widx /= i) .and. (abs(witem) < thres*nrmi)) then + cycle + end if + if ((widx/=jmaxup).and.(nz > nup+fill_in)) then + if (abs(witem) < wmin) cycle + endif + wmin = min(abs(witem),wmin) + nz = nz + 1 + xw(nz) = witem + xwid(nz) = widx + call heap%insert(witem,widx,info) + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_insert_heap') + goto 9999 + end if + + end do + + ! + ! Now we have to take out the first nup-fill_in entries. But make sure + ! we include entry jmaxup. + ! + if (nz <= nup+fill_in) then + ! + ! Just copy everything from xw + ! + fndmaxup=.true. + else + fndmaxup = .false. + nz = nup+fill_in + do k=1,nz + call heap%get_first(witem,widx,info) + xw(k) = witem + xwid(k) = widx + if (widx == jmaxup) fndmaxup=.true. + end do + end if + if ((i nidx) exit + widx = idxs(idxp) + if (widx < i) then + write(psb_err_unit,*) 'Warning: lower triangle in upper copy',widx,i,idxp,idxs(idxp) + cycle + end if + if (widx > m) then + cycle + end if + witem = row(widx) + ! + ! Dropping rule based on the 2-norm. But keep the jmaxup-th entry anyway. + ! + if ((widx /= i) .and. (abs(witem) < thres*nrmi)) then + cycle + end if + if (nz > nup+fill_in) then + if (abs(witem) < wmin) cycle + endif + wmin = min(abs(witem),wmin) + nz = nz + 1 + xw(nz) = witem + xwid(nz) = widx + call heap%insert(witem,widx,info) + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_insert_heap') + goto 9999 + end if + + end do + + ! + ! Now we have to take out the first nup-fill_in entries. But make sure + ! we include entry jmaxup. + ! + if (nz > nup+fill_in) then + nz = nup+fill_in + do k=1,nz + call heap%get_first(witem,widx,info) + xw(k) = witem + xwid(k) = widx + end do + end if + end if + + ! + ! Now we put things back into ascending column order + ! + call psb_msort(xwid(1:nz),indx(1:nz),dir=psb_sort_up_) + + ! + ! Copy out the upper part of the row + ! + do k=1,nz + l2 = l2 + 1 + if (size(val) < l2) then + ! + ! Figure out a good reallocation size! + ! + isz = max(int(1.2*l2),l2+100) + call psb_realloc(isz,val,info) + if (info == psb_success_) call psb_realloc(isz,ja,info) + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='Allocate') + goto 9999 + end if + end if + ja(l2) = xwid(k) + val(l2) = xw(indx(k)) + end do + + ! + ! Set row to zero + ! + do idxp=1,nidx + row(idxs(idxp)) = dzero + end do + + irp(i+1) = l2 + 1 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return +end subroutine psb_d_invt_copyout + +subroutine psb_d_invt_inv(thres,i,nrmi,row,heap,irwt,ja,irp,val,nidx,idxs,info) + + use psb_base_mod + use psb_d_invt_fact_mod, psb_protect_name => psb_d_invt_inv + + implicit none + + ! Arguments + type(psb_i_heap), intent(inout) :: heap + integer(psb_ipk_), intent(in) :: i + integer(psb_ipk_), intent(inout) :: nidx,info + integer(psb_ipk_), intent(inout) :: irwt(:) + real(psb_dpk_), intent(in) :: thres,nrmi + integer(psb_ipk_), allocatable, intent(inout) :: idxs(:) + integer(psb_ipk_), intent(in) :: ja(:),irp(:) + real(psb_dpk_), intent(in) :: val(:) + real(psb_dpk_), intent(inout) :: row(:) + + ! Local Variables + integer(psb_ipk_) :: k,j,jj,lastk,iret + real(psb_dpk_) :: rwk, alpha + + info = psb_success_ + + call psb_ensure_size(200, idxs, info) + if (info /= psb_success_) return + nidx = 1 + idxs(1) = i + lastk = i + irwt(i) = 1 +!!$ write(0,*) 'Drop Threshold ',thres*nrmi + ! + ! Do while there are indices to be processed + ! + do + + call heap%get_first(k,iret) + if (iret < 0) exit + + ! + ! An index may have been put on the heap more than once. + ! Should not happen, but just in case. + ! + if (k == lastk) cycle + lastk = k + + ! + ! Dropping rule based on the threshold: compare the absolute + ! value of each updated entry of row with thres * 2-norm of row. + ! + rwk = row(k) + + if (abs(rwk) < thres*nrmi) then + ! + ! Drop the entry. + ! + row(k) = dzero + irwt(k) = 0 + cycle + else + ! + ! Note: since U is scaled while copying it out (see ilut_copyout), + ! we can use rwk in the update below. + ! + do jj=irp(k),irp(k+1)-1 + j = ja(jj) + if (j<=k) then + info = -i + return + endif + ! + ! Update row(j) and, if it is not to be discarded, insert + ! its index into the heap for further processing. + ! + row(j) = row(j) - rwk * val(jj) + if (irwt(j) == 0) then + if (abs(row(j)) < thres*nrmi) then + ! + ! Drop the entry. + ! + row(j) = dzero + else + ! + ! Do the insertion. + ! + call heap%insert(j,info) + if (info /= psb_success_) return + irwt(j) = 1 + end if + end if + end do + end if + + ! + ! If we get here it is an index we need to keep on copyout. + ! + + nidx = nidx + 1 + call psb_ensure_size(nidx,idxs,info,addsz=psb_heap_resize) + if (info /= psb_success_) return + idxs(nidx) = k + irwt(k) = 0 + end do + + irwt(i) = 0 +end subroutine psb_d_invt_inv diff --git a/prec/impl/psb_d_prec_type_impl.f90 b/prec/impl/psb_d_prec_type_impl.f90 index 49b5bcf2..69e48079 100644 --- a/prec/impl/psb_d_prec_type_impl.f90 +++ b/prec/impl/psb_d_prec_type_impl.f90 @@ -1,9 +1,9 @@ -! +! ! Parallel Sparse BLAS version 3.5 ! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! +! Salvatore Filippone +! Alfredo Buttari +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -15,7 +15,7 @@ ! 3. The name of the PSBLAS group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. -! +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -27,14 +27,14 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! -! -! +! +! +! ! Parallel Sparse BLAS version 3.5 ! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! +! Salvatore Filippone +! Alfredo Buttari +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -46,7 +46,7 @@ !!$ 3. The name of the PSBLAS group or the names of its contributors may !!$ not be used to endorse or promote products derived from this !!$ software without specific written permission. -!!$ +!!$ !!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS !!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED !!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -58,14 +58,14 @@ !!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) !!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE !!$ POSSIBILITY OF SUCH DAMAGE. -!!$ -!!$ +!!$ +!!$ !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! subroutine psb_d_apply2_vect(prec,x,y,desc_data,info,trans,work) use psb_base_mod use psb_d_prec_type, psb_protect_name => psb_d_apply2_vect - implicit none + implicit none type(psb_desc_type),intent(in) :: desc_data class(psb_dprec_type), intent(inout) :: prec type(psb_d_vect_type),intent(inout) :: x @@ -74,7 +74,7 @@ subroutine psb_d_apply2_vect(prec,x,y,desc_data,info,trans,work) character(len=1), optional :: trans real(psb_dpk_),intent(inout), optional, target :: work(:) - character :: trans_ + character :: trans_ real(psb_dpk_), pointer :: work_(:) type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np,me @@ -88,25 +88,25 @@ subroutine psb_d_apply2_vect(prec,x,y,desc_data,info,trans,work) ctxt = desc_data%get_context() call psb_info(ctxt, me, np) - if (present(trans)) then + if (present(trans)) then trans_=psb_toupper(trans) else trans_='N' end if - if (present(work)) then + if (present(work)) then work_ => work else allocate(work_(4*desc_data%get_local_cols()),stat=info) - if (info /= psb_success_) then + if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err='Allocate') - goto 9999 + goto 9999 end if end if - if (.not.allocated(prec%prec)) then + if (.not.allocated(prec%prec)) then info = 1124 call psb_errpush(info,name,a_err="preconditioner") goto 9999 @@ -115,13 +115,13 @@ subroutine psb_d_apply2_vect(prec,x,y,desc_data,info,trans,work) call prec%prec%apply(done,x,dzero,y,desc_data,info,& & trans=trans_,work=work_) - if (present(work)) then + if (present(work)) then else deallocate(work_,stat=info) - if (info /= psb_success_) then + if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err='DeAllocate') - goto 9999 + goto 9999 end if end if @@ -136,7 +136,7 @@ end subroutine psb_d_apply2_vect subroutine psb_d_apply1_vect(prec,x,desc_data,info,trans,work) use psb_base_mod use psb_d_prec_type, psb_protect_name => psb_d_apply1_vect - implicit none + implicit none type(psb_desc_type),intent(in) :: desc_data class(psb_dprec_type), intent(inout) :: prec type(psb_d_vect_type),intent(inout) :: x @@ -145,7 +145,7 @@ subroutine psb_d_apply1_vect(prec,x,desc_data,info,trans,work) real(psb_dpk_),intent(inout), optional, target :: work(:) type(psb_d_vect_type) :: ww - character :: trans_ + character :: trans_ real(psb_dpk_), pointer :: work_(:) type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np,me @@ -159,25 +159,25 @@ subroutine psb_d_apply1_vect(prec,x,desc_data,info,trans,work) ctxt = desc_data%get_context() call psb_info(ctxt, me, np) - if (present(trans)) then + if (present(trans)) then trans_=psb_toupper(trans) else trans_='N' end if - if (present(work)) then + if (present(work)) then work_ => work else allocate(work_(4*desc_data%get_local_cols()),stat=info) - if (info /= psb_success_) then + if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err='Allocate') - goto 9999 + goto 9999 end if end if - if (.not.allocated(prec%prec)) then + if (.not.allocated(prec%prec)) then info = 1124 call psb_errpush(info,name,a_err="preconditioner") goto 9999 @@ -188,13 +188,13 @@ subroutine psb_d_apply1_vect(prec,x,desc_data,info,trans,work) & trans=trans_,work=work_) if (info == 0) call psb_geaxpby(done,ww,dzero,x,desc_data,info) call psb_gefree(ww,desc_data,info) - if (present(work)) then + if (present(work)) then else deallocate(work_,stat=info) - if (info /= psb_success_) then + if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err='DeAllocate') - goto 9999 + goto 9999 end if end if @@ -209,7 +209,7 @@ end subroutine psb_d_apply1_vect subroutine psb_d_apply2v(prec,x,y,desc_data,info,trans,work) use psb_base_mod use psb_d_prec_type, psb_protect_name => psb_d_apply2v - implicit none + implicit none type(psb_desc_type),intent(in) :: desc_data class(psb_dprec_type), intent(inout) :: prec real(psb_dpk_),intent(inout) :: x(:) @@ -218,7 +218,7 @@ subroutine psb_d_apply2v(prec,x,y,desc_data,info,trans,work) character(len=1), optional :: trans real(psb_dpk_),intent(inout), optional, target :: work(:) - character :: trans_ + character :: trans_ real(psb_dpk_), pointer :: work_(:) type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np,me @@ -232,37 +232,37 @@ subroutine psb_d_apply2v(prec,x,y,desc_data,info,trans,work) ctxt = desc_data%get_context() call psb_info(ctxt, me, np) - if (present(trans)) then + if (present(trans)) then trans_=trans else trans_='N' end if - if (present(work)) then + if (present(work)) then work_ => work else allocate(work_(4*desc_data%get_local_cols()),stat=info) - if (info /= psb_success_) then + if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err='Allocate') - goto 9999 + goto 9999 end if end if - if (.not.allocated(prec%prec)) then + if (.not.allocated(prec%prec)) then info = 1124 call psb_errpush(info,name,a_err="preconditioner") goto 9999 end if call prec%prec%apply(done,x,dzero,y,desc_data,info,trans_,work=work_) - if (present(work)) then + if (present(work)) then else deallocate(work_,stat=info) - if (info /= psb_success_) then + if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err='DeAllocate') - goto 9999 + goto 9999 end if end if @@ -277,7 +277,7 @@ end subroutine psb_d_apply2v subroutine psb_d_apply1v(prec,x,desc_data,info,trans) use psb_base_mod use psb_d_prec_type, psb_protect_name => psb_d_apply1v - implicit none + implicit none type(psb_desc_type),intent(in) :: desc_data class(psb_dprec_type), intent(inout) :: prec real(psb_dpk_),intent(inout) :: x(:) @@ -293,36 +293,34 @@ subroutine psb_d_apply1v(prec,x,desc_data,info,trans) name='psb_d_apply1v' info = psb_success_ call psb_erractionsave(err_act) - - ctxt = desc_data%get_context() call psb_info(ctxt, me, np) - if (present(trans)) then + if (present(trans)) then trans_=psb_toupper(trans) else trans_='N' end if - if (.not.allocated(prec%prec)) then + if (.not.allocated(prec%prec)) then info = 1124 call psb_errpush(info,name,a_err="preconditioner") goto 9999 end if allocate(ww(size(x)),w1(size(x)),stat=info) - if (info /= psb_success_) then + if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err='Allocate') - goto 9999 + goto 9999 end if call prec%prec%apply(done,x,dzero,ww,desc_data,info,& & trans_,work=w1) if(info /= psb_success_) goto 9999 x(:) = ww(:) deallocate(ww,W1,stat=info) - if (info /= psb_success_) then + if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err='DeAllocate') - goto 9999 + goto 9999 end if @@ -336,3 +334,159 @@ subroutine psb_d_apply1v(prec,x,desc_data,info,trans) end subroutine psb_d_apply1v +subroutine psb_dcprecseti(prec,what,val,info,ilev,ilmax,pos,idx) + use psb_base_mod + use psb_d_prec_type, psb_protect_name => psb_dcprecseti + implicit none + + class(psb_dprec_type), intent(inout) :: prec + character(len=*), intent(in) :: what + integer(psb_ipk_), intent(in) :: val + integer(psb_ipk_), intent(out) :: info + ! This optional inputs are backport from the inputs available in AMG4PSBLAS, + ! they are of no actual use here a part from compatibility reasons. + integer(psb_ipk_), optional, intent(in) :: ilev,ilmax,idx + character(len=*), optional, intent(in) :: pos + + ! Local variables + character(len=*), parameter :: name='psb_precseti' + + info = psb_success_ + + ! We need to convert from the 'what' string to the corresponding integer + ! value befor passing the call to the set of the inner method. + select case (psb_toupper(trim(what))) + case ('SUB_FILLIN') + call prec%prec%precset(psb_ilu_fill_in_,val,info) + case ('INV_FILLIN') + call prec%prec%precset(psb_inv_fillin_,val,info) + case default + info = psb_err_invalid_args_combination_ + write(psb_err_unit,*) name,& + & ': Error: uninitialized preconditioner,',& + &' should call prec%init' + return + end select + +end subroutine psb_dcprecseti + +subroutine psb_dcprecsetr(prec,what,val,info,ilev,ilmax,pos,idx) + use psb_base_mod + use psb_d_prec_type, psb_protect_name => psb_dcprecsetr + implicit none + + class(psb_dprec_type), intent(inout) :: prec + character(len=*), intent(in) :: what + real(psb_dpk_), intent(in) :: val + integer(psb_ipk_), intent(out) :: info + ! This optional inputs are backport from the inputs available in AMG4PSBLAS, + ! they are of no actual use here a part from compatibility reasons. + integer(psb_ipk_), optional, intent(in) :: ilev,ilmax,idx + character(len=*), optional, intent(in) :: pos + + ! Local variables + character(len=*), parameter :: name='psb_precsetr' + + info = psb_success_ + + ! We need to convert from the 'what' string to the corresponding integer + ! value befor passing the call to the set of the inner method. + select case (psb_toupper(trim(what))) + case('SUB_ILUTHRS') + call prec%prec%precset(psb_fact_eps_,val,info) + case('INV_THRESH') + call prec%prec%precset(psb_inv_thresh_,val,info) + case default + info = psb_err_invalid_args_combination_ + write(psb_err_unit,*) name,& + & ': Error: uninitialized preconditioner,',& + &' should call prec%init' + return + end select + +end subroutine psb_dcprecsetr + +subroutine psb_dcprecsetc(prec,what,string,info,ilev,ilmax,pos,idx) + use psb_base_mod + use psb_d_prec_type, psb_protect_name => psb_dcprecsetc + implicit none + + class(psb_dprec_type), intent(inout) :: prec + character(len=*), intent(in) :: what + character(len=*), intent(in) :: string + integer(psb_ipk_), intent(out) :: info + ! This optional inputs are backport from the inputs available in AMG4PSBLAS, + ! they are of no actual use here a part from compatibility reasons. + integer(psb_ipk_), optional, intent(in) :: ilev,ilmax,idx + character(len=*), optional, intent(in) :: pos + + ! Local variables + character(len=*), parameter :: name='psb_precsetc' + + info = psb_success_ + + ! We need to convert from the 'what' string to the corresponding integer + ! value befor passing the call to the set of the inner method. + select case (psb_toupper(trim(what))) + case ('SUB_SOLVE') + ! We select here the type of solver on the block + select case (psb_toupper(trim(string))) + case("ILU") + call prec%prec%precset(psb_f_type_,psb_f_ilu_k_,info) + call prec%prec%precset(psb_ilu_ialg_,psb_ilu_n_,info) + case("ILUT") + call prec%prec%precset(psb_f_type_,psb_f_ilu_t_,info) + call prec%prec%precset(psb_ilu_ialg_,psb_ilu_t_,info) + case("AINV") + call prec%prec%precset(psb_f_type_,psb_f_ainv_,info) + case("INVK") + call prec%prec%precset(psb_f_type_,psb_f_invk_,info) + case("INVT") + call prec%prec%precset(psb_f_type_,psb_f_invt_,info) + case default + ! Default to ILU(0) factorization + call prec%prec%precset(psb_f_type_,psb_f_ilu_n_,info) + call prec%prec%precset(psb_ilu_ialg_,psb_ilu_n_,info) + end select + case ("ILU_ALG") + select case (psb_toupper(trim(string))) + case ("MILU") + call prec%prec%precset(psb_ilu_ialg_,psb_milu_n_,info) + case default + ! Do nothing + end select + case ("ILUT_SCALE") + select case (psb_toupper(trim(string))) + case ("MAXVAL") + call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_maxval_,info) + case ("DIAG") + call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_diag_,info) + case ("ARWSUM") + call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_arwsum_,info) + case ("ARCSUM") + call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_arcsum_,info) + case ("ACLSUM") + call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_aclsum_,info) + case ("NONE") + call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_none_,info) + case default + call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_none_,info) + end select + case ("AINV_ALG") + select case (psb_toupper(trim(string))) + case("LLK") + call prec%prec%precset(psb_ainv_alg_,psb_ainv_llk_,info) + case("SYM-LLK") + call prec%prec%precset(psb_ainv_alg_,psb_ainv_s_llk_,info) + case("STAB-LLK") + call prec%prec%precset(psb_ainv_alg_,psb_ainv_s_ft_llk_,info) + case("MLK","LMX") + call prec%prec%precset(psb_ainv_alg_,psb_ainv_mlk_,info) + case default + call prec%prec%precset(psb_ainv_alg_,psb_ainv_llk_,info) + end select + case default + + end select + +end subroutine psb_dcprecsetc diff --git a/prec/impl/psb_d_sp_drop.f90 b/prec/impl/psb_d_sp_drop.f90 new file mode 100644 index 00000000..67c49b6f --- /dev/null +++ b/prec/impl/psb_d_sp_drop.f90 @@ -0,0 +1,168 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! Moved here from AMG-AINV, original copyright below. +! +! +! AMG-AINV: Approximate Inverse plugin for +! AMG4PSBLAS version 1.0 +! +! (C) Copyright 2020 +! +! Salvatore Filippone University of Rome Tor Vergata +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +subroutine psb_d_sp_drop(idiag,nzrmax,sp_thresh,nz,iz,valz,info) + use psb_base_mod + implicit none + real(psb_dpk_), intent(in) :: sp_thresh + integer(psb_ipk_), intent(in) :: idiag, nzrmax + integer(psb_ipk_), intent(inout) :: nz + integer(psb_ipk_), intent(inout) :: iz(:) + real(psb_dpk_), intent(inout) :: valz(:) + integer(psb_ipk_), intent(out) :: info + ! + integer(psb_ipk_) :: i, j, idf, nw + real(psb_dpk_) :: witem + integer(psb_ipk_) :: widx + real(psb_dpk_), allocatable :: xw(:) + integer(psb_ipk_), allocatable :: xwid(:), indx(:) + + + info = psb_success_ + + if (nz > min(size(iz),size(valz))) then + write(0,*) 'Serious size problem ',nz,size(iz),size(valz) + info = -2 + return + end if + allocate(xw(nz),xwid(nz),indx(nz),stat=info) + if (info /= psb_success_) then + write(psb_err_unit,*) ' Memory allocation failure in sp_drop',nz,info + return + endif + + ! Always keep the diagonal element + idf = -1 + do i=1, nz + if (iz(i) == idiag) then + idf = i + witem = valz(i) + widx = iz(i) + valz(i) = valz(1) + iz(i) = iz(1) + valz(1) = witem + iz(1) = widx + exit + end if + end do + + if (idf == -1) then + + xw(1:nz) = valz(1:nz) + call psb_qsort(xw(1:nz),indx(1:nz),dir=psb_asort_down_) + do i=1, nz + xwid(i) = iz(indx(i)) + end do + nw = min(nz,nzrmax) + do + if (nw <= 1) exit + if (abs(xw(nw)) < sp_thresh) then + nw = nw - 1 + else + exit + end if + end do + nw = max(nw, 1) + + else + + nw = nz-1 + + xw(1:nw) = valz(2:nz) + + call psb_qsort(xw(1:nw),indx(1:nw),dir=psb_asort_down_) + nw = min(nw,nzrmax-1) + do + if (nw <= 1) exit + if (abs(xw(nw)) < sp_thresh) then + nw = nw - 1 + else + exit + end if + end do + + do i=1, nw + xwid(i) = iz(1+indx(i)) + end do + nw = nw + 1 + xw(nw) = valz(1) + xwid(nw) = iz(1) + end if + + call psb_msort(xwid(1:nw),indx(1:nw),dir=psb_sort_up_) + + do i=1, nw + valz(i) = xw(indx(i)) + iz(i) = xwid(i) + end do + nz = nw + if (nz>nzrmax) write(0,*) 'in sp_drop: ',nw,nzrmax,nz + deallocate(xw,xwid,indx,stat=info) + if (info /= psb_success_) then + write(psb_err_unit,*) ' Memory deallocation failure in sp_drop',info + return + endif + return +end subroutine psb_d_sp_drop diff --git a/prec/impl/psb_d_sparsify.f90 b/prec/impl/psb_d_sparsify.f90 new file mode 100644 index 00000000..de4628ba --- /dev/null +++ b/prec/impl/psb_d_sparsify.f90 @@ -0,0 +1,260 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! Moved here from AMG-AINV, original copyright below. +! +! +! AMG-AINV: Approximate Inverse plugin for +! AMG4PSBLAS version 1.0 +! +! (C) Copyright 2020 +! +! Salvatore Filippone University of Rome Tor Vergata +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +subroutine psb_d_sparsify(idiag,nzrmax,sp_thresh,n,zw,nz,iz,valz,info,istart,iheap,ikr) + use psb_base_mod + implicit none + + real(psb_dpk_), intent(in) :: sp_thresh + integer(psb_ipk_), intent(in) :: idiag, n, nzrmax + real(psb_dpk_), intent(inout) :: zw(:) + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), intent(out) :: iz(:) + real(psb_dpk_), intent(out) :: valz(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: istart + type(psb_i_heap), optional :: iheap + integer(psb_ipk_), optional :: ikr(:) + ! + integer(psb_ipk_) :: i, istart_, last_i, iret,k + real(psb_dpk_) :: witem + integer(psb_ipk_) :: widx + real(psb_dpk_), allocatable :: xw(:) + integer(psb_ipk_), allocatable :: xwid(:), indx(:) + type(psb_d_idx_heap) :: heap + + + info = psb_success_ + istart_ = 1 + if (present(istart)) istart_ = max(1,istart) + if (.false.) then + nz = 0 + do i=istart_, n + if ((i == idiag).or.(abs(zw(i)) >= sp_thresh)) then + nz = nz + 1 + iz(nz) = i + valz(nz) = zw(i) + end if + end do + + else + allocate(xw(nzrmax),xwid(nzrmax),indx(nzrmax),stat=info) + if (info /= psb_success_) then + return + end if + + call heap%init(info,dir=psb_asort_down_) + + ! Keep at least the diagonal + nz = 0 + + if (present(iheap)) then + if (.not.(present(ikr))) then + write(psb_err_unit,*) 'Error: if IHEAP then also IKR' + info = -1 + return + end if + last_i = -1 + do + call iheap%get_first(i,iret) + if (iret < 0) exit + ! An index may have been put on the heap more than once. + if (i == last_i) cycle + last_i = i + if (i == idiag) then + xw(1) = zw(i) + xwid(1) = i + else if (abs(zw(i)) >= sp_thresh) then + call heap%insert(zw(i),i,info) + end if + zw(i) = dzero + ikr(i) = 0 + end do + + else + + do i=istart_, n + if (i == idiag) then + xw(1) = zw(i) + xwid(1) = i + else if (abs(zw(i)) >= sp_thresh) then + call heap%insert(zw(i),i,info) + end if + zw(i) = dzero + end do + end if + + k = 1 + do + if (k == nzrmax) exit + call heap%get_first(witem,widx,info) + if (info == -1) then + info = psb_success_ + exit + endif + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + return + end if + k = k + 1 + xw(k) = witem + xwid(k) = widx + end do + call heap%free(info) + nz = k + call psb_msort(xwid(1:nz),indx(1:nz),dir=psb_sort_up_) +!!$ write(0,*) 'sparsify output for idiag ',idiag,' :',nz,sp_thresh + do i=1, nz + valz(i) = xw(indx(i)) + iz(i) = xwid(i) +!!$ write(0,*) ' ',iz(i),valz(i) + end do + + end if + + return + +end subroutine psb_d_sparsify + + +subroutine psb_d_sparsify_list(idiag,nzrmax,sp_thresh,n,zw,nz,iz,valz,lhead,listv,ikr,info) + use psb_base_mod + implicit none + + real(psb_dpk_), intent(in) :: sp_thresh + integer(psb_ipk_), intent(in) :: idiag, n, nzrmax + real(psb_dpk_), intent(inout) :: zw(:) + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), intent(out) :: iz(:) + real(psb_dpk_), intent(out) :: valz(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(inout) :: lhead, listv(:) + integer(psb_ipk_) :: ikr(:) + ! + integer(psb_ipk_) :: i, istart_, last_i, iret,k,current, next + real(psb_dpk_) :: witem + integer(psb_ipk_) :: widx + real(psb_dpk_), allocatable :: xw(:) + integer(psb_ipk_), allocatable :: xwid(:), indx(:) + + + info = psb_success_ + istart_ = 1 + allocate(xw(n),xwid(n),indx(n),stat=info) + + current = lhead + lhead = -1 + i = 0 + do while (current >0) + i = i + 1 + xw(i) = zw(current) + xwid(i) = current + + if (current == idiag) then + ! Bring the diagona into first position + witem = xw(1) + widx = xwid(1) + xw(1) = xw(i) + xwid(1) = xwid(i) + xw(i) = witem + xwid(i) = widx + end if + + zw(current) = dzero + ikr(current) = 0 + + next = listv(current) + listv(current) = -1 + current = next + end do + nz = i + if (nz > 2) call psb_hsort(xw(2:nz),ix=xwid(2:nz),& + & dir=psb_asort_down_,flag=psb_sort_keep_idx_) +!!$ write(0,*) 'Done first msort ' +!!$ write(0,*) ' after first msort for idiag ',idiag,' :',nz,sp_thresh +!!$ do i=1, nz +!!$ write(0,*) ' ',xwid(i),xw(i) +!!$ end do + + i = 2 + do while (i<=nz) + if (abs(xw(i)) < sp_thresh) exit + i = i + 1 + end do +!!$ write(0,*) 'NZ ',nz, i, nzrmax + nz = max(1,min(i-1,nzrmax)) + call psb_msort(xwid(1:nz),ix=indx(1:nz),dir=psb_sort_up_) +!!$ write(0,*) 'Done second msort ' + +!!$ write(0,*) 'sparsify output for idiag ',idiag,' :',nz,i,sp_thresh + do i=1, nz + valz(i) = xw(indx(i)) + iz(i) = xwid(i) +!!$ write(0,*) ' ',iz(i),valz(i) + end do + + return + +end subroutine psb_d_sparsify_list diff --git a/prec/impl/psb_drwclip.f90 b/prec/impl/psb_drwclip.f90 new file mode 100644 index 00000000..97aea428 --- /dev/null +++ b/prec/impl/psb_drwclip.f90 @@ -0,0 +1,90 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! Moved here from AMG-AINV, original copyright below. +! +! +! AMG-AINV: Approximate Inverse plugin for +! AMG4PSBLAS version 1.0 +! +! (C) Copyright 2020 +! +! Salvatore Filippone University of Rome Tor Vergata +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +subroutine psb_d_rwclip(nz,ia,ja,val,imin,imax,jmin,jmax) + use psb_base_mod + + implicit none + integer(psb_ipk_), intent(inout) :: nz + integer(psb_ipk_), intent(inout) :: ia(*), ja(*) + real(psb_dpk_), intent(inout) :: val(*) + integer(psb_ipk_), intent(in) :: imin,imax,jmin,jmax + + integer(psb_ipk_) :: i,j + + j = 0 + do i=1, nz + if ((imin <= ia(i)).and.& + & (ia(i) <= imax).and.& + & (jmin <= ja(i)).and.& + & (ja(i) <= jmax) ) then + j = j + 1 + ia(j) = ia(i) + ja(j) = ja(i) + val(j) = val(i) + end if + end do + nz = j +end subroutine psb_d_rwclip diff --git a/prec/impl/psb_dsparse_biconjg_llk.F90 b/prec/impl/psb_dsparse_biconjg_llk.F90 new file mode 100644 index 00000000..919f64ab --- /dev/null +++ b/prec/impl/psb_dsparse_biconjg_llk.F90 @@ -0,0 +1,366 @@ +! +! +! AMG-AINV: Approximate Inverse plugin for +! AMG4PSBLAS version 1.0 +! +! (C) Copyright 2020 +! +! Salvatore Filippone University of Rome Tor Vergata +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +subroutine psb_dsparse_biconjg_llk(n,a,p,z,w,nzrmax,sp_thresh,info) + use psb_base_mod + use psb_d_ainv_tools_mod + use psb_d_biconjg_mod, psb_protect_name => psb_dsparse_biconjg_llk + ! + ! Left-looking variant + ! + ! + implicit none + integer(psb_ipk_), intent(in) :: n + type(psb_d_csr_sparse_mat), intent(in) :: a + type(psb_d_csc_sparse_mat), intent(inout) :: z,w + integer(psb_ipk_), intent(in) :: nzrmax + real(psb_dpk_), intent(in) :: sp_thresh + real(psb_dpk_), intent(out) :: p(:) + integer(psb_ipk_), intent(out) :: info + + ! Locals + integer(psb_ipk_), allocatable :: ia(:), ja(:), izkr(:), izcr(:) + real(psb_dpk_), allocatable :: zval(:),val(:), q(:) + integer(psb_ipk_) :: i,j,k, kc, kr, err_act, nz, nzra, nzrz, ipzi,ipzj,& + & nzzi,nzzj, nzz, ip1, ip2, ipza,ipzz, ipzn, nzzn, ipz1, ipz2,& + & ipj, lastj, nextj, nzw + type(psb_i_heap) :: heap, rheap + type(psb_d_csc_sparse_mat) :: ac + real(psb_dpk_) :: alpha + character(len=20) :: name='psb_orth_llk' + logical, parameter :: debug=.false. + + allocate(zval(n),ia(n),val(n),izkr(n),izcr(n),q(n),stat=info) + if (info == psb_success_) call ac%cp_from_fmt(a,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + return + end if + ! + ! izkr(i): flag nonzeros in ZVAL. To minimize traffic into heap. + ! izcr(i): flag rows to be used for the dot products. Used to minimize + ! traffic in rheap. + ! + do i=1,n + izkr(i) = 0 + izcr(i) = 0 + zval(i) = dzero + end do + + ! Init z_1=e_1 and p_1=a_11 + p(1) = dzero + i = 1 + nz = a%irp(i+1) - a%irp(i) + do j=1,nz + if (a%ja(j) == 1) then + p(1) = a%val(j) + exit + end if + end do + if (abs(p(1)) < d_epstol) & + & p(1) = 1.d-3 + + q(1) = p(1) + ! + ! + call z%allocate(n,n,n*nzrmax) + + z%icp(1) = 1 + z%icp(2) = 2 + z%ia(1) = 1 + z%val(1) = done + nzz = 1 + + call w%allocate(n,n,n*nzrmax) + w%icp(1) = 1 + w%icp(2) = 2 + w%ia(1) = 1 + w%val(1) = done + nzw = 1 + + do i = 2, n + if (debug) write(0,*) 'Main loop iteration ',i,n + + ! + ! Update loop on Z. + ! Must be separated from update loop of W because of + ! the conflict on J that would result. + ! + + ! ZVAL = e_i + ! !$ do j=1, i-1 + ! !$ zval(j) = dzero + ! !$ end do + zval(i) = done + izkr(i) = 1 + call heap%init(info) + if (info == psb_success_) call heap%insert(i,info) + + if (info == psb_success_) call rheap%init(info) + + do j = ac%icp(i), ac%icp(i+1)-1 + if (ac%ia(j) < i) then +!!$ write(0,*) i, ' Outer inserting ',ac%ia(j) + if (info == psb_success_) call rheap%insert(ac%ia(j),info) + izcr(ac%ia(j)) = 1 + end if + end do + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_init_heap') + return + end if + + ! Update loop + ! The idea is to keep track of the indices of the nonzeros in zval, + ! so as to only do the dot products on the rows which have nonzeros + ! in their positions; to do this we keep an extra + ! copy of A in CSC, and the row indices to be considered are in rheap. + lastj = -1 + outer: do + inner: do + call rheap%get_first(j,info) + if (debug) write(0,*) 'from get_first: ',j,info + if (info == -1) exit outer ! Empty heap + if (j > lastj) then + lastj = j + exit inner + end if + end do inner + + izcr(j) = 0 + if (j>=i) cycle outer + if (debug) write(0,*) 'update loop, using row: ',j,i + ip1 = a%irp(j) + ip2 = a%irp(j+1) - 1 + do + if (ip2 < ip1 ) exit + if (a%ja(ip2) <= n) exit + ip2 = ip2 -1 + end do + nzra = max(0,ip2 - ip1 + 1) + p(i) = psb_spge_dot(nzra,a%ja(ip1:ip2),a%val(ip1:ip2),zval) + ! ! write(psb_err_unit,*) j,i,p(i) + + alpha = (-p(i)/p(j)) +!!$ write(0,*) 'At step ',i,j,' p(i) ',p(i),alpha + if (.false..or.(abs(alpha) > sp_thresh)) then +!!$ write(0,*) 'At step ',i,j,' range ',z%icp(j), z%icp(j+1)-1, & +!!$ & ' vals ',z%ia(z%icp(j):z%icp(j+1)-1) + do k=z%icp(j), z%icp(j+1)-1 + kr = z%ia(k) + zval(kr) = zval(kr) + alpha*z%val(k) + if (izkr(kr) == 0) then +!!$ write(0,*) ' main inner Inserting ',kr + call heap%insert(kr,info) + if (info /= psb_success_) exit + izkr(kr) = 1 + ! We have just added a new nonzero in KR. Thus, we will + ! need to explicitly compute the dot products on all + ! rows jj).and.(nextj lastj) then + lastj = j + exit innerw + end if + end do innerw + izcr(j) = 0 + if (j>=i) cycle outerw + if (debug) write(0,*) 'update loop, using row: ',j + ip1 = ac%icp(j) + ip2 = ac%icp(j+1) - 1 + do + if (ip2 < ip1 ) exit + if (ac%ia(ip2) <= n) exit + ip2 = ip2 -1 + end do + nzra = max(0,ip2 - ip1 + 1) + q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval) + ! !$ write(psb_err_unit,*) j,i,p(i) + + alpha = (-q(i)/q(j)) + if (.false..or.(abs(alpha) > sp_thresh)) then + + do k=w%icp(j), w%icp(j+1)-1 + kr = w%ia(k) + zval(kr) = zval(kr) + alpha*w%val(k) + if (izkr(kr) == 0) then + call heap%insert(kr,info) + if (info /= psb_success_) exit + izkr(kr) = 1 + ! We have just added a new nonzero in KR. Thus, we will + ! need to explicitly compute the dot products on all + ! rows jj).and.(nextj psb_dsparse_biconjg_llk_noth + + ! + ! Left-looking variant, with NO drop rule on p(i)/p(j) + ! + ! + implicit none + integer(psb_ipk_), intent(in) :: n + type(psb_d_csr_sparse_mat), intent(in) :: a + type(psb_d_csc_sparse_mat), intent(inout) :: z,w + integer(psb_ipk_), intent(in) :: nzrmax + real(psb_dpk_), intent(in) :: sp_thresh + real(psb_dpk_), intent(out) :: p(:) + integer(psb_ipk_), intent(out) :: info + + ! Locals + integer(psb_ipk_), allocatable :: ia(:), ja(:), izkr(:), izcr(:) + real(psb_dpk_), allocatable :: zval(:),val(:), q(:) + integer(psb_ipk_) :: i,j,k, kc, kr, err_act, nz, nzra, nzrz, ipzi,ipzj,& + & nzzi,nzzj, nzz, ip1, ip2, ipza,ipzz, ipzn, nzzn, ipz1, ipz2,& + & ipj, lastj, nextj, nzw + type(psb_i_heap) :: heap, rheap + type(psb_d_csc_sparse_mat) :: ac + real(psb_dpk_) :: alpha + character(len=20) :: name='psb_orth_llk_noth' + logical, parameter :: debug=.false. + + allocate(zval(n),ia(n),val(n),izkr(n),izcr(n),q(n),stat=info) + if (info == psb_success_) call ac%cp_from_fmt(a,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + return + end if + ! + ! izkr(i): flag nonzeros in ZVAL. To minimize traffic into heap. + ! izcr(i): flag rows to be used for the dot products. Used to minimize + ! traffic in rheap. + ! + do i=1,n + izkr(i) = 0 + izcr(i) = 0 + zval(i) = dzero + end do + + ! Init z_1=e_1 and p_1=a_11 + p(1) = dzero + i = 1 + nz = a%irp(i+1) - a%irp(i) + do j=1,nz + if (a%ja(j) == 1) then + p(1) = a%val(j) + exit + end if + end do + if (abs(p(1)) < d_epstol) & + & p(1) = 1.d-3 + + q(1) = p(1) + ! + ! + call z%allocate(n,n,n*nzrmax) + + z%icp(1) = 1 + z%icp(2) = 2 + z%ia(1) = 1 + z%val(1) = done + nzz = 1 + + call w%allocate(n,n,n*nzrmax) + w%icp(1) = 1 + w%icp(2) = 2 + w%ia(1) = 1 + w%val(1) = done + nzw = 1 + + do i = 2, n + if (debug) write(0,*) 'Main loop iteration ',i,n + + ! + ! Update loop on Z. + ! Must be separated from update loop of W because of + ! the conflict on J that would result. + ! + + ! ZVAL = e_i + ! !$ do j=1, i-1 + ! !$ zval(j) = dzero + ! !$ end do + zval(i) = done + izkr(i) = 1 + call heap%init(info) + if (info == psb_success_) call heap%insert(i,info) + + if (info == psb_success_) call rheap%init(info) + do j = ac%icp(i), ac%icp(i+1)-1 + if (ac%ia(j) < i) then + if (info == psb_success_) call rheap%insert(ac%ia(j),info) + izcr(ac%ia(j)) = 1 + end if + end do + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_init_heap') + return + end if + + ! Update loop + ! The idea is to keep track of the indices of the nonzeros in zval, + ! so as to only do the dot products on the rows which have nonzeros + ! in their positions; to do this we keep an extra + ! copy of A in CSC, and the row indices to be considered are in rheap. + lastj = -1 + outer: do + inner: do + call rheap%get_first(j,info) + if (debug) write(0,*) 'from get_first: ',j,info + if (info == -1) exit outer ! Empty heap + if (j > lastj) then + lastj = j + exit inner + end if + end do inner + + izcr(j) = 0 + if (j>=i) cycle outer + if (debug) write(0,*) 'update loop, using row: ',j,i + ip1 = a%irp(j) + ip2 = a%irp(j+1) - 1 + do + if (ip2 < ip1 ) exit + if (a%ja(ip2) <= n) exit + ip2 = ip2 -1 + end do + nzra = max(0,ip2 - ip1 + 1) + p(i) = psb_spge_dot(nzra,a%ja(ip1:ip2),a%val(ip1:ip2),zval) + ! !$ write(psb_err_unit,*) j,i,p(i) + + alpha = (-p(i)/p(j)) + + if (.true.) then + do k=z%icp(j), z%icp(j+1)-1 + kr = z%ia(k) + zval(kr) = zval(kr) + alpha*z%val(k) + if (izkr(kr) == 0) then + + call heap%insert(kr,info) + if (info /= psb_success_) exit + izkr(kr) = 1 + ! We have just added a new nonzero in KR. Thus, we will + ! need to explicitly compute the dot products on all + ! rows jj).and.(nextj lastj) then + lastj = j + exit innerw + end if + end do innerw + izcr(j) = 0 + if (j>=i) cycle outerw + if (debug) write(0,*) 'update loop, using row: ',j + ip1 = ac%icp(j) + ip2 = ac%icp(j+1) - 1 + do + if (ip2 < ip1 ) exit + if (ac%ia(ip2) <= n) exit + ip2 = ip2 -1 + end do + nzra = max(0,ip2 - ip1 + 1) + q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval) + ! !$ write(psb_err_unit,*) j,i,p(i) + + alpha = (-q(i)/q(j)) + if (.true.) then + + do k=w%icp(j), w%icp(j+1)-1 + kr = w%ia(k) + zval(kr) = zval(kr) + alpha*w%val(k) + if (izkr(kr) == 0) then + call heap%insert(kr,info) + if (info /= psb_success_) exit + izkr(kr) = 1 + ! We have just added a new nonzero in KR. Thus, we will + ! need to explicitly compute the dot products on all + ! rows jj).and.(nextj psb_dsparse_biconjg_mlk + ! + ! Left-looking variant + ! + ! + implicit none + integer(psb_ipk_), intent(in) :: n + type(psb_d_csr_sparse_mat), intent(in) :: a + type(psb_d_csc_sparse_mat), intent(inout) :: z,w + integer(psb_ipk_), intent(in) :: nzrmax + real(psb_dpk_), intent(in) :: sp_thresh + real(psb_dpk_), intent(out) :: p(:) + integer(psb_ipk_), intent(out) :: info + + ! Locals + integer(psb_ipk_), allocatable :: ia(:), ja(:), izkr(:), izcr(:), hlist(:), bfr(:), rwlist(:) + real(psb_dpk_), allocatable :: zval(:),val(:), q(:) + integer(psb_ipk_) :: i,j,k, kc, kr, err_act, nz, nzra, nzrz, ipzi,ipzj,& + & nzzi,nzzj, nzz, ip1, ip2, ipza,ipzz, ipzn, nzzn, ipz1, ipz2,& + & ipj, lastj, nextj, nzw, hlhead, li, mj, kkc, ifrst, ilst, rwhead + type(psb_i_heap) :: heap, rheap + type(psb_d_csc_sparse_mat) :: ac + real(psb_dpk_) :: alpha + character(len=20) :: name='psb_biconjg_mlk' + logical, parameter :: debug=.false., test_merge=.true. + + allocate(zval(n),ia(n),val(n),izkr(n),izcr(n),q(n), & + & hlist(n),rwlist(n),bfr(n),stat=info) + if (info == psb_success_) call ac%cp_from_fmt(a,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + return + end if + ! + ! izkr(i): flag nonzeros in ZVAL. To minimize traffic into heap. + ! izcr(i): flag rows to be used for the dot products. Used to minimize + ! traffic in rheap. + ! + do i=1,n + izkr(i) = 0 + izcr(i) = 0 + zval(i) = dzero + hlist(i) = -1 + rwlist(i) = -1 + end do + + ! Init z_1=e_1 and p_1=a_11 + p(1) = dzero + i = 1 + nz = a%irp(i+1) - a%irp(i) + do j=1,nz + if (a%ja(j) == 1) then + p(1) = a%val(j) + exit + end if + end do + if (abs(p(1)) < d_epstol) & + & p(1) = 1.d-3 + + q(1) = p(1) + ! + ! + call z%allocate(n,n,n*nzrmax) + + z%icp(1) = 1 + z%icp(2) = 2 + z%ia(1) = 1 + z%val(1) = done + nzz = 1 + + call w%allocate(n,n,n*nzrmax) + w%icp(1) = 1 + w%icp(2) = 2 + w%ia(1) = 1 + w%val(1) = done + nzw = 1 + + + do i = 2, n + if (debug) write(0,*) 'Main loop iteration ',i,n + + ! + ! Update loop on Z. + ! Must be separated from update loop of W because of + ! the conflict on J that would result. + ! + + ! ZVAL = e_i + ! !$ do j=1, i-1 + ! !$ zval(j) = dzero + ! !$ end do + zval(i) = done + izkr(i) = 1 + rwhead = i + + hlhead = -1 + + kkc = 0 + ilst = ac%icp(i)-1 + ifrst = ac%icp(i) + do j = ac%icp(i+1)-1, ac%icp(i), -1 + if (ac%ia(j) < i) then + ilst = j + exit + end if + end do + kkc = ilst-ifrst+1 + + if (.true..or.debug) then +!!$ write(0,*) 'Outer Before insertion : ',hlhead + call printlist(hlhead,hlist) + end if + if (kkc > 0) then +!!$ write(0,*) i,' Outer Inserting : ',kkc,':',ac%ia(ifrst:ilst) + + !call hlmerge(hlhead,hlist,bfr(1:kkc)) + call hlmerge(hlhead,hlist,ac%ia(ifrst:ilst)) + end if + if (debug) then + write(0,*) 'Outer After insertion: ',hlhead + call printlist(hlhead,hlist) + end if + + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='init_lists') + return + end if + + ! Update loop + ! The idea is to keep track of the indices of the nonzeros in zval, + ! so as to only do the dot products on the rows which have nonzeros + ! in their positions; to do this we keep an extra + ! copy of A in CSC, and the row indices to be considered are in rheap. + lastj = -1 + outer: do + mj = hlhead + if (mj > 0) then + hlhead = hlist(mj) + hlist(mj) = -1 + end if + j = mj + if (j < 0) exit outer + + izcr(j) = 0 + if (j>=i) cycle outer + + if (debug) write(0,*) 'update loop, using row: ',j,i,mj + ip1 = a%irp(j) + ip2 = a%irp(j+1) - 1 + do + if (ip2 < ip1 ) exit + if (a%ja(ip2) <= n) exit + ip2 = ip2 -1 + end do + nzra = max(0,ip2 - ip1 + 1) + p(i) = psb_spge_dot(nzra,a%ja(ip1:ip2),a%val(ip1:ip2),zval) + ! !$ write(psb_err_unit,*) j,i,p(i) + + alpha = (-p(i)/p(j)) +!!$ write(0,*) 'At step ',i,j,' p(i) ',p(i),alpha +!!$ write(0,*) ' Current list is : ',hlhead +!!$ call printlist(hlhead,hlist) +!!$ + + + if (.false..or.(abs(alpha) > sp_thresh)) then + ifrst=z%icp(j) + ilst=z%icp(j+1)-1 + call hlmerge(rwhead,rwlist,z%ia(ifrst:ilst)) +!!$ write(0,*) 'At step ',i,j,' range ',z%icp(j), z%icp(j+1)-1, & +!!$ & ' vals ',z%ia(z%icp(j):z%icp(j+1)-1) + + do k=z%icp(j), z%icp(j+1)-1 + kr = z%ia(k) + zval(kr) = zval(kr) + alpha*z%val(k) + + if (izkr(kr) == 0) then +!!$ write(0,*) ' main inner Inserting ',kr +!!$ call hlmerge(rwhead,rwlist,(/kr/)) + izkr(kr) = 1 + ! We have just added a new nonzero in KR. Thus, we will + ! need to explicitly compute the dot products on all + ! rows jj)) then + ifrst = min(ifrst,kc ) + ilst = max(ilst,kc) + end if + end do + kkc = ilst-ifrst+1 + if (debug) then + write(0,*) 'Inner Before insertion: ' + call printlist(hlhead,hlist) + write(0,*) 'Inner Inserting : ',kkc,':',ac%ia(ifrst:ilst) + end if + if (ilst >= ifrst) then +!!$ write(0,*) j,i,' Inner inserting ',ac%ia(ifrst:ilst) + call hlmerge(hlhead,hlist,ac%ia(ifrst:ilst)) + end if + + if (debug) then + write(0,*) 'Inner After insertion: ',hlhead + call printlist(hlhead,hlist) + end if + + end if + if (info /= psb_success_) exit + end do + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_insert_heap') + return + end if + end if + end do outer + call a%csget(i,i,nzra,ia,ja,val,info) + call rwclip(nzra,ia,ja,val,ione,n,ione,n) + p(i) = psb_spge_dot(nzra,ja,val,zval) + if (abs(p(i)) < d_epstol) & + & p(i) = 1.d-3 + +!!$ write(0,*) 'Dropping from a column with: ',i,psb_howmany_heap(heap),sp_thresh + + ! + ! Sparsify current ZVAL and put into ZMAT + ! + call sparsify(i,nzrmax,sp_thresh,n,zval,nzrz,ia,val,rwhead,rwlist,izkr,info) + + if (info /= psb_success_) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='sparsify') + return + end if + call psb_ensure_size(nzz+nzrz, z%ia, info) + call psb_ensure_size(nzz+nzrz, z%val, info) + ipz1 = z%icp(i) + do j=1, nzrz + z%ia(ipz1 + j -1) = ia(j) + z%val(ipz1 + j -1) = val(j) + end do + z%icp(i+1) = ipz1 + nzrz + nzz = nzz + nzrz + + + ! WVAL = e_i + ! !$ do j=1, i-1 + ! !$ zval(j) = dzero + ! !$ end do + zval(i) = done + izkr(i) = 1 + rwhead = i + hlhead = -1 + + kkc = 0 + ilst = a%irp(i)-1 + ifrst = a%irp(i) + do j = a%irp(i+1)-1, a%irp(i), -1 + if (a%ja(j) < i) then + ilst = j + exit + end if + end do + kkc = ilst-ifrst+1 + + if (debug) then + write(0,*) 'Outer Before insertion: ' + call printlist(hlhead,hlist) + write(0,*) 'Outer Inserting : ',kkc,':',a%ja(ifrst:ilst) + end if + if (kkc > 0 ) then + !call hlmerge(hlhead,hlist,bfr(1:kkc)) + call hlmerge(hlhead,hlist,a%ja(ifrst:ilst)) + end if + if (debug) then + write(0,*) 'Outer After insertion: ',hlhead + call printlist(hlhead,hlist) + end if + + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='init_lists') + return + end if + + ! Update loop + ! The idea is to keep track of the indices of the nonzeros in zval, + ! so as to only do the dot products on the rows which have nonzeros + ! in their positions; to do this we keep an extra + ! copy of A in CSC, and the row indices to be considered are in rheap. + lastj = -1 + outerw: do + mj = hlhead + if (hlhead > 0) then + hlhead = hlist(mj) + hlist(mj) = -1 + end if + j = mj + if (j < 0) exit outerw + + izcr(j) = 0 + if (j>=i) cycle outerw + if (debug) write(0,*) 'update loop, using row: ',j + ip1 = ac%icp(j) + ip2 = ac%icp(j+1) - 1 + do + if (ip2 < ip1 ) exit + if (ac%ia(ip2) <= n) exit + ip2 = ip2 -1 + end do + nzra = max(0,ip2 - ip1 + 1) + q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval) + ! !$ write(psb_err_unit,*) j,i,p(i) + + alpha = (-q(i)/q(j)) + if (.false..or.(abs(alpha) > sp_thresh)) then + ifrst=w%icp(j) + ilst=w%icp(j+1)-1 + call hlmerge(rwhead,rwlist,w%ia(ifrst:ilst)) + + do k=w%icp(j), w%icp(j+1)-1 + kr = w%ia(k) + zval(kr) = zval(kr) + alpha*w%val(k) + if (izkr(kr) == 0) then + izkr(kr) = 1 + ! We have just added a new nonzero in KR. Thus, we will + ! need to explicitly compute the dot products on all + ! rows jj)) then + ifrst = min(ifrst,kc ) + ilst = max(ilst,kc) + end if + end do + kkc = ilst-ifrst+1 + if (debug) then + write(0,*) 'Inner Before insertion: ' + call printlist(hlhead,hlist) + write(0,*) 'Inner Inserting : ',kkc,':',a%ja(ifrst:ilst) + end if + + call hlmerge(hlhead,hlist,a%ja(ifrst:ilst)) + + if (debug) then + write(0,*) 'Inner After insertion: ',hlhead + call printlist(hlhead,hlist) + end if + if (debug) write(0,*) 'update loop, adding indices: ',& + & a%ja(a%irp(kr):a%irp(kr+1)-1) + + end if + if (info /= psb_success_) exit + end do + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_insert_heap') + return + end if + end if + end do outerw + ip1 = ac%icp(i) + ip2 = ac%icp(i+1) - 1 + do + if (ip2 < ip1 ) exit + if (ac%ia(ip2) <= n) exit + ip2 = ip2 -1 + end do + nzra = max(0,ip2 - ip1 + 1) + q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval) + if (abs(q(i)) < d_epstol) & + & q(i) = 1.d-3 + +!!$ write(0,*) 'Dropping from a column with: ',i,psb_howmany_heap(heap),sp_thresh + ! + ! Sparsify current ZVAL and put into ZMAT + ! + call sparsify(i,nzrmax,sp_thresh,n,zval,nzrz,ia,val,rwhead,rwlist,izkr,info) + if (info /= psb_success_) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='sparsify') + return + end if + call psb_ensure_size(nzw+nzrz, w%ia, info) + call psb_ensure_size(nzw+nzrz, w%val, info) + ipz1 = w%icp(i) + do j=1, nzrz + w%ia(ipz1 + j -1) = ia(j) + w%val(ipz1 + j -1) = val(j) + end do + w%icp(i+1) = ipz1 + nzrz + nzw = nzw + nzrz + + end do + +contains + + subroutine hlmerge(head,listv,vals) + integer(psb_ipk_), intent(inout) :: head, listv(:) + integer(psb_ipk_), intent(in) :: vals(:) + integer(psb_ipk_) :: i,j,k, lh, lv, nv, vv, flh, ph + + nv = size(vals) + lh = head + flh = -1 + lv = 1 + if ((head < 0).and.(nv > 0)) then + ! Adjust head if empty + head = vals(lv) + lv = lv + 1 + else if ((head > 0) .and. (nv >0)) then + ! Adjust head if first item less than it + if (head > vals(lv)) then + listv(vals(lv)) = head + head = vals(lv) + lv = lv + 1 + end if + end if + + lh = head + ph = lh + do while ((lh > 0) .and. (lv <= nv)) + if (lh == vals(lv)) then + lv = lv + 1 + else if (lh > vals(lv)) then + listv(vals(lv)) = lh + listv(ph) = vals(lv) + lh = vals(lv) + lv = lv + 1 + else + ph = lh + lh = listv(lh) + end if + end do + lh = ph + do while (lv <= nv) + listv(lh) = vals(lv) + lh = listv(lh) + lv = lv + 1 + end do + end subroutine hlmerge + + + subroutine printlist(head,listv) + integer(psb_ipk_), intent(in) :: head, listv(:) + integer(psb_ipk_) :: li + + li = head + do while (li > 0) + write(0,*) 'Item: ', li + li = listv(li) + end do + end subroutine printlist + +end subroutine psb_dsparse_biconjg_mlk diff --git a/prec/impl/psb_dsparse_biconjg_s_ft_llk.F90 b/prec/impl/psb_dsparse_biconjg_s_ft_llk.F90 new file mode 100644 index 00000000..fbf4bc02 --- /dev/null +++ b/prec/impl/psb_dsparse_biconjg_s_ft_llk.F90 @@ -0,0 +1,414 @@ +! +! +! AMG-AINV: Approximate Inverse plugin for +! AMG4PSBLAS version 1.0 +! +! (C) Copyright 2020 +! +! Salvatore Filippone University of Rome Tor Vergata +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +subroutine psb_dsparse_biconjg_s_ft_llk(n,a,p,z,w,nzrmax,sp_thresh,info) + use psb_base_mod + use psb_d_ainv_tools_mod + use psb_d_biconjg_mod, psb_protect_name => psb_dsparse_biconjg_s_ft_llk + + ! + ! Left-looking variant, stabilized i.e. product by A is applied + ! to compute the diagonal elements. + ! + ! + implicit none + integer(psb_ipk_), intent(in) :: n + type(psb_d_csr_sparse_mat), intent(in) :: a + type(psb_d_csc_sparse_mat), intent(inout) :: z,w + integer(psb_ipk_), intent(in) :: nzrmax + real(psb_dpk_), intent(in) :: sp_thresh + real(psb_dpk_), intent(out) :: p(:) + integer(psb_ipk_), intent(out) :: info + + ! Locals + integer(psb_ipk_), allocatable :: ia(:), ja(:), izkr(:), izcr(:),iww(:) + real(psb_dpk_), allocatable :: zval(:),val(:), q(:), ww(:) + integer(psb_ipk_) :: i,j,k, kc, kr, err_act, nz, nzra, nzrz, ipzi,ipzj, nzww,& + & nzzi,nzzj, nzz, ip1, ip2, ipza,ipzz, ipzn, nzzn, ipz1, ipz2,& + & ipj, lastj, nextj, nzw, nzrw + type(psb_i_heap) :: heap, rheap + type(psb_d_csc_sparse_mat) :: ac + real(psb_dpk_) :: alpha, tmpq,tmpq2 + character(len=20) :: name='psb_orth_llk' + logical, parameter :: debug=.false. + + allocate(zval(n),ia(n),val(n),izkr(n),izcr(n),q(n),iww(n),ww(n),stat=info) + if (info == psb_success_) call ac%cp_from_fmt(a,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + return + end if + ! + ! Init pointers to: + ! ljr(i): last occupied column index within row I + ! izcr(i): first occupied row index within column I + ! + do i=1,n + izkr(i) = 0 + izcr(i) = 0 + zval(i) = dzero + end do + + ! Init z_1=e_1 and p_1=a_11 + p(1) = dzero + i = 1 + nz = a%irp(i+1) - a%irp(i) + do j=1,nz + if (a%ja(j) == 1) then + p(1) = a%val(j) + exit + end if + end do + if (abs(p(1)) < d_epstol) & + & p(1) = 1.d-3 + + q(1) = p(1) + ! + ! + call z%allocate(n,n,n*nzrmax) + + z%icp(1) = 1 + z%icp(2) = 2 + z%ia(1) = 1 + z%val(1) = done + nzz = 1 + + call w%allocate(n,n,n*nzrmax) + w%icp(1) = 1 + w%icp(2) = 2 + w%ia(1) = 1 + w%val(1) = done + nzw = 1 + + do i = 2, n + if (debug) write(0,*) 'Main loop iteration ',i,n + + ! + ! Update loop on Z. + ! Must be separated from update loop of W because of + ! the conflict on J that would result. + ! + + ! ZVAL = e_i + ! !$ do j=1, i-1 + ! !$ zval(j) = dzero + ! !$ end do + zval(i) = done + izkr(i) = 1 + call heap%init(info) + if (info == psb_success_) call heap%insert(i,info) + if (info == psb_success_) call rheap%init(info) + do j = ac%icp(i), ac%icp(i+1)-1 + if (ac%ia(j) lastj) then + lastj = j + exit inner + end if + end do inner + izcr(j) = 0 + if (j>=i) exit outer + if (debug) write(0,*) 'update loop, using row: ',j + ip1 = w%icp(j) + ip2 = w%icp(j+1) - 1 + nzra = max(0,ip2 - ip1 + 1) + nzww = 0 + call psb_d_spvspm(done,a,nzra,w%ia(ip1:ip2),w%val(ip1:ip2),& + & dzero,nzww,iww,ww,info) + + p(i) = psb_spge_dot(nzww,iww,ww,zval) + + ipz1 = z%icp(j) + ipz2 = z%icp(j+1) + nzrz = ipz2-ipz1 + alpha = (-p(i)/p(j)) +!!$ write(0,*) ' p(i)/p(j) ',i,j,alpha,p(i),p(j) + if (.false..or.(abs(alpha) > sp_thresh)) then + + do k=ipz1, ipz2-1 + kr = z%ia(k) + zval(kr) = zval(kr) + alpha*z%val(k) + if (izkr(kr) == 0) then + call heap%insert(kr,info) + if (info /= psb_success_) exit + izkr(kr) = 1 + ! We have just added a new nonzero in KR. Thus, we will + ! need to explicitly compute the dot products on all + ! rows jj).and.(nextj lastj) then + lastj = j + exit innerw + end if + end do innerw + izcr(j) = 0 + if (j>=i) exit outerw + if (debug) write(0,*) 'update loop, using row: ',j + if (.false.) then + ip1 = ac%icp(j) + ip2 = ac%icp(j+1) - 1 + do + if (ip2 < ip1 ) exit + if (ac%ia(ip2) <= n) exit + ip2 = ip2 -1 + end do + nzra = max(0,ip2 - ip1 + 1) + q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval) + ! !$ write(psb_err_unit,*) j,i,p(i) + else + ip1 = z%icp(j) + ip2 = z%icp(j+1) - 1 + nzra = max(0,ip2 - ip1 + 1) + nzww = 0 + call psb_d_spmspv(done,ac,nzra,z%ia(ip1:ip2),z%val(ip1:ip2),& + & dzero,nzww,iww,ww,info) + + q(i) = psb_spge_dot(nzww,iww,ww,zval) + end if + + ipz1 = w%icp(j) + ipz2 = w%icp(j+1) + nzrz = ipz2-ipz1 + alpha = (-q(i)/q(j)) +!!$ write(0,*) ' q(i)/q(j) ',i,j,alpha,q(i),q(j) + if (.false..or.(abs(alpha) > sp_thresh)) then + + do k=ipz1, ipz2-1 + kr = w%ia(k) + zval(kr) = zval(kr) + alpha*w%val(k) + if (izkr(kr) == 0) then + call heap%insert(kr,info) + if (info /= psb_success_) exit + izkr(kr) = 1 + ! We have just added a new nonzero in KR. Thus, we will + ! need to explicitly compute the dot products on all + ! rows jj).and.(nextj psb_dsparse_biconjg_s_llk + + ! + ! Left-looking variant SYMMETRIC/HERMITIAN A. You have been warned! + ! + ! + implicit none + integer(psb_ipk_), intent(in) :: n + type(psb_d_csr_sparse_mat), intent(in) :: a + type(psb_d_csc_sparse_mat), intent(inout) :: z,w + integer(psb_ipk_), intent(in) :: nzrmax + real(psb_dpk_), intent(in) :: sp_thresh + real(psb_dpk_), intent(out) :: p(:) + integer(psb_ipk_), intent(out) :: info + + ! Locals + integer(psb_ipk_), allocatable :: ia(:), ja(:), izkr(:), izcr(:) + real(psb_dpk_), allocatable :: zval(:),val(:), q(:) + integer(psb_ipk_) :: i,j,k, kc, kr, err_act, nz, nzra, nzrz, ipzi,ipzj,& + & nzzi,nzzj, nzz, ip1, ip2, ipza,ipzz, ipzn, nzzn, ipz1, ipz2,& + & ipj, lastj, nextj, nzw,kk + type(psb_i_heap) :: heap, rheap + type(psb_d_csc_sparse_mat) :: ac + real(psb_dpk_) :: alpha, zvalmax + character(len=20) :: name='psb_orth_llk' + logical, parameter :: debug=.false. + + allocate(zval(n),ia(n),val(n),izkr(n),izcr(n),stat=info) + if (info == psb_success_) call ac%cp_from_fmt(a,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + return + end if + ! + ! izkr(i): flag nonzeros in ZVAL. To minimize traffic into heap. + ! izcr(i): flag rows to be used for the dot products. Used to minimize + ! traffic in rheap. + ! + do i=1,n + izkr(i) = 0 + izcr(i) = 0 + zval(i) = dzero + end do + + ! Init z_1=e_1 and p_1=a_11 + p(1) = dzero + i = 1 + nz = a%irp(i+1) - a%irp(i) + do j=1,nz + if (a%ja(j) == 1) then + p(1) = a%val(j) + exit + end if + end do + if (abs(p(1)) < d_epstol) & + & p(1) = 1.d-3 + + ! + ! + call z%allocate(n,n,n*nzrmax) + + z%icp(1) = 1 + z%icp(2) = 2 + z%ia(1) = 1 + z%val(1) = done + nzz = 1 + zvalmax = done + + do i = 2, n + if (debug) write(0,*) 'Main loop iteration ',i,n + + ! + ! Update loop on Z. + ! Must be separated from update loop of W because of + ! the conflict on J that would result. + ! + + ! ZVAL = e_i + ! !$ do j=1, i-1 + ! !$ zval(j) = dzero + ! !$ end do + zval(i) = done + izkr(i) = 1 + call heap%init(info) + if (info == psb_success_) call heap%insert(i,info) + + if (info == psb_success_) call rheap%init(info) + do j = ac%icp(i), ac%icp(i+1)-1 + if (ac%ia(j) < i) then + if (info == psb_success_) call rheap%insert(ac%ia(j),info) + izcr(ac%ia(j)) = 1 + end if + end do + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_init_heap') + return + end if + + ! Update loop + ! The idea is to keep track of the indices of the nonzeros in zval, + ! so as to only do the dot products on the rows which have nonzeros + ! in their positions; to do this we keep an extra + ! copy of A in CSC, and the row indices to be considered are in rheap. + lastj = -1 + outer: do + inner: do + call rheap%get_first(j,info) + if (debug) write(0,*) 'from get_first: ',j,info + if (info == -1) exit outer ! Empty heap + if (j > lastj) then + lastj = j + exit inner + end if + end do inner + + izcr(j) = 0 + if (j>=i) cycle outer + if (debug) write(0,*) 'update loop, using row: ',j,i + ip1 = a%irp(j) + ip2 = a%irp(j+1) - 1 + do + if (ip2 < ip1 ) exit + if (a%ja(ip2) <= n) exit + ip2 = ip2 -1 + end do + nzra = max(0,ip2 - ip1 + 1) + p(i) = psb_spge_dot(nzra,a%ja(ip1:ip2),a%val(ip1:ip2),zval) + ! !$ write(psb_err_unit,*) j,i,p(i) + + alpha = (-p(i)/p(j)) + + if (.false..or.(abs(alpha) > sp_thresh)) then + do k=z%icp(j), z%icp(j+1)-1 + kr = z%ia(k) + zval(kr) = zval(kr) + alpha*z%val(k) +!!$ if (abs(zval(kr)) > 1e16) then +!!$ write(0,*) i,j,p(i),p(j),alpha,z%val(k),alpha*z%val(k),kr,zval(kr) +!!$ end if + if (izkr(kr) == 0) then + + call heap%insert(kr,info) + if (info /= psb_success_) exit + izkr(kr) = 1 + ! We have just added a new nonzero in KR. Thus, we will + ! need to explicitly compute the dot products on all + ! rows jj).and.(nextj= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),' start' + + iscale_ = psb_ilu_scale_none_ + if (present(iscale)) iscale_ = iscale + weight = sone + ! + ! Check the memory available to hold the W and Z factors + ! and allocate it if needed + ! + nrow_a = a%get_nrows() + nztota = a%get_nzeros() + + if (present(blck)) then + nztota = nztota + blck%get_nzeros() + end if + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & ': out get_nnzeros',nrow_a,nztota,& + & a%get_nrows(),a%get_ncols(),a%get_nzeros() + + + n_row = desc%get_local_rows() + allocate(pq(n_row),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + end if + + nzrmax = fillin + sp_thresh = thresh + + ! + ! Ok, let's start first with Z (i.e. Upper) + ! + call a%csclip(acoo,info,imax=n_row,jmax=n_row) + call acsr%mv_from_coo(acoo,info) + select case(iscale_) + case(psb_ilu_scale_none_) + ! Ok, do nothing. + + case(psb_ilu_scale_maxval_) + weight = acsr%maxval() + weight = sone/weight + call acsr%scal(weight,info) + + case(psb_ilu_scale_arcsum_) + allocate(arws(n_row),acls(n_row),ad(n_row),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + end if + call acsr%arwsum(arws) + call acsr%aclsum(acls) + ad(1:n_row) = sqrt(sqrt(arws(1:n_row)*acls(1:n_row))) + ad(1:n_row) = sone/ad(1:n_row) + call acsr%scal(ad,info,side='L') + call acsr%scal(ad,info,side='R') + case default + call psb_errpush(psb_err_from_subroutine_,name,a_err='wrong iscale') + goto 9999 + end select + + ! + ! Here for the actual workhorses. + ! Only biconjg is surviving for now.... + ! + call psb_sparse_biconjg(alg,n_row,acsr,pq,& + & zmat,wmat,nzrmax,sp_thresh,info) + ! Done. Hopefully.... + + if (info /= psb_success_) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='sparse_biconjg') + goto 9999 + end if + call atmp%mv_from(acsr) + + ! + ! Is this right??? + ! + do i=1, n_row + if (abs(pq(i)) < d_epstol) then + pq(i) = sone + else + pq(i) = sone/pq(i) + end if + end do + + select case(iscale_) + case(psb_ilu_scale_none_) + ! Ok, do nothing. + case(psb_ilu_scale_maxval_) + pq(:) = pq(:)*weight + + case(psb_ilu_scale_arcsum_) + call zmat%scal(ad,info,side='L') + call wmat%scal(ad,info,side='R') + + case default + call psb_errpush(psb_err_from_subroutine_,name,a_err='wrong iscale') + goto 9999 + end select + + call psb_move_alloc(pq,d,info) + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),' end' + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return +end subroutine psb_s_ainv_bld diff --git a/prec/impl/psb_s_bjacprec_impl.f90 b/prec/impl/psb_s_bjacprec_impl.f90 index 72ac6048..d4a4fd17 100644 --- a/prec/impl/psb_s_bjacprec_impl.f90 +++ b/prec/impl/psb_s_bjacprec_impl.f90 @@ -1,9 +1,9 @@ -! +! ! Parallel Sparse BLAS version 3.5 ! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! +! Salvatore Filippone +! Alfredo Buttari +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -15,7 +15,7 @@ ! 3. The name of the PSBLAS group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. -! +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -27,13 +27,13 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! -! +! +! subroutine psb_s_bjac_dump(prec,info,prefix,head) use psb_base_mod use psb_s_bjacprec, psb_protect_name => psb_s_bjac_dump - implicit none + implicit none class(psb_s_bjac_prec_type), intent(in) :: prec integer(psb_ipk_), intent(out) :: info character(len=*), intent(in), optional :: prefix,head @@ -43,13 +43,13 @@ subroutine psb_s_bjac_dump(prec,info,prefix,head) character(len=80) :: prefix_ character(len=120) :: fname ! len should be at least 20 more than - ! len of prefix_ + ! len of prefix_ info = 0 ctxt = prec%get_ctxt() call psb_info(ctxt,iam,np) - if (present(prefix)) then + if (present(prefix)) then prefix_ = trim(prefix(1:min(len(prefix),len(prefix_)))) else prefix_ = "dump_fact_s" @@ -74,7 +74,7 @@ end subroutine psb_s_bjac_dump subroutine psb_s_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work) use psb_base_mod use psb_s_bjacprec, psb_protect_name => psb_s_bjac_apply_vect - implicit none + implicit none type(psb_desc_type),intent(in) :: desc_data class(psb_s_bjac_prec_type), intent(inout) :: prec real(psb_spk_),intent(in) :: alpha,beta @@ -118,12 +118,12 @@ subroutine psb_s_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work) n_row = desc_data%get_local_rows() n_col = desc_data%get_local_cols() - if (x%get_nrows() < n_row) then + if (x%get_nrows() < n_row) then info = 36; ierr(1) = 2; ierr(2) = n_row; call psb_errpush(info,name,i_err=ierr) goto 9999 end if - if (y%get_nrows() < n_row) then + if (y%get_nrows() < n_row) then info = 36; ierr(1) = 3; ierr(2) = n_row; call psb_errpush(info,name,i_err=ierr) goto 9999 @@ -140,9 +140,9 @@ subroutine psb_s_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work) end if - if (n_col <= size(work)) then + if (n_col <= size(work)) then ww => work(1:n_col) - if ((4*n_col+n_col) <= size(work)) then + if ((4*n_col+n_col) <= size(work)) then aux => work(n_col+1:) else allocate(aux(4*n_col),stat=info) @@ -152,19 +152,19 @@ subroutine psb_s_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work) allocate(ww(n_col),aux(4*n_col),stat=info) endif - if (info /= psb_success_) then + if (info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 + goto 9999 end if do_alloc_wrk = .not.prec%is_allocated_wrk() if (do_alloc_wrk) call prec%allocate_wrk(info,vmold=x%v) associate (wv => prec%wrk(1), wv1 => prec%wrk(2)) - + select case(prec%iprcparm(psb_f_type_)) - case(psb_f_ilu_n_) - + case(psb_f_ilu_n_,psb_f_ilu_k_,psb_f_ilu_t_) + select case(trans_) case('N') call psb_spsm(sone,prec%av(psb_l_pr_),x,szero,wv,desc_data,info,& @@ -172,31 +172,58 @@ subroutine psb_s_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work) if(info == psb_success_) call psb_spsm(alpha,prec%av(psb_u_pr_),wv,& & beta,y,desc_data,info,& & trans=trans_,scale='U',choice=psb_none_, work=aux) - + case('T') call psb_spsm(sone,prec%av(psb_u_pr_),x,szero,wv,desc_data,info,& & trans=trans_,scale='L',diag=prec%dv,choice=psb_none_, work=aux) if(info == psb_success_) call psb_spsm(alpha,prec%av(psb_l_pr_),wv,& & beta,y,desc_data,info,& & trans=trans_,scale='U',choice=psb_none_,work=aux) - + case('C') - + call psb_spsm(sone,prec%av(psb_u_pr_),x,szero,wv,desc_data,info,& & trans=trans_,scale='U',choice=psb_none_, work=aux) - + call wv1%mlt(sone,prec%dv,wv,szero,info,conjgx=trans_) - + if(info == psb_success_) call psb_spsm(alpha,prec%av(psb_l_pr_),wv1,& & beta,y,desc_data,info,& & trans=trans_,scale='U',choice=psb_none_,work=aux) - + + end select + if (info /= psb_success_) then + ch_err="psb_spsm" + goto 9999 + end if + + case(psb_f_ainv_,psb_f_invt_,psb_f_invk_) + ! Application of approximate inverse preconditioner, just some spmm + + select case(trans_) + case('N') + call psb_spmm(sone,prec%av(psb_l_pr_),x,szero,wv,desc_data,info,& + & trans=trans_,work=aux,doswap=.false.) + + if (info == psb_success_) call wv1%mlt(sone,prec%dv,wv,szero,info) + if(info == psb_success_) & + & call psb_spmm(alpha,prec%av(psb_u_pr_),wv1,& + & beta,y,desc_data,info, trans=trans_, work=aux,doswap=.false.) + + case('T','C') + call psb_spmm(sone,prec%av(psb_l_pr_),x,szero,wv,desc_data,info,& + & trans=trans_,work=aux,doswap=.false.) + if (info == psb_success_) call wv1%mlt(sone,prec%dv,wv,szero,info) + if (info == psb_success_) & + & call psb_spmm(alpha,prec%av(psb_u_pr_),wv1, & + & beta,y,desc_data,info,trans=trans_,work=aux,doswap=.false.) + end select - if (info /= psb_success_) then + if (info /= psb_success_) then ch_err="psb_spsm" goto 9999 end if - + case default info = psb_err_internal_error_ call psb_errpush(info,name,a_err='Invalid factorization') @@ -204,12 +231,12 @@ subroutine psb_s_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work) end select end associate - + call psb_halo(y,desc_data,info,data=psb_comm_mov_) if (do_alloc_wrk) call prec%free_wrk(info) - if (n_col <= size(work)) then - if ((4*n_col+n_col) <= size(work)) then + if (n_col <= size(work)) then + if ((4*n_col+n_col) <= size(work)) then else deallocate(aux) endif @@ -231,7 +258,7 @@ end subroutine psb_s_bjac_apply_vect subroutine psb_s_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work) use psb_base_mod use psb_s_bjacprec, psb_protect_name => psb_s_bjac_apply - implicit none + implicit none type(psb_desc_type),intent(in) :: desc_data class(psb_s_bjac_prec_type), intent(inout) :: prec real(psb_spk_),intent(in) :: alpha,beta @@ -244,6 +271,7 @@ subroutine psb_s_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work) ! Local variables integer(psb_ipk_) :: n_row,n_col real(psb_spk_), pointer :: ww(:), aux(:) + type(psb_d_vect_type) :: tx,ty type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np,me integer(psb_ipk_) :: err_act, ierr(5) @@ -273,12 +301,12 @@ subroutine psb_s_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work) n_row = desc_data%get_local_rows() n_col = desc_data%get_local_cols() - if (size(x) < n_row) then + if (size(x) < n_row) then info = 36; ierr(1) = 2; ierr(2) = n_row; call psb_errpush(info,name,i_err=ierr) goto 9999 end if - if (size(y) < n_row) then + if (size(y) < n_row) then info = 36; ierr(1) = 3; ierr(2) = n_row; call psb_errpush(info,name,i_err=ierr) goto 9999 @@ -295,29 +323,29 @@ subroutine psb_s_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work) end if - if (n_col <= size(work)) then + if (n_col <= size(work)) then ww => work(1:n_col) - if ((4*n_col+n_col) <= size(work)) then + if ((4*n_col+n_col) <= size(work)) then aux => work(n_col+1:) else allocate(aux(4*n_col),stat=info) - if (info /= psb_success_) then + if (info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 + goto 9999 end if endif else allocate(ww(n_col),aux(4*n_col),stat=info) - if (info /= psb_success_) then + if (info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 + goto 9999 end if endif select case(prec%iprcparm(psb_f_type_)) - case(psb_f_ilu_n_) + case(psb_f_ilu_n_,psb_f_ilu_k_,psb_f_ilu_t_) select case(trans_) case('N') @@ -344,11 +372,42 @@ subroutine psb_s_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work) & trans=trans_,scale='U',choice=psb_none_,work=aux) end select - if (info /= psb_success_) then + if (info /= psb_success_) then ch_err="psb_spsm" goto 9999 end if + case(psb_f_ainv_,psb_f_invt_,psb_f_invk_) + ! Application of approximate inverse preconditioner, just some spmm + + select case(trans_) + + case('N') + call psb_spmm(sone,prec%av(psb_l_pr_),x,szero,ww,desc_data,info,& + & trans=trans_,work=aux,doswap=.false.) + ww(1:n_row) = ww(1:n_row) * prec%dv%v%v(1:n_row) + if (info == psb_success_) & + & call psb_spmm(alpha,prec%av(psb_u_pr_),ww,beta,y,desc_data,info,& + & trans=trans_,work=aux,doswap=.false.) + + case('T') + call psb_spmm(sone,prec%av(psb_u_pr_),x,szero,ww,desc_data,info,& + & trans=trans_,work=aux,doswap=.false.) + ww(1:n_row) = ww(1:n_row) * prec%dv%v%v(1:n_row) + if (info == psb_success_) & + & call psb_spmm(alpha,prec%av(psb_l_pr_),ww,beta,y,desc_data,info,& + & trans=trans_,work=aux,doswap=.false.) + + case('C') + call psb_spmm(sone,prec%av(psb_u_pr_),x,szero,ww,desc_data,info,& + & trans=trans_,work=aux,doswap=.false.) + ww(1:n_row) = ww(1:n_row) * (prec%dv%v%v(1:n_row)) + if (info == psb_success_) & + & call psb_spmm(alpha,prec%av(psb_l_pr_),ww,beta,y,desc_data,info,& + & trans=trans_,work=aux,doswap=.false.) + + end select + case default info = psb_err_internal_error_ @@ -358,8 +417,8 @@ subroutine psb_s_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work) call psb_halo(y,desc_data,info,data=psb_comm_mov_) - if (n_col <= size(work)) then - if ((4*n_col+n_col) <= size(work)) then + if (n_col <= size(work)) then + if ((4*n_col+n_col) <= size(work)) then else deallocate(aux) endif @@ -397,11 +456,26 @@ subroutine psb_s_bjac_precinit(prec,info) call psb_Errpush(info,name) goto 9999 end if + call psb_realloc(psb_rfpsz,prec%rprcparm,info) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_Errpush(info,name) + goto 9999 + end if prec%iprcparm(:) = 0 prec%iprcparm(psb_p_type_) = psb_bjac_ prec%iprcparm(psb_f_type_) = psb_f_ilu_n_ prec%iprcparm(psb_ilu_fill_in_) = 0 + prec%iprcparm(psb_ilu_ialg_) = psb_ilu_n_ + prec%iprcparm(psb_ilu_scale_) = psb_ilu_scale_none_ + prec%iprcparm(psb_inv_fillin_) = 0 + prec%iprcparm(psb_ainv_alg_) = psb_ainv_llk_ + + + prec%rprcparm(:) = 0 + prec%rprcparm(psb_fact_eps_) = 1E-1_psb_spk_ + prec%rprcparm(psb_inv_thresh_) = 1E-1_psb_spk_ call psb_erractionrestore(err_act) @@ -416,7 +490,8 @@ end subroutine psb_s_bjac_precinit subroutine psb_s_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold) use psb_base_mod - use psb_prec_mod, only : psb_ilu_fct + use psb_s_ilu_fact_mod + use psb_s_ainv_fact_mod use psb_s_bjacprec, psb_protect_name => psb_s_bjac_precbld Implicit None @@ -428,12 +503,13 @@ subroutine psb_s_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold) class(psb_s_base_vect_type), intent(in), optional :: vmold class(psb_i_base_vect_type), intent(in), optional :: imold - ! .. Local Scalars .. - integer(psb_ipk_) :: i, m + ! .. Local Scalars .. + integer(psb_ipk_) :: i, m, ialg, fill_in, iscale, inv_fill, iinvalg integer(psb_ipk_) :: ierr(5) character :: trans, unitd - type(psb_s_csr_sparse_mat), allocatable :: lf, uf + type(psb_sspmat_type), allocatable :: lf, uf real(psb_spk_), allocatable :: dd(:) + real(psb_spk_) :: fact_eps, inv_thresh integer(psb_ipk_) :: nztota, err_act, n_row, nrow_a,n_col, nhalo type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np,me @@ -462,19 +538,114 @@ subroutine psb_s_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold) trans = 'N' unitd = 'U' + ! We check if all the information contained in the preconditioner structure + ! are meaningful, otherwise we give an error and get out of the build + ! procedure + ialg = prec%iprcparm(psb_ilu_ialg_) ! Integer for ILU type algorithm + iinvalg = prec%iprcparm(psb_ainv_alg_) ! Integer for AINV type algorithm + iscale = prec%iprcparm(psb_ilu_scale_) ! Integer for scaling of matrix + fact_eps = prec%rprcparm(psb_fact_eps_) ! Drop-tolerance for factorization + inv_thresh = prec%rprcparm(psb_inv_thresh_) ! Drop-tolerance for inverse + fill_in = prec%iprcparm(psb_ilu_fill_in_) ! Fill-In for factorization + inv_fill = prec%iprcparm(psb_inv_fillin_) ! Fill-In for inverse factorization + + ! Check if the type of scaling is known, pay attention that not all the + ! scalings make sense for all the factorization, if something that does not + ! make sense is required the factorization routine will fail in an + ! unnrecoverable way. + if ((iscale == psb_ilu_scale_none_).or.& + (iscale == psb_ilu_scale_maxval_).or.& + (iscale == psb_ilu_scale_diag_).or.& + (iscale == psb_ilu_scale_arwsum_).or.& + (iscale == psb_ilu_scale_aclsum_).or.& + (iscale == psb_ilu_scale_arcsum_)) then + ! Do nothing: admissible request + else + info=psb_err_from_subroutine_ + ch_err='psb_ilu_scale_' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + select case (prec%iprcparm(psb_f_type_)) + case (psb_f_ainv_) + ! Check if the variant for the AINV is known to the library + select case (iinvalg) + case(psb_ainv_llk_,psb_ainv_s_llk_,psb_ainv_s_ft_llk_,psb_ainv_llk_noth_,& + & psb_ainv_mlk_) + ! Do nothing these are okay + case default + info=psb_err_from_subroutine_ + ch_err='psb_ainv_alg_' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end select ! AINV Variant + ! Check if the drop-tolerance make sense + if( inv_thresh > 1) then + info=psb_err_from_subroutine_ + ch_err='psb_inv_thresh_' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + case (psb_f_ilu_t_) + if (fact_eps > 1) then + ! Check if the drop-tolerance make sense + info=psb_err_from_subroutine_ + ch_err='psb_fact_eps_' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + case (psb_f_invt_) + ! Check both tolerances + if (fact_eps > 1) then + info=psb_err_from_subroutine_ + ch_err='psb_fact_eps_' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + if( inv_thresh > 1) then + info=psb_err_from_subroutine_ + ch_err='psb_inv_thresh_' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + case default + end select + + + ! Checks relative to the fill-in parameters + if (prec%iprcparm(psb_f_type_) == psb_f_ilu_n_) then + if(fill_in < 0) then + info=psb_err_from_subroutine_ + ch_err='psb_ilu_fill_in_' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + else if (fill_in == 0) then + ! If the requested level of fill is equal to zero, we default to the + ! specialized ILU(0) routine + prec%iprcparm(psb_f_type_) = psb_f_ilu_n_ + end if + end if + ! If no limit on the fill_in is required we allow every fill, this is needed + ! since this quantity is used to allocate the auxiliary vectors for the + ! factorization + if (inv_fill <= 0) inv_fill = m + + ! Select on the type of factorization to be used select case(prec%iprcparm(psb_f_type_)) - case(psb_f_ilu_n_) + case(psb_f_ilu_n_) + ! ILU(0) Factorization: the total number of nonzeros of the factorized matrix + ! is equal to the one of the input matrix - if (allocated(prec%av)) then - if (size(prec%av) < psb_bp_ilu_avsz) then - do i=1,size(prec%av) + if (allocated(prec%av)) then + if (size(prec%av) < psb_bp_ilu_avsz) then + do i=1,size(prec%av) call prec%av(i)%free() enddo deallocate(prec%av,stat=info) endif end if - if (.not.allocated(prec%av)) then + if (.not.allocated(prec%av)) then allocate(prec%av(psb_max_avsz),stat=info) if (info /= psb_success_) then call psb_errpush(psb_err_alloc_dealloc_,name) @@ -501,27 +672,27 @@ subroutine psb_s_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold) end if allocate(dd(n_row),stat=info) - if (info == psb_success_) then + if (info == psb_success_) then allocate(prec%dv, stat=info) - if (info == 0) then - if (present(vmold)) then - allocate(prec%dv%v,mold=vmold,stat=info) - else - allocate(psb_s_base_vect_type :: prec%dv%v,stat=info) + if (info == 0) then + if (present(vmold)) then + allocate(prec%dv%v,mold=vmold,stat=info) + else + allocate(psb_s_base_vect_type :: prec%dv%v,stat=info) end if end if end if - if (info /= psb_success_) then + if (info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 + goto 9999 endif - ! This is where we have no renumbering, thus no need - call psb_ilu_fct(a,lf,uf,dd,info) + ! This is where we have no renumbering, thus no need + call psb_ilu0_fact(ialg,a,lf,uf,dd,info) if(info == psb_success_) then - call prec%av(psb_l_pr_)%mv_from(lf) - call prec%av(psb_u_pr_)%mv_from(uf) + call prec%av(psb_l_pr_)%mv_from(lf%a) + call prec%av(psb_u_pr_)%mv_from(uf%a) call prec%av(psb_l_pr_)%set_asb() call prec%av(psb_u_pr_)%set_asb() call prec%av(psb_l_pr_)%trim() @@ -530,12 +701,375 @@ subroutine psb_s_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold) ! call move_alloc(dd,prec%d) else info=psb_err_from_subroutine_ - ch_err='psb_ilu_fct' + ch_err='psb_ilu0_fact' call psb_errpush(info,name,a_err=ch_err) goto 9999 end if - case(psb_f_none_) + case(psb_f_ilu_k_) + ! ILU(N) Incomplete LU-factorization with N levels of fill-in. Depending on + ! the type of the variant of the algorithm the may be forgotten or added to + ! the diagonal (MILU) + + if (allocated(prec%av)) then + if (size(prec%av) < psb_bp_ilu_avsz) then + do i=1,size(prec%av) + call prec%av(i)%free() + enddo + deallocate(prec%av,stat=info) + endif + end if + if (.not.allocated(prec%av)) then + allocate(prec%av(psb_max_avsz),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + endif + + nrow_a = desc_a%get_local_rows() + nztota = a%get_nzeros() + + n_col = desc_a%get_local_cols() + nhalo = n_col-nrow_a + n_row = nrow_a + + allocate(lf,uf,stat=info) + if (info == psb_success_) call lf%allocate(n_row,n_row,nztota) + if (info == psb_success_) call uf%allocate(n_row,n_row,nztota) + + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_sp_all' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + allocate(dd(n_row),stat=info) + if (info == psb_success_) then + allocate(prec%dv, stat=info) + if (info == 0) then + if (present(vmold)) then + allocate(prec%dv%v,mold=vmold,stat=info) + else + allocate(psb_s_base_vect_type :: prec%dv%v,stat=info) + end if + end if + end if + + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + endif + ! This is where we have no renumbering, thus no need + call psb_iluk_fact(fill_in,ialg,a,lf,uf,dd,info) + + if(info == psb_success_) then + call prec%av(psb_l_pr_)%mv_from(lf%a) + call prec%av(psb_u_pr_)%mv_from(uf%a) + call prec%av(psb_l_pr_)%set_asb() + call prec%av(psb_u_pr_)%set_asb() + call prec%av(psb_l_pr_)%trim() + call prec%av(psb_u_pr_)%trim() + call prec%dv%bld(dd) + ! call move_alloc(dd,prec%d) + else + info=psb_err_from_subroutine_ + ch_err='psb_iluk_fact' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + case(psb_f_ilu_t_) + ! ILU(N,E) Incomplete LU factorization with thresholding and level of fill + + if (allocated(prec%av)) then + if (size(prec%av) < psb_bp_ilu_avsz) then + do i=1,size(prec%av) + call prec%av(i)%free() + enddo + deallocate(prec%av,stat=info) + endif + end if + if (.not.allocated(prec%av)) then + allocate(prec%av(psb_max_avsz),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + endif + + nrow_a = desc_a%get_local_rows() + nztota = a%get_nzeros() + + n_col = desc_a%get_local_cols() + nhalo = n_col-nrow_a + n_row = nrow_a + + allocate(lf,uf,stat=info) + if (info == psb_success_) call lf%allocate(n_row,n_row,nztota) + if (info == psb_success_) call uf%allocate(n_row,n_row,nztota) + + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_sp_all' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + allocate(dd(n_row),stat=info) + if (info == psb_success_) then + allocate(prec%dv, stat=info) + if (info == 0) then + if (present(vmold)) then + allocate(prec%dv%v,mold=vmold,stat=info) + else + allocate(psb_s_base_vect_type :: prec%dv%v,stat=info) + end if + end if + end if + + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + endif + ! This is where we have no renumbering, thus no need + call psb_ilut_fact(fill_in,fact_eps,a,lf,uf,dd,info,iscale=iscale) + + if(info == psb_success_) then + call prec%av(psb_l_pr_)%mv_from(lf%a) + call prec%av(psb_u_pr_)%mv_from(uf%a) + call prec%av(psb_l_pr_)%set_asb() + call prec%av(psb_u_pr_)%set_asb() + call prec%av(psb_l_pr_)%trim() + call prec%av(psb_u_pr_)%trim() + call prec%dv%bld(dd) + ! call move_alloc(dd,prec%d) + else + info=psb_err_from_subroutine_ + ch_err='psb_ilut_fact' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + case(psb_f_ainv_) + ! Approximate Inverse Factorizations based on variants of the incomplete + ! biconjugation algorithms + if (allocated(prec%av)) then + if (size(prec%av) < psb_bp_ilu_avsz) then + do i=1,size(prec%av) + call prec%av(i)%free() + enddo + deallocate(prec%av,stat=info) + endif + end if + if (.not.allocated(prec%av)) then + allocate(prec%av(psb_max_avsz),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + endif + + nrow_a = desc_a%get_local_rows() + nztota = a%get_nzeros() + + n_col = desc_a%get_local_cols() + nhalo = n_col-nrow_a + n_row = nrow_a + + allocate(lf,uf,stat=info) + if (info == psb_success_) call lf%allocate(n_row,n_row,nztota) + if (info == psb_success_) call uf%allocate(n_row,n_row,nztota) + + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_sp_all' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + allocate(dd(n_row),stat=info) + if (info == psb_success_) then + allocate(prec%dv, stat=info) + if (info == 0) then + if (present(vmold)) then + allocate(prec%dv%v,mold=vmold,stat=info) + else + allocate(psb_s_base_vect_type :: prec%dv%v,stat=info) + end if + end if + end if + + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + endif + ! Computing the factorization + call psb_ainv_fact(a,iinvalg,inv_fill,inv_thresh,lf,dd,uf,desc_a,info,iscale=iscale) + + if(info == psb_success_) then + call prec%av(psb_l_pr_)%mv_from(lf%a) + call prec%av(psb_u_pr_)%mv_from(uf%a) + call prec%av(psb_l_pr_)%set_asb() + call prec%av(psb_u_pr_)%set_asb() + call prec%av(psb_l_pr_)%trim() + call prec%av(psb_u_pr_)%trim() + call prec%dv%bld(dd) + ! call move_alloc(dd,prec%d) + else + info=psb_err_from_subroutine_ + ch_err='psb_ilut_fact' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + case(psb_f_invk_) + ! Approximate Inverse Factorizations based on the sparse inversion of + ! triangular factors of an ILU(n) factorization + if (allocated(prec%av)) then + if (size(prec%av) < psb_bp_ilu_avsz) then + do i=1,size(prec%av) + call prec%av(i)%free() + enddo + deallocate(prec%av,stat=info) + endif + end if + if (.not.allocated(prec%av)) then + allocate(prec%av(psb_max_avsz),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + endif + + nrow_a = desc_a%get_local_rows() + nztota = a%get_nzeros() + + n_col = desc_a%get_local_cols() + nhalo = n_col-nrow_a + n_row = nrow_a + + allocate(lf,uf,stat=info) + if (info == psb_success_) call lf%allocate(n_row,n_row,nztota) + if (info == psb_success_) call uf%allocate(n_row,n_row,nztota) + + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_sp_all' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + allocate(dd(n_row),stat=info) + if (info == psb_success_) then + allocate(prec%dv, stat=info) + if (info == 0) then + if (present(vmold)) then + allocate(prec%dv%v,mold=vmold,stat=info) + else + allocate(psb_s_base_vect_type :: prec%dv%v,stat=info) + end if + end if + end if + + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + endif + ! Computing the factorization + call psb_invk_fact(a,fill_in, inv_fill,lf,dd,uf,desc_a,info) + + if(info == psb_success_) then + call prec%av(psb_l_pr_)%mv_from(lf%a) + call prec%av(psb_u_pr_)%mv_from(uf%a) + call prec%av(psb_l_pr_)%set_asb() + call prec%av(psb_u_pr_)%set_asb() + call prec%av(psb_l_pr_)%trim() + call prec%av(psb_u_pr_)%trim() + call prec%dv%bld(dd) + ! call move_alloc(dd,prec%d) + else + info=psb_err_from_subroutine_ + ch_err='psb_ilut_fact' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + case(psb_f_invt_) + ! Approximate Inverse Factorizations based on the sparse inversion of + ! triangular factors of an ILU(eps) factorization + if (allocated(prec%av)) then + if (size(prec%av) < psb_bp_ilu_avsz) then + do i=1,size(prec%av) + call prec%av(i)%free() + enddo + deallocate(prec%av,stat=info) + endif + end if + if (.not.allocated(prec%av)) then + allocate(prec%av(psb_max_avsz),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + endif + + nrow_a = desc_a%get_local_rows() + nztota = a%get_nzeros() + + n_col = desc_a%get_local_cols() + nhalo = n_col-nrow_a + n_row = nrow_a + + allocate(lf,uf,stat=info) + if (info == psb_success_) call lf%allocate(n_row,n_row,nztota) + if (info == psb_success_) call uf%allocate(n_row,n_row,nztota) + + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_sp_all' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + allocate(dd(n_row),stat=info) + if (info == psb_success_) then + allocate(prec%dv, stat=info) + if (info == 0) then + if (present(vmold)) then + allocate(prec%dv%v,mold=vmold,stat=info) + else + allocate(psb_s_base_vect_type :: prec%dv%v,stat=info) + end if + end if + end if + + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + endif + ! Computing the factorization + call psb_invt_fact(a,fill_in,inv_fill,fact_eps,inv_thresh,lf,dd,uf,& + & desc_a,info) + + if(info == psb_success_) then + call prec%av(psb_l_pr_)%mv_from(lf%a) + call prec%av(psb_u_pr_)%mv_from(uf%a) + call prec%av(psb_l_pr_)%set_asb() + call prec%av(psb_u_pr_)%set_asb() + call prec%av(psb_l_pr_)%trim() + call prec%av(psb_u_pr_)%trim() + call prec%dv%bld(dd) + ! call move_alloc(dd,prec%d) + else + info=psb_err_from_subroutine_ + ch_err='psb_ilut_fact' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + case(psb_f_none_) info=psb_err_from_subroutine_ ch_err='Inconsistent prec psb_f_none_' call psb_errpush(info,name,a_err=ch_err) @@ -548,7 +1082,7 @@ subroutine psb_s_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold) goto 9999 end select - if (present(amold)) then + if (present(amold)) then call prec%av(psb_l_pr_)%cscnv(info,mold=amold) call prec%av(psb_u_pr_)%cscnv(info,mold=amold) end if @@ -567,8 +1101,8 @@ subroutine psb_s_bjac_precseti(prec,what,val,info) Implicit None class(psb_s_bjac_prec_type),intent(inout) :: prec - integer(psb_ipk_), intent(in) :: what - integer(psb_ipk_), intent(in) :: val + integer(psb_ipk_), intent(in) :: what + integer(psb_ipk_), intent(in) :: val integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: err_act, nrow character(len=20) :: name='s_bjac_precset' @@ -576,34 +1110,33 @@ subroutine psb_s_bjac_precseti(prec,what,val,info) call psb_erractionsave(err_act) info = psb_success_ - if (.not.allocated(prec%iprcparm)) then + if (.not.allocated(prec%iprcparm)) then info = 1124 call psb_errpush(info,name,a_err="preconditioner") goto 9999 end if select case(what) - case (psb_f_type_) - if (prec%iprcparm(psb_p_type_) /= psb_bjac_) then - write(psb_err_unit,*) 'WHAT is invalid for current preconditioner ',& - & prec%iprcparm(psb_p_type_),& - & 'ignoring user specification' - return - endif + case (psb_f_type_) prec%iprcparm(psb_f_type_) = val - case (psb_ilu_fill_in_) - if ((prec%iprcparm(psb_p_type_) /= psb_bjac_).or.& - & (prec%iprcparm(psb_f_type_) /= psb_f_ilu_n_)) then - write(psb_err_unit,*) 'WHAT is invalid for current preconditioner ',& - & prec%iprcparm(psb_p_type_),& - & 'ignoring user specification' - return - endif + case (psb_ilu_fill_in_) prec%iprcparm(psb_ilu_fill_in_) = val + case (psb_ilu_ialg_) + prec%iprcparm(psb_ilu_ialg_) = val + + case (psb_ilu_scale_) + prec%iprcparm(psb_ilu_scale_) = val + + case (psb_ainv_alg_) + prec%iprcparm(psb_ainv_alg_) = val + + case(psb_inv_fillin_) + prec%iprcparm(psb_inv_fillin_) = val + case default - write(psb_err_unit,*) 'WHAT is invalid, ignoring user specification' + write(psb_err_unit,'(i0," is invalid, ignoring user specification")') what end select @@ -613,3 +1146,45 @@ subroutine psb_s_bjac_precseti(prec,what,val,info) 9999 call psb_error_handler(err_act) return end subroutine psb_s_bjac_precseti + +subroutine psb_s_bjac_precsetr(prec,what,val,info) + + use psb_base_mod + use psb_s_bjacprec, psb_protect_name => psb_s_bjac_precsetr + Implicit None + + class(psb_s_bjac_prec_type),intent(inout) :: prec + integer(psb_ipk_), intent(in) :: what + real(psb_spk_), intent(in) :: val + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act, nrow + character(len=20) :: name='s_bjac_precset' + + call psb_erractionsave(err_act) + + info = psb_success_ + if (.not.allocated(prec%rprcparm)) then + info = 1124 + call psb_errpush(info,name,a_err="preconditioner") + goto 9999 + end if + + select case(what) + + case (psb_fact_eps_) + prec%rprcparm(psb_fact_eps_) = val + + case (psb_inv_thresh_) + prec%rprcparm(psb_inv_thresh_) = val + + case default + write(psb_err_unit,'(i0," is invalid, ignoring user specification")') what + + end select + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return +end subroutine psb_s_bjac_precsetr diff --git a/prec/impl/psb_s_ilu0_fact.f90 b/prec/impl/psb_s_ilu0_fact.f90 index bc2dd5ab..b6f442e9 100644 --- a/prec/impl/psb_s_ilu0_fact.f90 +++ b/prec/impl/psb_s_ilu0_fact.f90 @@ -1,9 +1,9 @@ -! +! ! Parallel Sparse BLAS version 3.5 ! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! +! Salvatore Filippone +! Alfredo Buttari +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -15,7 +15,7 @@ ! 3. The name of the PSBLAS group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. -! +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -27,21 +27,21 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! +! ! Moved here from MLD2P4, original copyright below. -! -! -! +! +! +! ! MLD2P4 version 2.2 ! MultiLevel Domain Decomposition Parallel Preconditioners Package ! based on PSBLAS (Parallel Sparse BLAS version 3.5) -! -! (C) Copyright 2008-2018 -! -! Salvatore Filippone -! Pasqua D'Ambra -! Daniela di Serafino -! +! +! (C) Copyright 2008-2018 +! +! Salvatore Filippone +! Pasqua D'Ambra +! Daniela di Serafino +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -53,7 +53,7 @@ ! 3. The name of the MLD2P4 group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. -! +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -65,8 +65,8 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! -! +! +! ! File: psb_silu0_fact.f90 ! ! Subroutine: psb_silu0_fact @@ -75,7 +75,7 @@ ! ! This routine computes either the ILU(0) or the MILU(0) factorization of ! the diagonal blocks of a distributed matrix. These factorizations are used -! to build the 'base preconditioner' (block-Jacobi preconditioner/solver, +! to build the 'base preconditioner' (block-Jacobi preconditioner/solver, ! Additive Schwarz preconditioner) corresponding to a given level of a ! multilevel preconditioner. ! @@ -83,10 +83,10 @@ ! Y. Saad, Iterative Methods for Sparse Linear Systems, Second Edition, ! SIAM, 2003, Chapter 10. ! -! The local matrix is stored into a and blck, as specified in the description -! of the arguments below. The storage format for both the L and U factors is CSR. -! The diagonal of the U factor is stored separately (actually, the inverse of the -! diagonal entries is stored; this is then managed in the solve stage associated +! The local matrix is stored into a and blck, as specified in the description +! of the arguments below. The storage format for both the L and U factors is CSR. +! The diagonal of the U factor is stored separately (actually, the inverse of the +! diagonal entries is stored; this is then managed in the solve stage associated ! to the ILU(0)/MILU(0) factorization). ! ! The routine copies and factors "on the fly" from a and blck into l (L factor), @@ -94,7 +94,7 @@ ! ! This implementation of ILU(0)/MILU(0) is faster than the implementation in ! psb_ziluk_fct (the latter routine performs the more general ILU(k)/MILU(k)). -! +! ! ! Arguments: ! ialg - integer, input. @@ -121,7 +121,7 @@ ! factorization. ! Note: its allocation is managed by the calling routine psb_ilu_bld, ! hence it cannot be only intent(out). -! info - integer, output. +! info - integer, output. ! Error code. ! blck - type(psb_sspmat_type), input, optional, target. ! The sparse matrix structure containing the remote rows of the @@ -129,7 +129,7 @@ ! to build an Additive Schwarz base preconditioner with overlap ! greater than 0. If the overlap is 0 or the matrix has been reordered ! (see psb_fact_bld), then blck is empty. -! +! subroutine psb_silu0_fact(ialg,a,l,u,d,info,blck, upd) use psb_base_mod @@ -157,30 +157,30 @@ subroutine psb_silu0_fact(ialg,a,l,u,d,info,blck, upd) info = psb_success_ call psb_erractionsave(err_act) - ! + ! ! Point to / allocate memory for the incomplete factorization ! - if (present(blck)) then + if (present(blck)) then blck_ => blck else - allocate(blck_,stat=info) - if (info == psb_success_) call blck_%csall(izero,izero,info,ione) + allocate(blck_,stat=info) + if (info == psb_success_) call blck_%allocate(izero,izero,info,ione,type='CSR') if (info /= psb_success_) then info=psb_err_from_subroutine_ - ch_err='csall' + ch_err='allocate' call psb_errpush(info,name,a_err=ch_err) goto 9999 end if endif - if (present(upd)) then - upd_ = psb_toupper(upd) + if (present(upd)) then + upd_ = psb_toupper(upd) else upd_ = 'F' end if - + m = a%get_nrows() + blck_%get_nrows() if ((m /= l%get_nrows()).or.(m /= u%get_nrows()).or.& - & (m > size(d)) ) then + & (m > size(d)) ) then write(0,*) 'Wrong allocation status for L,D,U? ',& & l%get_nrows(),size(d),u%get_nrows() info = -1 @@ -212,12 +212,12 @@ subroutine psb_silu0_fact(ialg,a,l,u,d,info,blck, upd) call u%set_triangle() call u%set_unit() call u%set_upper() - + ! ! Nullify pointer / deallocate memory ! - if (present(blck)) then - blck_ => null() + if (present(blck)) then + blck_ => null() else call blck_%free() if(info.ne.0) then @@ -226,7 +226,7 @@ subroutine psb_silu0_fact(ialg,a,l,u,d,info,blck, upd) call psb_errpush(info,name,a_err=ch_err) goto 9999 end if - deallocate(blck_) + deallocate(blck_) endif call psb_erractionrestore(err_act) @@ -243,9 +243,9 @@ contains ! Version: real ! Note: internal subroutine of psb_silu0_fact. ! - ! This routine computes either the ILU(0) or the MILU(0) factorization of the - ! diagonal blocks of a distributed matrix. - ! These factorizations are used to build the 'base preconditioner' + ! This routine computes either the ILU(0) or the MILU(0) factorization of the + ! diagonal blocks of a distributed matrix. + ! These factorizations are used to build the 'base preconditioner' ! (block-Jacobi preconditioner/solver, Additive Schwarz ! preconditioner) corresponding to a given level of a multilevel preconditioner. ! @@ -257,7 +257,7 @@ contains ! ! The routine copies and factors "on the fly" from the sparse matrix structures a ! and b into the arrays lval, uval, d (L, U without its diagonal, diagonal of U). - ! + ! ! ! Arguments: ! ialg - integer, input. @@ -296,7 +296,7 @@ contains ! according to the CSR storage format. ! lirp - integer, dimension(:), input/output. ! The indices identifying the first nonzero entry of each row - ! of the L factor in lval, according to the CSR storage format. + ! of the L factor in lval, according to the CSR storage format. ! uval - real(psb_spk_), dimension(:), input/output. ! The U factor in the incomplete factorization. ! The entries of U are stored according to the CSR format. @@ -305,18 +305,18 @@ contains ! according to the CSR storage format. ! uirp - integer, dimension(:), input/output. ! The indices identifying the first nonzero entry of each row - ! of the U factor in uval, according to the CSR storage format. + ! of the U factor in uval, according to the CSR storage format. ! l1 - integer, output. ! The number of nonzero entries in lval. ! l2 - integer, output. ! The number of nonzero entries in uval. - ! info - integer, output. + ! info - integer, output. ! Error code. ! subroutine psb_silu0_factint(ialg,a,b,& & d,lval,lja,lirp,uval,uja,uirp,l1,l2,upd,info) - implicit none + implicit none ! Arguments integer(psb_ipk_), intent(in) :: ialg @@ -332,7 +332,7 @@ contains real(psb_spk_) :: dia,temp integer(psb_ipk_), parameter :: nrb=16 type(psb_s_coo_sparse_mat) :: trw - integer(psb_ipk_) :: int_err(5) + integer(psb_ipk_) :: int_err(5) character(len=20) :: name, ch_err name='psb_silu0_factint' @@ -346,10 +346,10 @@ contains select case(ialg) case(psb_ilu_n_,psb_milu_n_) - ! Ok + ! Ok case default info=psb_err_input_asize_invalid_i_ - call psb_errpush(info,name,& + call psb_errpush(info,name,& & i_err=(/ione,ialg,izero,izero,izero/)) goto 9999 end select @@ -363,7 +363,7 @@ contains end if m = ma+mb - if (psb_toupper(upd) == 'F' ) then + if (psb_toupper(upd) == 'F' ) then lirp(1) = 1 uirp(1) = 1 l1 = 0 @@ -379,14 +379,14 @@ contains if (i <= ma) then ! ! Copy the i-th local row of the matrix, stored in a, - ! into lval/d(i)/uval + ! into lval/d(i)/uval ! call ilu_copyin(i,ma,a,i,ione,m,l1,lja,lval,& & d(i),l2,uja,uval,ktrw,trw,upd) else ! ! Copy the i-th local row of the matrix, stored in b - ! (as (i-ma)-th row), into lval/d(i)/uval + ! (as (i-ma)-th row), into lval/d(i)/uval ! call ilu_copyin(i-ma,mb,b,i,ione,m,l1,lja,lval,& & d(i),l2,uja,uval,ktrw,trw,upd) @@ -437,7 +437,7 @@ contains ! dia = dia - temp*uval(jj) cycle updateloop - ! + ! else if (j > i) then ! ! search u(i,*) (i-th row of U) for a matching index j @@ -454,23 +454,23 @@ contains end if enddo end if - ! - ! If we get here we missed the cycle updateloop, which means + ! + ! If we get here we missed the cycle updateloop, which means ! that this entry does not match; thus we accumulate on the ! diagonal for MILU(0). ! - if (ialg == psb_milu_n_) then + if (ialg == psb_milu_n_) then dia = dia - temp*uval(jj) end if enddo updateloop enddo - ! + ! ! Check the pivot size - ! + ! if (abs(dia) < s_epstol) then ! ! Too small pivot: unstable factorization - ! + ! info = psb_err_pivot_too_small_ int_err(1) = i write(ch_err,'(g20.10)') abs(dia) @@ -493,7 +493,7 @@ contains else write(0,*) 'Update not implemented ' info = 31 - call psb_errpush(info,name,& + call psb_errpush(info,name,& & i_err=(/ione*13,izero,izero,izero,izero/),a_err=upd) goto 9999 @@ -515,7 +515,7 @@ contains ! Version: real ! Note: internal subroutine of psb_silu0_fact ! - ! This routine copies a row of a sparse matrix A, stored in the psb_sspmat_type + ! This routine copies a row of a sparse matrix A, stored in the psb_sspmat_type ! data structure a, into the arrays lval and uval and into the scalar variable ! dia, corresponding to the lower and upper triangles of A and to the diagonal ! entry of the row, respectively. The entries in lval and uval are stored @@ -529,14 +529,14 @@ contains ! ! The routine is used by psb_silu0_factint in the computation of the ILU(0)/MILU(0) ! factorization of a local sparse matrix. - ! + ! ! TODO: modify the routine to allow copying into output L and U that are ! already filled with indices; this would allow computing an ILU(k) pattern, - ! then use the ILU(0) internal for subsequent calls with the same pattern. + ! then use the ILU(0) internal for subsequent calls with the same pattern. ! ! Arguments: ! i - integer, input. - ! The local index of the row to be extracted from the + ! The local index of the row to be extracted from the ! sparse matrix structure a. ! m - integer, input. ! The number of rows of the local matrix stored into a. @@ -570,13 +570,13 @@ contains ! to the CSR storage format. ! uval - real(psb_spk_), dimension(:), input/output. ! The array where the entries of the row corresponding to the - ! upper triangle are copied. + ! upper triangle are copied. ! ktrw - integer, input/output. ! The index identifying the last entry taken from the ! staging buffer trw. See below. ! trw - type(psb_sspmat_type), input/output. ! A staging buffer. If the matrix A is not in CSR format, we use - ! the psb_sp_getblk routine and store its output in trw; when we + ! the psb_sp_getblk routine and store its output in trw; when we ! need to call psb_sp_getblk we do it for a block of rows, and then ! we consume them from trw in successive calls to this routine, ! until we empty the buffer. Thus we will make a call to psb_sp_getblk @@ -608,10 +608,10 @@ contains if (psb_errstatus_fatal()) then info = psb_err_internal_error_; goto 9999 end if - if (psb_toupper(upd) == 'F') then + if (psb_toupper(upd) == 'F') then - select type(aa => a%a) - type is (psb_s_csr_sparse_mat) + select type(aa => a%a) + type is (psb_s_csr_sparse_mat) ! ! Take a fast shortcut if the matrix is stored in CSR format @@ -633,19 +633,19 @@ contains end if enddo - class default + class default ! - ! Otherwise use psb_sp_getblk, slower but able (in principle) of + ! Otherwise use psb_sp_getblk, slower but able (in principle) of ! handling any format. In this case, a block of rows is extracted ! instead of a single row, for performance reasons, and these ! rows are copied one by one into lval, dia, uval, through ! successive calls to ilu_copyin. ! - if ((mod(i,nrb) == 1).or.(nrb == 1)) then + if ((mod(i,nrb) == 1).or.(nrb == 1)) then irb = min(m-i+1,nrb) - call aa%csget(i,i+irb-1,trw,info) + call aa%csget(i,i+irb-1,trw,info) if(info /= psb_success_) then info=psb_err_from_subroutine_ ch_err='csget' @@ -656,7 +656,7 @@ contains end if nz = trw%get_nzeros() - do + do if (ktrw > nz) exit if (trw%ia(ktrw) > i) exit k = trw%ja(ktrw) @@ -680,7 +680,7 @@ contains write(0,*) 'Update not implemented ' info = 31 - call psb_errpush(info,name,& + call psb_errpush(info,name,& & i_err=(/ione*13,izero,izero,izero,izero/),a_err=upd) goto 9999 diff --git a/prec/impl/psb_s_iluk_fact.f90 b/prec/impl/psb_s_iluk_fact.f90 index 4b9f1f3f..6129663b 100644 --- a/prec/impl/psb_s_iluk_fact.f90 +++ b/prec/impl/psb_s_iluk_fact.f90 @@ -1,9 +1,9 @@ -! +! ! Parallel Sparse BLAS version 3.5 ! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! +! Salvatore Filippone +! Alfredo Buttari +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -15,7 +15,7 @@ ! 3. The name of the PSBLAS group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. -! +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -27,21 +27,21 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! +! ! Moved here from MLD2P4, original copyright below. -! -! -! +! +! +! ! MLD2P4 version 2.2 ! MultiLevel Domain Decomposition Parallel Preconditioners Package ! based on PSBLAS (Parallel Sparse BLAS version 3.5) -! -! (C) Copyright 2008-2018 -! -! Salvatore Filippone -! Pasqua D'Ambra -! Daniela di Serafino -! +! +! (C) Copyright 2008-2018 +! +! Salvatore Filippone +! Pasqua D'Ambra +! Daniela di Serafino +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -53,7 +53,7 @@ ! 3. The name of the MLD2P4 group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. -! +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -65,8 +65,8 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! -! +! +! ! File: psb_siluk_fact.f90 ! ! Subroutine: psb_siluk_fact @@ -74,7 +74,7 @@ ! Contains: psb_siluk_factint, iluk_copyin, iluk_fact, iluk_copyout. ! ! This routine computes either the ILU(k) or the MILU(k) factorization of the -! diagonal blocks of a distributed matrix. These factorizations are used to +! diagonal blocks of a distributed matrix. These factorizations are used to ! build the 'base preconditioner' (block-Jacobi preconditioner/solver, ! Additive Schwarz preconditioner) corresponding to a certain level of a ! multilevel preconditioner. @@ -88,7 +88,7 @@ ! U factors is CSR. The diagonal of the U factor is stored separately (actually, ! the inverse of the diagonal entries is stored; this is then managed in the solve ! stage associated to the ILU(k)/MILU(k) factorization). -! +! ! ! Arguments: ! fill_in - integer, input. @@ -118,7 +118,7 @@ ! factorization. ! Note: its allocation is managed by the calling routine psb_ilu_bld, ! hence it cannot be only intent(out). -! info - integer, output. +! info - integer, output. ! Error code. ! blck - type(psb_sspmat_type), input, optional, target. ! The sparse matrix structure containing the remote rows of the @@ -126,7 +126,7 @@ ! to build an Additive Schwarz base preconditioner with overlap ! greater than 0. If the overlap is 0 or the matrix has been reordered ! (see psb_fact_bld), then blck does not contain any row. -! +! subroutine psb_siluk_fact(fill_in,ialg,a,l,u,d,info,blck) use psb_base_mod @@ -143,7 +143,7 @@ subroutine psb_siluk_fact(fill_in,ialg,a,l,u,d,info,blck) real(psb_spk_), intent(inout) :: d(:) ! Local Variables integer(psb_ipk_) :: l1, l2, m, err_act - + type(psb_sspmat_type), pointer :: blck_ type(psb_s_csr_sparse_mat) :: ll, uu character(len=20) :: name, ch_err @@ -152,17 +152,17 @@ subroutine psb_siluk_fact(fill_in,ialg,a,l,u,d,info,blck) info = psb_success_ call psb_erractionsave(err_act) - ! + ! ! Point to / allocate memory for the incomplete factorization ! - if (present(blck)) then + if (present(blck)) then blck_ => blck else - allocate(blck_,stat=info) - if (info == psb_success_) call blck_%csall(izero,izero,info,ione) + allocate(blck_,stat=info) + if (info == psb_success_) call blck_%allocate(izero,izero,info,ione,type='CSR') if (info /= psb_success_) then info=psb_err_from_subroutine_ - ch_err='csall' + ch_err='allocate' call psb_errpush(info,name,a_err=ch_err) goto 9999 end if @@ -170,7 +170,7 @@ subroutine psb_siluk_fact(fill_in,ialg,a,l,u,d,info,blck) m = a%get_nrows() + blck_%get_nrows() if ((m /= l%get_nrows()).or.(m /= u%get_nrows()).or.& - & (m > size(d)) ) then + & (m > size(d)) ) then write(0,*) 'Wrong allocation status for L,D,U? ',& & l%get_nrows(),size(d),u%get_nrows() info = -1 @@ -203,15 +203,15 @@ subroutine psb_siluk_fact(fill_in,ialg,a,l,u,d,info,blck) call u%set_triangle() call u%set_unit() call u%set_upper() - + ! ! Nullify pointer / deallocate memory ! - if (present(blck)) then - blck_ => null() + if (present(blck)) then + blck_ => null() else call blck_%free() - deallocate(blck_,stat=info) + deallocate(blck_,stat=info) if(info.ne.0) then info=psb_err_from_subroutine_ ch_err='psb_sp_free' @@ -280,7 +280,7 @@ contains ! according to the CSR storage format. ! lia2 - integer, dimension(:), input/output. ! The indices identifying the first nonzero entry of each row - ! of the L factor in laspk, according to the CSR storage format. + ! of the L factor in laspk, according to the CSR storage format. ! uval - real(psb_spk_), dimension(:), input/output. ! The U factor in the incomplete factorization. ! The entries of U are stored according to the CSR format. @@ -289,12 +289,12 @@ contains ! according to the CSR storage format. ! uirp - integer, dimension(:), input/output. ! The indices identifying the first nonzero entry of each row - ! of the U factor in uval, according to the CSR storage format. + ! of the U factor in uval, according to the CSR storage format. ! l1 - integer, output ! The number of nonzero entries in laspk. ! l2 - integer, output ! The number of nonzero entries in uval. - ! info - integer, output. + ! info - integer, output. ! Error code. ! subroutine psb_siluk_factint(fill_in,ialg,a,b,& @@ -304,7 +304,7 @@ contains implicit none - ! Arguments + ! Arguments integer(psb_ipk_), intent(in) :: fill_in, ialg type(psb_sspmat_type),intent(in) :: a,b integer(psb_ipk_),intent(inout) :: l1,l2,info @@ -330,14 +330,14 @@ contains select case(ialg) case(psb_ilu_n_,psb_milu_n_) - ! Ok + ! Ok case default info=psb_err_input_asize_invalid_i_ call psb_errpush(info,name,& & i_err=(/itwo,ialg,izero,izero,izero/)) goto 9999 end select - if (fill_in < 0) then + if (fill_in < 0) then info=psb_err_input_asize_invalid_i_ call psb_errpush(info,name, & & i_err=(/ione,fill_in,izero,izero,izero/)) @@ -349,7 +349,7 @@ contains m = ma+mb ! - ! Allocate a temporary buffer for the iluk_copyin function + ! Allocate a temporary buffer for the iluk_copyin function ! call trw%allocate(izero,izero,ione) @@ -361,7 +361,7 @@ contains call psb_errpush(info,name,a_err='psb_sp_all') goto 9999 end if - + l1=0 l2=0 lirp(1) = 1 @@ -381,34 +381,34 @@ contains uplevs(:) = m+1 row(:) = szero rowlevs(:) = -(m+1) - + ! ! Cycle over the matrix rows ! do i = 1, m - + ! ! At each iteration of the loop we keep in a heap the column indices ! affected by the factorization. The heap is initialized and filled ! in the iluk_copyin routine, and updated during the elimination, in ! the iluk_fact routine. The heap is ideal because at each step we need ! the lowest index, but we also need to insert new items, and the heap - ! allows to do both in log time. + ! allows to do both in log time. ! d(i) = szero if (i<=ma) then ! - ! Copy into trw the i-th local row of the matrix, stored in a - ! + ! Copy into trw the i-th local row of the matrix, stored in a + ! call iluk_copyin(i,ma,a,ione,m,row,rowlevs,heap,ktrw,trw,info) else ! ! Copy into trw the i-th local row of the matrix, stored in b - ! (as (i-ma)-th row) - ! + ! (as (i-ma)-th row) + ! call iluk_copyin(i-ma,mb,b,ione,m,row,rowlevs,heap,ktrw,trw,info) endif - + ! Do an elimination step on the current row. It turns out we only ! need to keep track of fill levels for the upper triangle, hence we ! do not have a lowlevs variable. @@ -417,7 +417,7 @@ contains & d,uja,uirp,uval,uplevs,nidx,idxs,info) ! ! Copy the row into lval/d(i)/uval - ! + ! if (info == psb_success_) call iluk_copyout(fill_in,ialg,i,m,row,rowlevs,nidx,idxs,& & l1,l2,lja,lirp,lval,d,uja,uirp,uval,uplevs,info) if (info /= psb_success_) then @@ -474,11 +474,11 @@ contains ! ! This routine is used by psb_siluk_factint in the computation of the ! ILU(k)/MILU(k) factorization of a local sparse matrix. - ! + ! ! ! Arguments: ! i - integer, input. - ! The local index of the row to be extracted from the + ! The local index of the row to be extracted from the ! sparse matrix structure a. ! m - integer, input. ! The number of rows of the local matrix stored into a. @@ -510,7 +510,7 @@ contains ! staging buffer trw. See below. ! trw - type(psb_sspmat_type), input/output. ! A staging buffer. If the matrix A is not in CSR format, we use - ! the psb_sp_getblk routine and store its output in trw; when we + ! the psb_sp_getblk routine and store its output in trw; when we ! need to call psb_sp_getblk we do it for a block of rows, and then ! we consume them from trw in successive calls to this routine, ! until we empty the buffer. Thus we will make a call to psb_sp_getblk @@ -521,8 +521,8 @@ contains use psb_base_mod implicit none - - ! Arguments + + ! Arguments type(psb_sspmat_type), intent(in) :: a type(psb_s_coo_sparse_mat), intent(inout) :: trw integer(psb_ipk_), intent(in) :: i,m,jmin,jmax @@ -542,17 +542,17 @@ contains if (psb_errstatus_fatal()) then info = psb_err_internal_error_; goto 9999 end if - call heap%init(info) + call heap%init(info) - select type (aa=> a%a) - type is (psb_s_csr_sparse_mat) + select type (aa=> a%a) + type is (psb_s_csr_sparse_mat) ! ! Take a fast shortcut if the matrix is stored in CSR format ! - + do j = aa%irp(i), aa%irp(i+1) - 1 k = aa%ja(j) - if ((jmin<=k).and.(k<=jmax)) then + if ((jmin<=k).and.(k<=jmax)) then row(k) = aa%val(j) rowlevs(k) = 0 call heap%insert(k,info) @@ -562,14 +562,14 @@ contains class default ! - ! Otherwise use psb_sp_getblk, slower but able (in principle) of + ! Otherwise use psb_sp_getblk, slower but able (in principle) of ! handling any format. In this case, a block of rows is extracted ! instead of a single row, for performance reasons, and these ! rows are copied one by one into the array row, through successive ! calls to iluk_copyin. ! - if ((mod(i,nrb) == 1).or.(nrb == 1)) then + if ((mod(i,nrb) == 1).or.(nrb == 1)) then irb = min(m-i+1,nrb) call aa%csget(i,i+irb-1,trw,info) if (info /= psb_success_) then @@ -581,11 +581,11 @@ contains ktrw=1 end if nz = trw%get_nzeros() - do + do if (ktrw > nz) exit if (trw%ia(ktrw) > i) exit k = trw%ja(ktrw) - if ((jmin<=k).and.(k<=jmax)) then + if ((jmin<=k).and.(k<=jmax)) then row(k) = trw%val(ktrw) rowlevs(k) = 0 call heap%insert(k,info) @@ -674,10 +674,10 @@ contains use psb_base_mod - implicit none + implicit none ! Arguments - type(psb_i_heap), intent(inout) :: heap + type(psb_i_heap), intent(inout) :: heap integer(psb_ipk_), intent(in) :: i, fill_in integer(psb_ipk_), intent(inout) :: nidx,info integer(psb_ipk_), intent(inout) :: rowlevs(:) @@ -690,7 +690,7 @@ contains real(psb_spk_) :: rwk info = psb_success_ - if (.not.allocated(idxs)) then + if (.not.allocated(idxs)) then allocate(idxs(200),stat=info) if (info /= psb_success_) return endif @@ -702,34 +702,34 @@ contains ! do ! Beware: (iret < 0) means that the heap is empty, not an error. - call heap%get_first(k,iret) + call heap%get_first(k,iret) if (iret < 0) return - ! + ! ! Just in case an index has been put on the heap more than once. ! if (k == lastk) cycle - lastk = k + lastk = k nidx = nidx + 1 - if (nidx>size(idxs)) then + if (nidx>size(idxs)) then call psb_realloc(nidx+psb_heap_resize,idxs,info) if (info /= psb_success_) return end if idxs(nidx) = k - - if ((row(k) /= szero).and.(rowlevs(k) <= fill_in).and.(ki) then + else if (j>i) then ! ! Copy the upper part of the row - ! - if (rowlevs(j) <= fill_in) then - l2 = l2 + 1 - if (size(uval) < l2) then - ! + ! + if (rowlevs(j) <= fill_in) then + l2 = l2 + 1 + if (size(uval) < l2) then + ! ! Figure out a good reallocation size! ! isz = max((l2/i)*m,int(1.2*l2),l2+100) - call psb_realloc(isz,uval,info) - if (info == psb_success_) call psb_realloc(isz,uja,info) + call psb_realloc(isz,uval,info) + if (info == psb_success_) call psb_realloc(isz,uja,info) if (info == psb_success_) call psb_realloc(isz,uplevs,info,pad=(m+1)) - if (info /= psb_success_) then + if (info /= psb_success_) then info=psb_err_from_subroutine_ call psb_errpush(info,name,a_err='Allocate') goto 9999 @@ -941,11 +941,11 @@ contains end if uja(l2) = j uval(l2) = row(j) - uplevs(l2) = rowlevs(j) + uplevs(l2) = rowlevs(j) else if (ialg == psb_milu_n_) then ! ! MILU(k): add discarded entries to the diagonal one - ! + ! d(i) = d(i) + row(j) end if ! @@ -963,13 +963,13 @@ contains lirp(i+1) = l1 + 1 uirp(i+1) = l2 + 1 - ! + ! ! Check the pivot size ! if (abs(d(i)) < s_epstol) then ! ! Too small pivot: unstable factorization - ! + ! info = psb_err_pivot_too_small_ int_err(1) = i write(ch_err,'(g20.10)') d(i) diff --git a/prec/impl/psb_s_ilut_fact.f90 b/prec/impl/psb_s_ilut_fact.f90 index 33b4374c..43cacf41 100644 --- a/prec/impl/psb_s_ilut_fact.f90 +++ b/prec/impl/psb_s_ilut_fact.f90 @@ -1,9 +1,9 @@ -! +! ! Parallel Sparse BLAS version 3.5 ! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! +! Salvatore Filippone +! Alfredo Buttari +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -15,7 +15,7 @@ ! 3. The name of the PSBLAS group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. -! +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -27,21 +27,21 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! +! ! Moved here from MLD2P4, original copyright below. -! -! -! +! +! +! ! MLD2P4 version 2.2 ! MultiLevel Domain Decomposition Parallel Preconditioners Package ! based on PSBLAS (Parallel Sparse BLAS version 3.5) -! -! (C) Copyright 2008-2018 -! -! Salvatore Filippone -! Pasqua D'Ambra -! Daniela di Serafino -! +! +! (C) Copyright 2008-2018 +! +! Salvatore Filippone +! Pasqua D'Ambra +! Daniela di Serafino +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -53,7 +53,7 @@ ! 3. The name of the MLD2P4 group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. -! +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -65,8 +65,8 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! -! +! +! ! File: psb_silut_fact.f90 ! ! Subroutine: psb_silut_fact @@ -87,7 +87,7 @@ ! CSR. The diagonal of the U factor is stored separately (actually, the ! inverse of the diagonal entries is stored; this is then managed in the ! solve stage associated to the ILU(k,t) factorization). -! +! ! ! Arguments: ! fill_in - integer, input. @@ -114,7 +114,7 @@ ! factorization. ! Note: its allocation is managed by the calling routine psb_ilu_bld, ! hence it cannot be only intent(out). -! info - integer, output. +! info - integer, output. ! Error code. ! blck - type(psb_sspmat_type), input, optional, target. ! The sparse matrix structure containing the remote rows of the @@ -122,9 +122,9 @@ ! to build an Additive Schwarz base preconditioner with overlap ! greater than 0. If the overlap is 0 or the matrix has been reordered ! (see psb_fact_bld), then blck does not contain any row. -! +! subroutine psb_silut_fact(fill_in,thres,a,l,u,d,info,blck,iscale) - + use psb_base_mod use psb_s_ilu_fact_mod, psb_protect_name => psb_silut_fact @@ -141,7 +141,7 @@ subroutine psb_silut_fact(fill_in,thres,a,l,u,d,info,blck,iscale) integer(psb_ipk_), intent(in), optional :: iscale ! Local Variables integer(psb_ipk_) :: l1, l2, m, err_act, iscale_ - + type(psb_sspmat_type), pointer :: blck_ type(psb_s_csr_sparse_mat) :: ll, uu real(psb_spk_) :: scale @@ -151,34 +151,34 @@ subroutine psb_silut_fact(fill_in,thres,a,l,u,d,info,blck,iscale) info = psb_success_ call psb_erractionsave(err_act) - if (fill_in < 0) then + if (fill_in < 0) then info=psb_err_input_asize_invalid_i_ call psb_errpush(info,name, & & i_err=(/ione,fill_in,izero,izero,izero/)) goto 9999 end if - ! + ! ! Point to / allocate memory for the incomplete factorization ! - if (present(blck)) then + if (present(blck)) then blck_ => blck else - allocate(blck_,stat=info) - if (info == psb_success_) call blck_%csall(izero,izero,info,ione) + allocate(blck_,stat=info) + if (info == psb_success_) call blck_%allocate(izero,izero,info,ione,type='CSR') if (info /= psb_success_) then info=psb_err_from_subroutine_ - ch_err='csall' + ch_err='allocate' call psb_errpush(info,name,a_err=ch_err) goto 9999 end if endif - if (present(iscale)) then + if (present(iscale)) then iscale_ = iscale else iscale_ = psb_ilu_scale_none_ end if - - select case(iscale_) + + select case(iscale_) case(psb_ilu_scale_none_) scale = sone case(psb_ilu_scale_maxval_) @@ -189,10 +189,10 @@ subroutine psb_silut_fact(fill_in,thres,a,l,u,d,info,blck,iscale) call psb_errpush(info,name,i_err=(/ione*9,iscale_,izero,izero,izero/)) goto 9999 end select - + m = a%get_nrows() + blck_%get_nrows() if ((m /= l%get_nrows()).or.(m /= u%get_nrows()).or.& - & (m > size(d)) ) then + & (m > size(d)) ) then write(0,*) 'Wrong allocation status for L,D,U? ',& & l%get_nrows(),size(d),u%get_nrows() info = -1 @@ -225,15 +225,15 @@ subroutine psb_silut_fact(fill_in,thres,a,l,u,d,info,blck,iscale) call u%set_triangle() call u%set_unit() call u%set_upper() - + ! ! Nullify pointer / deallocate memory ! - if (present(blck)) then - blck_ => null() + if (present(blck)) then + blck_ => null() else call blck_%free() - deallocate(blck_,stat=info) + deallocate(blck_,stat=info) if(info.ne.0) then info=psb_err_from_subroutine_ ch_err='psb_sp_free' @@ -298,7 +298,7 @@ contains ! according to the CSR storage format. ! lirp - integer, dimension(:), input/output. ! The indices identifying the first nonzero entry of each row - ! of the L factor in lval, according to the CSR storage format. + ! of the L factor in lval, according to the CSR storage format. ! uval - real(psb_spk_), dimension(:), input/output. ! The U factor in the incomplete factorization. ! The entries of U are stored according to the CSR format. @@ -307,12 +307,12 @@ contains ! according to the CSR storage format. ! uirp - integer, dimension(:), input/output. ! The indices identifying the first nonzero entry of each row - ! of the U factor in uval, according to the CSR storage format. + ! of the U factor in uval, according to the CSR storage format. ! l1 - integer, output ! The number of nonzero entries in lval. ! l2 - integer, output ! The number of nonzero entries in uval. - ! info - integer, output. + ! info - integer, output. ! Error code. ! subroutine psb_silut_factint(fill_in,thres,a,b,& @@ -320,7 +320,7 @@ contains use psb_base_mod - implicit none + implicit none ! Arguments integer(psb_ipk_), intent(in) :: fill_in @@ -355,7 +355,7 @@ contains m = ma+mb ! - ! Allocate a temporary buffer for the ilut_copyin function + ! Allocate a temporary buffer for the ilut_copyin function ! call trw%allocate(izero,izero,ione) if (info == psb_success_) call psb_ensure_size(m+1,lirp,info) @@ -366,7 +366,7 @@ contains call psb_errpush(info,name,a_err='psb_sp_all') goto 9999 end if - + l1=0 l2=0 lirp(1) = 1 @@ -396,10 +396,10 @@ contains ! in the ilut_copyin function, and updated during the elimination, in ! the ilut_fact routine. The heap is ideal because at each step we need ! the lowest index, but we also need to insert new items, and the heap - ! allows to do both in log time. + ! allows to do both in log time. ! d(i) = czero - if (i<=ma) then + if (i<=ma) then call ilut_copyin(i,ma,a,i,ione,m,nlw,nup,jmaxup,nrmi,weight,& & row,heap,ktrw,trw,info) else @@ -414,7 +414,7 @@ contains & d,uja,uirp,uval,nidx,idxs,info) ! ! Copy the row into lval/d(i)/uval - ! + ! if (info == psb_success_) call ilut_copyout(fill_in,thres,i,m,& & nlw,nup,jmaxup,nrmi,row,nidx,idxs,& & l1,l2,lja,lirp,lval,d,uja,uirp,uval,info) @@ -429,7 +429,7 @@ contains ! ! Adjust diagonal accounting for scale factor ! - if (weight /= sone) then + if (weight /= sone) then d(1:m) = d(1:m)*weight end if @@ -469,7 +469,7 @@ contains ! - storing into a heap the column indices of the nonzero entries of the copied ! row; ! - computing the column index of the first entry with maximum absolute value - ! in the part of the row belonging to the upper triangle; + ! in the part of the row belonging to the upper triangle; ! - computing the 2-norm of the row. ! The output array row is such that it contains a full row of A, i.e. it contains ! also the zero entries of the row. This is useful for the elimination step @@ -482,11 +482,11 @@ contains ! ! This routine is used by psb_silut_factint in the computation of the ILU(k,t) ! factorization of a local sparse matrix. - ! + ! ! ! Arguments: ! i - integer, input. - ! The local index of the row to be extracted from the + ! The local index of the row to be extracted from the ! sparse matrix structure a. ! m - integer, input. ! The number of rows of the local matrix stored into a. @@ -533,7 +533,7 @@ contains ! staging buffer trw. See below. ! trw - type(psb_sspmat_type), input/output. ! A staging buffer. If the matrix A is not in CSR format, we use - ! the psb_sp_getblk routine and store its output in trw; when we + ! the psb_sp_getblk routine and store its output in trw; when we ! need to call psb_sp_getblk we do it for a block of rows, and then ! we consume them from trw in successive calls to this routine, ! until we empty the buffer. Thus we will make a call to psb_sp_getblk @@ -542,7 +542,7 @@ contains subroutine ilut_copyin(i,m,a,jd,jmin,jmax,nlw,nup,jmaxup,& & nrmi,weight,row,heap,ktrw,trw,info) use psb_base_mod - implicit none + implicit none type(psb_sspmat_type), intent(in) :: a type(psb_s_coo_sparse_mat), intent(inout) :: trw integer(psb_ipk_), intent(in) :: i, m,jmin,jmax,jd @@ -584,29 +584,29 @@ contains dmaxup = szero nrmi = szero - select type (aa=> a%a) - type is (psb_s_csr_sparse_mat) + select type (aa=> a%a) + type is (psb_s_csr_sparse_mat) ! ! Take a fast shortcut if the matrix is stored in CSR format ! do j = aa%irp(i), aa%irp(i+1) - 1 k = aa%ja(j) - if ((jmin<=k).and.(k<=jmax)) then - row(k) = aa%val(j)*weight + if ((jmin<=k).and.(k<=jmax)) then + row(k) = aa%val(j)*weight call heap%insert(k,info) if (info /= psb_success_) exit - if (kjd) then + if (kjd) then nup = nup + 1 - if (abs(row(k))>dmaxup) then + if (abs(row(k))>dmaxup) then jmaxup = k dmaxup = abs(row(k)) end if end if end if end do - if (info /= psb_success_) then + if (info /= psb_success_) then info=psb_err_from_subroutine_ call psb_errpush(info,name,a_err='psb_insert_heap') goto 9999 @@ -617,16 +617,16 @@ contains class default - + ! - ! Otherwise use psb_sp_getblk, slower but able (in principle) of + ! Otherwise use psb_sp_getblk, slower but able (in principle) of ! handling any format. In this case, a block of rows is extracted ! instead of a single row, for performance reasons, and these ! rows are copied one by one into the array row, through successive ! calls to ilut_copyin. ! - if ((mod(i,nrb) == 1).or.(nrb == 1)) then + if ((mod(i,nrb) == 1).or.(nrb == 1)) then irb = min(m-i+1,nrb) call aa%csget(i,i+irb-1,trw,info) if (info /= psb_success_) then @@ -639,18 +639,18 @@ contains kin = ktrw nz = trw%get_nzeros() - do + do if (ktrw > nz) exit if (trw%ia(ktrw) > i) exit k = trw%ja(ktrw) - if ((jmin<=k).and.(k<=jmax)) then + if ((jmin<=k).and.(k<=jmax)) then row(k) = trw%val(ktrw)*weight call heap%insert(k,info) if (info /= psb_success_) exit - if (kjd) then + if (kjd) then nup = nup + 1 - if (abs(row(k))>dmaxup) then + if (abs(row(k))>dmaxup) then jmaxup = k dmaxup = abs(row(k)) end if @@ -734,10 +734,10 @@ contains use psb_base_mod - implicit none + implicit none ! Arguments - type(psb_i_heap), intent(inout) :: heap + type(psb_i_heap), intent(inout) :: heap integer(psb_ipk_), intent(in) :: i integer(psb_ipk_), intent(inout) :: nidx,info real(psb_spk_), intent(in) :: thres,nrmi @@ -753,23 +753,23 @@ contains call psb_ensure_size(200*ione,idxs,info) if (info /= psb_success_) return nidx = 0 - lastk = -1 + lastk = -1 ! ! Do while there are indices to be processed ! do - call heap%get_first(k,iret) + call heap%get_first(k,iret) if (iret < 0) exit - ! + ! ! An index may have been put on the heap more than once. ! if (k == lastk) cycle - lastk = k + lastk = k lowert: if (k nidx) exit if (idxs(idxp) >= i) exit @@ -987,8 +987,8 @@ contains ! if (abs(witem) < thres*nrmi) cycle - nz = nz + 1 - xw(nz) = witem + nz = nz + 1 + xw(nz) = witem xwid(nz) = widx call heap%insert(witem,widx,info) if (info /= psb_success_) then @@ -1000,9 +1000,9 @@ contains ! ! Now we have to take out the first nlw+fill_in entries - ! + ! if (nz <= nlw+fill_in) then - ! + ! ! Just copy everything from xw, and it is already ordered ! else @@ -1014,7 +1014,7 @@ contains call psb_errpush(info,name,a_err='psb_heap_get_first') goto 9999 end if - + xw(k) = witem xwid(k) = widx end do @@ -1029,15 +1029,15 @@ contains ! Copy out the lower part of the row ! do k=1,nz - l1 = l1 + 1 + l1 = l1 + 1 if (size(lval) < l1) then - ! + ! ! Figure out a good reallocation size! - ! + ! isz = (max((l1/i)*m,int(1.2*l1),l1+100)) - call psb_realloc(isz,lval,info) - if (info == psb_success_) call psb_realloc(isz,lja,info) - if (info /= psb_success_) then + call psb_realloc(isz,lval,info) + if (info == psb_success_) call psb_realloc(isz,lja,info) + if (info /= psb_success_) then info=psb_err_from_subroutine_ call psb_errpush(info,name,a_err='Allocate') goto 9999 @@ -1046,25 +1046,25 @@ contains lja(l1) = xwid(k) lval(l1) = xw(indx(k)) end do - + ! ! Make sure idxp points to the diagonal entry ! - if (idxp <= size(idxs)) then - if (idxs(idxp) < i) then - do + if (idxp <= size(idxs)) then + if (idxs(idxp) < i) then + do idxp = idxp + 1 if (idxp > nidx) exit if (idxs(idxp) >= i) exit end do end if end if - if (idxp > size(idxs)) then + if (idxp > size(idxs)) then !!$ write(0,*) 'Warning: missing diagonal element in the row ' else - if (idxs(idxp) > i) then + if (idxs(idxp) > i) then !!$ write(0,*) 'Warning: missing diagonal element in the row ' - else if (idxs(idxp) /= i) then + else if (idxs(idxp) /= i) then !!$ write(0,*) 'Warning: impossible error: diagonal has vanished' else ! @@ -1076,7 +1076,7 @@ contains if (abs(d(i)) < s_epstol) then ! ! Too small pivot: unstable factorization - ! + ! info = psb_err_pivot_too_small_ int_err(1) = i write(ch_err,'(g20.10)') d(i) @@ -1092,7 +1092,7 @@ contains end if ! - ! Now the upper part + ! Now the upper part ! call heap%init(info,dir=psb_asort_down_) @@ -1104,15 +1104,15 @@ contains nz = 0 do - + idxp = idxp + 1 if (idxp > nidx) exit widx = idxs(idxp) - if (widx <= i) then + if (widx <= i) then !!$ write(0,*) 'Warning: lower triangle in upper copy',widx,i,idxp,idxs(idxp) cycle end if - if (widx > m) then + if (widx > m) then !!$ write(0,*) 'Warning: impossible value',widx,i,idxp,idxs(idxp) cycle end if @@ -1120,12 +1120,12 @@ contains ! ! Dropping rule based on the 2-norm. But keep the jmaxup-th entry anyway. ! - if ((widx /= jmaxup) .and. (abs(witem) < thres*nrmi)) then - cycle + if ((widx /= jmaxup) .and. (abs(witem) < thres*nrmi)) then + cycle end if nz = nz + 1 - xw(nz) = witem + xw(nz) = witem xwid(nz) = widx call heap%insert(witem,widx,info) if (info /= psb_success_) then @@ -1133,7 +1133,7 @@ contains call psb_errpush(info,name,a_err='psb_insert_heap') goto 9999 end if - + end do ! @@ -1141,7 +1141,7 @@ contains ! we include entry jmaxup. ! if (nz <= nup+fill_in) then - ! + ! ! Just copy everything from xw ! fndmaxup=.true. @@ -1155,13 +1155,13 @@ contains if (widx == jmaxup) fndmaxup=.true. end do end if - if ((i psb_s_invk_bld + use psb_s_ilu_fact_mod + implicit none + + ! Arguments + type(psb_sspmat_type), intent(in), target :: a + integer(psb_ipk_), intent(in) :: fill1, fill2 + type(psb_sspmat_type), intent(inout) :: lmat, umat + real(psb_spk_), allocatable :: d(:) + Type(psb_desc_type), Intent(inout) :: desc + integer(psb_ipk_), intent(out) :: info + type(psb_sspmat_type), intent(in), optional :: blck + ! + integer(psb_ipk_) :: i, nztota, err_act, n_row, nrow_a, n_col + type(psb_sspmat_type) :: atmp + real(psb_spk_), allocatable :: pq(:), pd(:) + integer(psb_ipk_), allocatable :: uplevs(:) + integer(psb_ipk_) :: debug_level, debug_unit + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: np,me + integer(psb_ipk_) :: nzrmax + character(len=20) :: name, ch_err + + + info = psb_success_ + name='psb_sinvk_bld' + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_; goto 9999 + end if + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + ctxt = psb_cd_get_context(desc) + call psb_info(ctxt, me, np) + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),' start' + + ! + ! Check the memory available to hold the incomplete L and U factors + ! and allocate it if needed + ! + nrow_a = a%get_nrows() + nztota = a%get_nzeros() + + if (present(blck)) then + nztota = nztota + blck%get_nzeros() + end if + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & ': out get_nnzeros',nrow_a,nztota,& + & a%get_nrows(),a%get_ncols(),a%get_nzeros() + + + n_row = psb_cd_get_local_rows(desc) + n_col = psb_cd_get_local_cols(desc) + allocate(pd(n_row),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + end if + + + call lmat%allocate(n_row,n_row,info,nz=nztota) + if (info == psb_success_) call umat%allocate(n_row,n_row,info,nz=nztota) + + + call psb_iluk_fact(fill1,psb_ilu_n_,a,lmat,umat,pd,info,blck=blck) + !,uplevs=uplevs) + !call psb_siluk_fact(fill1,psb_ilu_n_,a,lmat,umat,pd,info,blck=blck) + + if (info == psb_success_) call atmp%allocate(n_row,n_row,info,nz=nztota) + if(info/=0) then + info=psb_err_from_subroutine_ + ch_err='psb_sp_all' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + + ! + ! Compute the aprox U^-1 and L^-1 + ! + call psb_sparse_invk(n_row,umat,atmp,fill2,info) + if (info == psb_success_) call psb_move_alloc(atmp,umat,info) + if (info == psb_success_) call lmat%transp() + if (info == psb_success_) call psb_sparse_invk(n_row,lmat,atmp,fill2,info) + if (info == psb_success_) call psb_move_alloc(atmp,lmat,info) + if (info == psb_success_) call lmat%transp() + ! Done. Hopefully.... + + if (info /= psb_success_) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='invt') + goto 9999 + end if + + call psb_move_alloc(pd,d,info) + call lmat%set_asb() + call lmat%trim() + call umat%set_asb() + call umat%trim() + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),' end' + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return +end subroutine psb_s_invk_bld + +subroutine psb_ssparse_invk(n,a,z,fill_in,info,inlevs) + + use psb_base_mod + use psb_s_invk_fact_mod, psb_protect_name => psb_ssparse_invk + + integer(psb_ipk_), intent(in) :: n + type(psb_sspmat_type), intent(in) :: a + type(psb_sspmat_type), intent(inout) :: z + integer(psb_ipk_), intent(in) :: fill_in + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: inlevs(:) + ! + integer(psb_ipk_) :: i,j,k, err_act, nz, nzra, nzrz, ipz1,ipz2, nzz, ip1, ip2, l2 + integer(psb_ipk_), allocatable :: ia(:), ja(:), iz(:), jz(:) + real(psb_spk_), allocatable :: zw(:), val(:), valz(:) + integer(psb_ipk_), allocatable :: uplevs(:), rowlevs(:), idxs(:) + real(psb_spk_), allocatable :: row(:) + type(psb_s_coo_sparse_mat) :: trw + type(psb_s_csr_sparse_mat) :: acsr, zcsr + integer(psb_ipk_) :: ktrw, nidx + type(psb_i_heap) :: heap + + real(psb_dpk_) :: alpha + character(len=20) :: name='psb_sp_invk' + + info = psb_success_ + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_; goto 9999 + end if + + if (.not.(a%is_triangle().and.a%is_unit().and.a%is_upper())) then + write(psb_err_unit,*) 'Wrong A ' + info = psb_err_internal_error_ + call psb_errpush(psb_err_internal_error_,name,a_err='wrong A') + goto 9999 + end if + call a%cp_to(acsr) + call trw%allocate(izero,izero,ione) + if (info == psb_success_) allocate(zw(n),iz(n),valz(n),& + & row(n),rowlevs(n),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + end if + + allocate(uplevs(acsr%get_nzeros()),stat=info) + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='Allocate') + goto 9999 + end if + uplevs(:) = 0 + row(:) = szero + rowlevs(:) = -(n+1) + + call zcsr%allocate(n,n,n*fill_in) + call zcsr%set_triangle() + call zcsr%set_unit(.false.) + call zcsr%set_upper() + call psb_ensure_size(n+1, idxs, info) + + + ! + ! + zcsr%irp(1) = 1 + nzz = 0 + + l2 = 0 + outer: do i = 1, n-1 + ! ZW = e_i + call psb_invk_copyin(i,n,acsr,ione,n,row,rowlevs,heap,ktrw,trw,info,& + & sign=-sone,inlevs=inlevs) + row(i) = sone + rowlevs(i) = 0 + + ! Update loop + call psb_invk_inv(fill_in,i,row,rowlevs,heap,& + & acsr%ja,acsr%irp,acsr%val,uplevs,nidx,idxs,info) + + call psb_invk_copyout(fill_in,i,n,row,rowlevs,nidx,idxs,& + & l2,zcsr%ja,zcsr%irp,zcsr%val,info) + + nzz = l2 + end do outer + if (info /= psb_success_) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='mainloop') + goto 9999 + end if + ipz1 = nzz+1 + call psb_ensure_size(ipz1,zcsr%val,info) + call psb_ensure_size(ipz1,zcsr%ja,info) + zcsr%val(ipz1) = sone + zcsr%ja(ipz1) = n + zcsr%irp(n+1) = ipz1+1 + call zcsr%set_sorted() + call z%mv_from(zcsr) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return +end subroutine psb_ssparse_invk + +subroutine psb_s_invk_copyin(i,m,a,jmin,jmax,row,rowlevs,heap,ktrw,trw,info,sign,inlevs) + + use psb_base_mod + use psb_s_invk_fact_mod, psb_protect_name => psb_s_invk_copyin + + implicit none + + ! Arguments + type(psb_s_csr_sparse_mat), intent(in) :: a + type(psb_s_coo_sparse_mat), intent(inout) :: trw + integer(psb_ipk_), intent(in) :: i,m,jmin,jmax + integer(psb_ipk_), intent(inout) :: ktrw,info + integer(psb_ipk_), intent(inout) :: rowlevs(:) + real(psb_spk_), intent(inout) :: row(:) + type(psb_i_heap), intent(inout) :: heap + real(psb_spk_), optional, intent(in) :: sign + integer(psb_ipk_), intent(in), optional :: inlevs(:) + + ! Local variables + integer(psb_ipk_) :: k,j,irb,err_act, nz + integer(psb_ipk_), parameter :: nrb=16 + real(psb_spk_) :: sign_ + character(len=20), parameter :: name='invk_copyin' + character(len=20) :: ch_err + + info = psb_success_ + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_; goto 9999 + end if + + call heap%init(info) + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_init_heap') + goto 9999 + end if + + if (present(sign)) then + sign_ = sign + else + sign_ = sone + end if + + + ! + ! Take a fast shortcut if the matrix is stored in CSR format + ! + if (present(inlevs)) then + do j = a%irp(i), a%irp(i+1) - 1 + k = a%ja(j) + if ((jmin<=k).and.(k<=jmax)) then + row(k) = sign_ * a%val(j) + rowlevs(k) = inlevs(j) + call heap%insert(k,info) + end if + end do + else + do j = a%irp(i), a%irp(i+1) - 1 + k = a%ja(j) + if ((jmin<=k).and.(k<=jmax)) then + row(k) = sign_ * a%val(j) + rowlevs(k) = 0 + call heap%insert(k,info) + end if + end do + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return +end subroutine psb_s_invk_copyin + + +subroutine psb_s_invk_copyout(fill_in,i,m,row,rowlevs,nidx,idxs,& + & l2,uia1,uia2,uaspk,info) + + use psb_base_mod + use psb_s_invk_fact_mod, psb_protect_name => psb_s_invk_copyout + + implicit none + + ! Arguments + integer(psb_ipk_), intent(in) :: fill_in, i, m, nidx + integer(psb_ipk_), intent(inout) :: l2, info + integer(psb_ipk_), intent(inout) :: rowlevs(:), idxs(:) + integer(psb_ipk_), allocatable, intent(inout) :: uia1(:), uia2(:) + real(psb_spk_), allocatable, intent(inout) :: uaspk(:) + real(psb_spk_), intent(inout) :: row(:) + + ! Local variables + integer(psb_ipk_) :: j,isz,err_act,int_err(5),idxp + character(len=20), parameter :: name='psb_siluk_factint' + character(len=20) :: ch_err + + info = psb_success_ + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_; goto 9999 + end if + + do idxp=1,nidx + + j = idxs(idxp) + + if (j>=i) then + ! + ! Copy the upper part of the row + ! + if (rowlevs(j) <= fill_in) then + l2 = l2 + 1 + if (size(uaspk) < l2) then + ! + ! Figure out a good reallocation size! + ! + isz = max(int(1.2*l2),l2+100) + call psb_realloc(isz,uaspk,info) + if (info == psb_success_) call psb_realloc(isz,uia1,info) + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='Allocate') + goto 9999 + end if + end if + uia1(l2) = j + uaspk(l2) = row(j) + end if + ! + ! Re-initialize row(j) and rowlevs(j) + ! + row(j) = szero + rowlevs(j) = -(m+1) + end if + end do + + uia2(i+1) = l2 + 1 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return +end subroutine psb_s_invk_copyout + +subroutine psb_sinvk_inv(fill_in,i,row,rowlevs,heap,ja,irp,val,uplevs,& + & nidx,idxs,info) + + use psb_base_mod + use psb_s_invk_fact_mod, psb_protect_name => psb_sinvk_inv + + implicit none + + ! Arguments + type(psb_i_heap), intent(inout) :: heap + integer(psb_ipk_), intent(in) :: i, fill_in + integer(psb_ipk_), intent(inout) :: nidx,info + integer(psb_ipk_), intent(inout) :: rowlevs(:) + integer(psb_ipk_), allocatable, intent(inout) :: idxs(:) + integer(psb_ipk_), intent(in) :: ja(:),irp(:),uplevs(:) + real(psb_spk_), intent(in) :: val(:) + real(psb_spk_), intent(inout) :: row(:) + + ! Local variables + integer(psb_ipk_) :: k,j,lrwk,jj,lastk, iret + real(psb_dpk_) :: rwk + + + info = psb_success_ + + call psb_ensure_size(200, idxs, info) + if (info /= psb_success_) return + nidx = 1 + idxs(1) = i + lastk = i + + ! + ! Do while there are indices to be processed + ! + do + ! Beware: (iret < 0) means that the heap is empty, not an error. + call heap%get_first(k,iret) + if (iret < 0) then +!!$ write(psb_err_unit,*) 'IINVK: ',i,' returning at ',lastk + return + end if + + ! + ! Just in case an index has been put on the heap more than once. + ! + if (k == lastk) cycle + + lastk = k + nidx = nidx + 1 + if (nidx>size(idxs)) then + call psb_realloc(nidx+psb_heap_resize,idxs,info) + if (info /= psb_success_) return + end if + idxs(nidx) = k + + if ((row(k) /= szero).and.(rowlevs(k) <= fill_in)) then + ! + ! Note: since U is scaled while copying it out (see iluk_copyout), + ! we can use rwk in the update below + ! + rwk = row(k) + lrwk = rowlevs(k) + + do jj=irp(k),irp(k+1)-1 + j = ja(jj) + if (j<=k) then + info = -i + return + endif + ! + ! Insert the index into the heap for further processing. + ! The fill levels are initialized to a negative value. If we find + ! one, it means that it is an as yet untouched index, so we need + ! to insert it; otherwise it is already on the heap, there is no + ! need to insert it more than once. + ! + if (rowlevs(j)<0) then + call heap%insert(j,info) + if (info /= psb_success_) return + rowlevs(j) = abs(rowlevs(j)) + end if + ! + ! Update row(j) and the corresponding fill level + ! + row(j) = row(j) - rwk * val(jj) + rowlevs(j) = min(rowlevs(j),lrwk+uplevs(jj)+1) + end do + + end if + end do + +end subroutine psb_sinvk_inv diff --git a/prec/impl/psb_s_invt_fact.f90 b/prec/impl/psb_s_invt_fact.f90 new file mode 100644 index 00000000..f311860d --- /dev/null +++ b/prec/impl/psb_s_invt_fact.f90 @@ -0,0 +1,739 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! Moved here from AMG4PSBLAS, original copyright below. +! +! +! AMG-AINV: Approximate Inverse plugin for +! AMG4PSBLAS version 1.0 +! +! (C) Copyright 2020 +! +! Salvatore Filippone University of Rome Tor Vergata +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! + +subroutine psb_s_invt_bld(a,fillin,invfill,thresh,invthresh,& + & lmat,d,umat,desc,info,blck) + + use psb_base_mod + use psb_s_invt_fact_mod, psb_protect_name => psb_s_invt_bld + use psb_s_ilu_fact_mod + implicit none + + ! Arguments + type(psb_sspmat_type), intent(in), target :: a + integer(psb_ipk_), intent(in) :: fillin,invfill + real(psb_spk_), intent(in) :: thresh + real(psb_spk_), intent(in) :: invthresh + type(psb_sspmat_type), intent(inout) :: lmat, umat + real(psb_spk_), allocatable :: d(:) + Type(psb_desc_type), Intent(inout) :: desc + integer(psb_ipk_), intent(out) :: info + type(psb_sspmat_type), intent(in), optional :: blck + ! + integer(psb_ipk_) :: i, nztota, err_act, n_row, nrow_a, n_col + type(psb_sspmat_type) :: atmp + real(psb_spk_), allocatable :: pq(:), pd(:), w(:) + integer(psb_ipk_) :: debug_level, debug_unit + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: np,me + integer(psb_ipk_) :: nzrmax + real(psb_spk_) :: sp_thresh + character(len=20) :: name, ch_err, fname + + + info = psb_success_ + name='psb_sinvt_fact' + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_; goto 9999 + end if + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + ctxt = psb_cd_get_context(desc) + call psb_info(ctxt, me, np) + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),' start' + + ! + ! Check the memory available to hold the incomplete L and U factors + ! and allocate it if needed + ! + nrow_a = a%get_nrows() + nztota = a%get_nzeros() + + if (present(blck)) then + nztota = nztota + blck%get_nzeros() + end if + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & ': out get_nnzeros',nrow_a,nztota,& + & a%get_nrows(),a%get_ncols(),a%get_nzeros() + + + n_row = psb_cd_get_local_rows(desc) + n_col = psb_cd_get_local_cols(desc) + allocate(pd(n_row),w(n_row),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + end if + + nzrmax = fillin + sp_thresh = thresh + + call lmat%allocate(n_row,n_row,info,nz=nztota) + if (info == psb_success_) call umat%allocate(n_row,n_row,info,nz=nztota) + + if (info == 0) call psb_ilut_fact(nzrmax,sp_thresh,& + & a,lmat,umat,pd,info,blck=blck,iscale=psb_ilu_scale_maxval_) + + if (info == psb_success_) call atmp%allocate(n_row,n_row,info,nz=nztota) + if(info/=0) then + info=psb_err_from_subroutine_ + ch_err='psb_sp_all' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if (.false.) then +!!$ if (debug_level >= psb_debug_inner_) then + write(fname,'(a,i0,a)') 'invt-lo-',me,'.mtx' + call lmat%print(fname,head="INVTLOW") + write(fname,'(a,i0,a)') 'invt-up-',me,'.mtx' + call umat%print(fname,head="INVTUPP") + end if + + ! + ! Compute the approx U^-1 and L^-1 + ! + nzrmax = invfill + call psb_ssparse_invt(n_row,umat,atmp,nzrmax,invthresh,info) + if (info == psb_success_) call psb_move_alloc(atmp,umat,info) + if (info == psb_success_) call lmat%transp() + if (info == psb_success_) call psb_ssparse_invt(n_row,lmat,atmp,nzrmax,invthresh,info) + if (info == psb_success_) call psb_move_alloc(atmp,lmat,info) + if (info == psb_success_) call lmat%transp() + ! Done. Hopefully.... + + if (info /= psb_success_) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='invt') + goto 9999 + end if + + call psb_move_alloc(pd,d,info) + call lmat%set_asb() + call lmat%trim() + call umat%set_asb() + call umat%trim() + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),' end' + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return +end subroutine psb_s_invt_bld + +subroutine psb_ssparse_invt(n,a,z,nzrmax,sp_thresh,info) + + use psb_base_mod + use psb_s_invt_fact_mod, psb_protect_name => psb_ssparse_invt + + implicit none + integer(psb_ipk_), intent(in) :: n + type(psb_sspmat_type), intent(in) :: a + type(psb_sspmat_type), intent(inout) :: z + integer(psb_ipk_), intent(in) :: nzrmax + real(psb_spk_), intent(in) :: sp_thresh + integer(psb_ipk_), intent(out) :: info + ! + integer(psb_ipk_) :: i,j,k, err_act, nz, nzra, nzrz, ipz1,ipz2, nzz, ip1, ip2, l2 + integer(psb_ipk_), allocatable :: ia(:), ja(:), iz(:),jz(:) + real(psb_spk_), allocatable :: zw(:), val(:), valz(:) + integer(psb_ipk_), allocatable :: uplevs(:), rowlevs(:),idxs(:) + real(psb_spk_), allocatable :: row(:) + type(psb_s_coo_sparse_mat) :: trw + type(psb_s_csr_sparse_mat) :: acsr, zcsr + integer(psb_ipk_) :: ktrw, nidx, nlw,nup,jmaxup + type(psb_i_heap) :: heap + real(psb_spk_) :: alpha, nrmi + character(len=20) :: name='psb_sp_invt' + + info = psb_success_ + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_; goto 9999 + end if + + if (.not.(a%is_triangle().and.a%is_unit().and.a%is_upper())) then + write(psb_err_unit,*) 'Wrong A ' + info = psb_err_internal_error_ + call psb_errpush(psb_err_internal_error_,name,a_err='wrong A') + goto 9999 + end if + call a%cp_to(acsr) + call trw%allocate(izero,izero,ione) + if (info == psb_success_) allocate(zw(n),iz(n),valz(n),& + & row(n),rowlevs(n),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + end if + + call zcsr%allocate(n,n,n*nzrmax) + call zcsr%set_triangle() + call zcsr%set_unit(.false.) + call zcsr%set_upper() + ! + ! + nzz = 0 + row(:) = szero + rowlevs(:) = 0 + l2 = 0 + zcsr%irp(1) = 1 + + outer: do i = 1, n-1 + ! ZW = e_i + call psb_s_invt_copyin(i,n,acsr,i,ione,n,nlw,nup,jmaxup,nrmi,row,& + & heap,rowlevs,ktrw,trw,info,sign=-sone) + if (info /= 0) exit + row(i) = sone + ! Adjust norm + if (nrmi < sone) then + nrmi = sqrt(sone + nrmi**2) + else + nrmi = nrmi*sqrt(sone+sone/(nrmi**2)) + end if + + call psb_invt_inv(sp_thresh,i,nrmi,row,heap,rowlevs,& + & acsr%ja,acsr%irp,acsr%val,nidx,idxs,info) + if (info /= 0) exit +!!$ write(0,*) 'Calling copyout ',nzrmax,nlw,nup,nidx,l2 + call psb_s_invt_copyout(nzrmax,sp_thresh,i,n,nlw,nup,jmaxup,nrmi,row,& + & nidx,idxs,l2,zcsr%ja,zcsr%irp,zcsr%val,info) + if (info /= 0) exit + nzz = l2 + end do outer + if (info /= psb_success_) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='mainloop') + goto 9999 + end if + + ipz1 = nzz+1 + call psb_ensure_size(ipz1,zcsr%val,info) + call psb_ensure_size(ipz1,zcsr%ja,info) + zcsr%val(ipz1) = sone + zcsr%ja(ipz1) = n + zcsr%irp(n+1) = ipz1+1 + + call z%mv_from(zcsr) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return +end subroutine psb_ssparse_invt + +subroutine psb_s_invt_copyin(i,m,a,jd,jmin,jmax,nlw,nup,jmaxup,nrmi,row,heap,& + & irwt,ktrw,trw,info,sign) + use psb_base_mod + use psb_d_invt_fact_mod, psb_protect_name => psb_d_invt_copyin + implicit none + type(psb_s_csr_sparse_mat), intent(in) :: a + type(psb_s_coo_sparse_mat), intent(inout) :: trw + integer(psb_ipk_), intent(in) :: i, m,jmin,jmax,jd + integer(psb_ipk_), intent(inout) :: ktrw,nlw,nup,jmaxup,info + integer(psb_ipk_), intent(inout) :: irwt(:) + real(psb_spk_), intent(inout) :: nrmi + real(psb_spk_), intent(inout) :: row(:) + type(psb_i_heap), intent(inout) :: heap + real(psb_spk_), intent(in), optional :: sign + ! + integer(psb_ipk_) :: k,j,irb,kin,nz, err_act + integer(psb_ipk_), parameter :: nrb=16 + real(psb_dpk_) :: dmaxup, sign_ + real(psb_dpk_), external :: dnrm2 + character(len=20), parameter :: name='invt_copyin' + + info = psb_success_ + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_; goto 9999 + end if + + call heap%init(info) + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_init_heap') + goto 9999 + end if + sign_ = sone + if (present(sign)) sign_ = sign + ! + ! nrmi is the norm of the current sparse row (for the time being, + ! we use the 2-norm). + ! NOTE: the 2-norm below includes also elements that are outside + ! [jmin:jmax] strictly. Is this really important? TO BE CHECKED. + ! + + nlw = 0 + nup = 0 + jmaxup = 0 + dmaxup = szero + nrmi = szero + + do j = a%irp(i), a%irp(i+1) - 1 + k = a%ja(j) + if ((jmin<=k).and.(k<=jmax)) then + row(k) = sign_ * a%val(j) + call heap%insert(k,info) + irwt(k) = 1 + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_insert_heap') + goto 9999 + end if + end if + if (kjd) then + nup = nup + 1 + if (abs(row(k))>dmaxup) then + jmaxup = k + dmaxup = abs(row(k)) + end if + end if + end do + nz = a%irp(i+1) - a%irp(i) + nrmi = dnrm2(nz,a%val(a%irp(i):),ione) + + return + +9999 call psb_error_handler(err_act) + return +end subroutine psb_s_invt_copyin + +subroutine psb_s_invt_copyout(fill_in,thres,i,m,nlw,nup,jmaxup,nrmi,row, & + & nidx,idxs,l2,ja,irp,val,info) + + use psb_base_mod + use psb_s_invt_fact_mod, psb_protect_name => psb_s_invt_copyout + + implicit none + + ! Arguments + integer(psb_ipk_), intent(in) :: fill_in,i,m,nidx,nlw,nup,jmaxup + integer(psb_ipk_), intent(in) :: idxs(:) + integer(psb_ipk_), intent(inout) :: l2, info + integer(psb_ipk_), allocatable, intent(inout) :: ja(:),irp(:) + real(psb_spk_), intent(in) :: thres,nrmi + real(psb_spk_),allocatable, intent(inout) :: val(:) + real(psb_spk_), intent(inout) :: row(:) + + ! Local variables + real(psb_dpk_),allocatable :: xw(:) + integer(psb_ipk_), allocatable :: xwid(:), indx(:) + real(psb_dpk_) :: witem, wmin + integer(psb_ipk_) :: widx + integer(psb_ipk_) :: k,isz,err_act,int_err(5),idxp, nz + type(psb_d_idx_heap) :: heap + character(len=20), parameter :: name='invt_copyout' + character(len=20) :: ch_err + logical :: fndmaxup + + info = psb_success_ + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_; goto 9999 + end if + + ! + ! Here we need to apply also the dropping rule base on the fill-in. + ! We do it by putting into a heap the elements that are not dropped + ! by using the 2-norm rule, and then copying them out. + ! + ! The heap goes down on the entry absolute value, so the first item + ! is the largest absolute value. + ! +!!$ write(0,*) 'invt_copyout ',nidx,nup+fill_in + call heap%init(info,dir=psb_asort_down_) + + if (info == psb_success_) allocate(xwid(nidx),xw(nidx),indx(nidx),stat=info) + if (info /= psb_success_) then + info=psb_err_alloc_request_ + call psb_errpush(info,name,i_err=(/3*nidx/),& + & a_err='real(psb_dpk_)') + goto 9999 + end if + + ! + ! First the lower part + ! + + nz = 0 + idxp = 0 + + do + + idxp = idxp + 1 + if (idxp > nidx) exit + if (idxs(idxp) >= i) exit + widx = idxs(idxp) + witem = row(widx) + ! + ! Dropping rule based on the 2-norm + ! + if (abs(witem) < thres*nrmi) cycle + + nz = nz + 1 + xw(nz) = witem + xwid(nz) = widx + call heap%insert(witem,widx,info) + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_insert_heap') + goto 9999 + end if + end do + + if (nz > 1) then + write(psb_err_unit,*) 'Warning: lower triangle from invt???? ' + end if + + + if (idxp <= size(idxs)) then + if (idxs(idxp) < i) then + do + idxp = idxp + 1 + if (idxp > nidx) exit + if (idxs(idxp) >= i) exit + end do + end if + end if + idxp = idxp - 1 + nz = 0 + wmin=HUGE(wmin) + if (.false.) then + do + + idxp = idxp + 1 + if (idxp > nidx) exit + widx = idxs(idxp) + if (widx < i) then + write(psb_err_unit,*) 'Warning: lower triangle in upper copy',widx,i,idxp,idxs(idxp) + cycle + end if + if (widx > m) then + cycle + end if + witem = row(widx) + ! + ! Dropping rule based on the 2-norm. But keep the jmaxup-th entry anyway. + ! + if ((widx /= jmaxup) .and. (widx /= i) .and. (abs(witem) < thres*nrmi)) then + cycle + end if + if ((widx/=jmaxup).and.(nz > nup+fill_in)) then + if (abs(witem) < wmin) cycle + endif + wmin = min(abs(witem),wmin) + nz = nz + 1 + xw(nz) = witem + xwid(nz) = widx + call heap%insert(witem,widx,info) + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_insert_heap') + goto 9999 + end if + + end do + + ! + ! Now we have to take out the first nup-fill_in entries. But make sure + ! we include entry jmaxup. + ! + if (nz <= nup+fill_in) then + ! + ! Just copy everything from xw + ! + fndmaxup=.true. + else + fndmaxup = .false. + nz = nup+fill_in + do k=1,nz + call heap%get_first(witem,widx,info) + xw(k) = witem + xwid(k) = widx + if (widx == jmaxup) fndmaxup=.true. + end do + end if + if ((i nidx) exit + widx = idxs(idxp) + if (widx < i) then + write(psb_err_unit,*) 'Warning: lower triangle in upper copy',widx,i,idxp,idxs(idxp) + cycle + end if + if (widx > m) then + cycle + end if + witem = row(widx) + ! + ! Dropping rule based on the 2-norm. But keep the jmaxup-th entry anyway. + ! + if ((widx /= i) .and. (abs(witem) < thres*nrmi)) then + cycle + end if + if (nz > nup+fill_in) then + if (abs(witem) < wmin) cycle + endif + wmin = min(abs(witem),wmin) + nz = nz + 1 + xw(nz) = witem + xwid(nz) = widx + call heap%insert(witem,widx,info) + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_insert_heap') + goto 9999 + end if + + end do + + ! + ! Now we have to take out the first nup-fill_in entries. But make sure + ! we include entry jmaxup. + ! + if (nz > nup+fill_in) then + nz = nup+fill_in + do k=1,nz + call heap%get_first(witem,widx,info) + xw(k) = witem + xwid(k) = widx + end do + end if + end if + + ! + ! Now we put things back into ascending column order + ! + call psb_msort(xwid(1:nz),indx(1:nz),dir=psb_sort_up_) + + ! + ! Copy out the upper part of the row + ! + do k=1,nz + l2 = l2 + 1 + if (size(val) < l2) then + ! + ! Figure out a good reallocation size! + ! + isz = max(int(1.2*l2),l2+100) + call psb_realloc(isz,val,info) + if (info == psb_success_) call psb_realloc(isz,ja,info) + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='Allocate') + goto 9999 + end if + end if + ja(l2) = xwid(k) + val(l2) = xw(indx(k)) + end do + + ! + ! Set row to zero + ! + do idxp=1,nidx + row(idxs(idxp)) = szero + end do + + irp(i+1) = l2 + 1 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return +end subroutine psb_s_invt_copyout + +subroutine psb_s_invt_inv(thres,i,nrmi,row,heap,irwt,ja,irp,val,nidx,idxs,info) + + use psb_base_mod + use psb_s_invt_fact_mod, psb_protect_name => psb_s_invt_inv + + implicit none + + ! Arguments + type(psb_i_heap), intent(inout) :: heap + integer(psb_ipk_), intent(in) :: i + integer(psb_ipk_), intent(inout) :: nidx,info + integer(psb_ipk_), intent(inout) :: irwt(:) + real(psb_spk_), intent(in) :: thres,nrmi + integer(psb_ipk_), allocatable, intent(inout) :: idxs(:) + integer(psb_ipk_), intent(in) :: ja(:),irp(:) + real(psb_spk_), intent(in) :: val(:) + real(psb_spk_), intent(inout) :: row(:) + + ! Local Variables + integer(psb_ipk_) :: k,j,jj,lastk,iret + real(psb_dpk_) :: rwk, alpha + + info = psb_success_ + + call psb_ensure_size(200, idxs, info) + if (info /= psb_success_) return + nidx = 1 + idxs(1) = i + lastk = i + irwt(i) = 1 +!!$ write(0,*) 'Drop Threshold ',thres*nrmi + ! + ! Do while there are indices to be processed + ! + do + + call heap%get_first(k,iret) + if (iret < 0) exit + + ! + ! An index may have been put on the heap more than once. + ! Should not happen, but just in case. + ! + if (k == lastk) cycle + lastk = k + + ! + ! Dropping rule based on the threshold: compare the absolute + ! value of each updated entry of row with thres * 2-norm of row. + ! + rwk = row(k) + + if (abs(rwk) < thres*nrmi) then + ! + ! Drop the entry. + ! + row(k) = dzero + irwt(k) = 0 + cycle + else + ! + ! Note: since U is scaled while copying it out (see ilut_copyout), + ! we can use rwk in the update below. + ! + do jj=irp(k),irp(k+1)-1 + j = ja(jj) + if (j<=k) then + info = -i + return + endif + ! + ! Update row(j) and, if it is not to be discarded, insert + ! its index into the heap for further processing. + ! + row(j) = row(j) - rwk * val(jj) + if (irwt(j) == 0) then + if (abs(row(j)) < thres*nrmi) then + ! + ! Drop the entry. + ! + row(j) = dzero + else + ! + ! Do the insertion. + ! + call heap%insert(j,info) + if (info /= psb_success_) return + irwt(j) = 1 + end if + end if + end do + end if + + ! + ! If we get here it is an index we need to keep on copyout. + ! + + nidx = nidx + 1 + call psb_ensure_size(nidx,idxs,info,addsz=psb_heap_resize) + if (info /= psb_success_) return + idxs(nidx) = k + irwt(k) = 0 + end do + + irwt(i) = 0 +end subroutine psb_s_invt_inv diff --git a/prec/impl/psb_s_prec_type_impl.f90 b/prec/impl/psb_s_prec_type_impl.f90 index 4379322b..4272ba75 100644 --- a/prec/impl/psb_s_prec_type_impl.f90 +++ b/prec/impl/psb_s_prec_type_impl.f90 @@ -1,9 +1,9 @@ -! +! ! Parallel Sparse BLAS version 3.5 ! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! +! Salvatore Filippone +! Alfredo Buttari +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -15,7 +15,7 @@ ! 3. The name of the PSBLAS group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. -! +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -27,14 +27,14 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! -! -! +! +! +! ! Parallel Sparse BLAS version 3.5 ! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! +! Salvatore Filippone +! Alfredo Buttari +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -46,7 +46,7 @@ !!$ 3. The name of the PSBLAS group or the names of its contributors may !!$ not be used to endorse or promote products derived from this !!$ software without specific written permission. -!!$ +!!$ !!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS !!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED !!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -58,14 +58,14 @@ !!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) !!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE !!$ POSSIBILITY OF SUCH DAMAGE. -!!$ -!!$ +!!$ +!!$ !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! subroutine psb_s_apply2_vect(prec,x,y,desc_data,info,trans,work) use psb_base_mod use psb_s_prec_type, psb_protect_name => psb_s_apply2_vect - implicit none + implicit none type(psb_desc_type),intent(in) :: desc_data class(psb_sprec_type), intent(inout) :: prec type(psb_s_vect_type),intent(inout) :: x @@ -74,7 +74,7 @@ subroutine psb_s_apply2_vect(prec,x,y,desc_data,info,trans,work) character(len=1), optional :: trans real(psb_spk_),intent(inout), optional, target :: work(:) - character :: trans_ + character :: trans_ real(psb_spk_), pointer :: work_(:) type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np,me @@ -88,25 +88,25 @@ subroutine psb_s_apply2_vect(prec,x,y,desc_data,info,trans,work) ctxt = desc_data%get_context() call psb_info(ctxt, me, np) - if (present(trans)) then + if (present(trans)) then trans_=psb_toupper(trans) else trans_='N' end if - if (present(work)) then + if (present(work)) then work_ => work else allocate(work_(4*desc_data%get_local_cols()),stat=info) - if (info /= psb_success_) then + if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err='Allocate') - goto 9999 + goto 9999 end if end if - if (.not.allocated(prec%prec)) then + if (.not.allocated(prec%prec)) then info = 1124 call psb_errpush(info,name,a_err="preconditioner") goto 9999 @@ -115,13 +115,13 @@ subroutine psb_s_apply2_vect(prec,x,y,desc_data,info,trans,work) call prec%prec%apply(sone,x,szero,y,desc_data,info,& & trans=trans_,work=work_) - if (present(work)) then + if (present(work)) then else deallocate(work_,stat=info) - if (info /= psb_success_) then + if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err='DeAllocate') - goto 9999 + goto 9999 end if end if @@ -136,7 +136,7 @@ end subroutine psb_s_apply2_vect subroutine psb_s_apply1_vect(prec,x,desc_data,info,trans,work) use psb_base_mod use psb_s_prec_type, psb_protect_name => psb_s_apply1_vect - implicit none + implicit none type(psb_desc_type),intent(in) :: desc_data class(psb_sprec_type), intent(inout) :: prec type(psb_s_vect_type),intent(inout) :: x @@ -145,7 +145,7 @@ subroutine psb_s_apply1_vect(prec,x,desc_data,info,trans,work) real(psb_spk_),intent(inout), optional, target :: work(:) type(psb_s_vect_type) :: ww - character :: trans_ + character :: trans_ real(psb_spk_), pointer :: work_(:) type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np,me @@ -159,25 +159,25 @@ subroutine psb_s_apply1_vect(prec,x,desc_data,info,trans,work) ctxt = desc_data%get_context() call psb_info(ctxt, me, np) - if (present(trans)) then + if (present(trans)) then trans_=psb_toupper(trans) else trans_='N' end if - if (present(work)) then + if (present(work)) then work_ => work else allocate(work_(4*desc_data%get_local_cols()),stat=info) - if (info /= psb_success_) then + if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err='Allocate') - goto 9999 + goto 9999 end if end if - if (.not.allocated(prec%prec)) then + if (.not.allocated(prec%prec)) then info = 1124 call psb_errpush(info,name,a_err="preconditioner") goto 9999 @@ -188,13 +188,13 @@ subroutine psb_s_apply1_vect(prec,x,desc_data,info,trans,work) & trans=trans_,work=work_) if (info == 0) call psb_geaxpby(sone,ww,szero,x,desc_data,info) call psb_gefree(ww,desc_data,info) - if (present(work)) then + if (present(work)) then else deallocate(work_,stat=info) - if (info /= psb_success_) then + if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err='DeAllocate') - goto 9999 + goto 9999 end if end if @@ -209,7 +209,7 @@ end subroutine psb_s_apply1_vect subroutine psb_s_apply2v(prec,x,y,desc_data,info,trans,work) use psb_base_mod use psb_s_prec_type, psb_protect_name => psb_s_apply2v - implicit none + implicit none type(psb_desc_type),intent(in) :: desc_data class(psb_sprec_type), intent(inout) :: prec real(psb_spk_),intent(inout) :: x(:) @@ -218,7 +218,7 @@ subroutine psb_s_apply2v(prec,x,y,desc_data,info,trans,work) character(len=1), optional :: trans real(psb_spk_),intent(inout), optional, target :: work(:) - character :: trans_ + character :: trans_ real(psb_spk_), pointer :: work_(:) type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np,me @@ -232,37 +232,37 @@ subroutine psb_s_apply2v(prec,x,y,desc_data,info,trans,work) ctxt = desc_data%get_context() call psb_info(ctxt, me, np) - if (present(trans)) then + if (present(trans)) then trans_=trans else trans_='N' end if - if (present(work)) then + if (present(work)) then work_ => work else allocate(work_(4*desc_data%get_local_cols()),stat=info) - if (info /= psb_success_) then + if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err='Allocate') - goto 9999 + goto 9999 end if end if - if (.not.allocated(prec%prec)) then + if (.not.allocated(prec%prec)) then info = 1124 call psb_errpush(info,name,a_err="preconditioner") goto 9999 end if call prec%prec%apply(sone,x,szero,y,desc_data,info,trans_,work=work_) - if (present(work)) then + if (present(work)) then else deallocate(work_,stat=info) - if (info /= psb_success_) then + if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err='DeAllocate') - goto 9999 + goto 9999 end if end if @@ -277,7 +277,7 @@ end subroutine psb_s_apply2v subroutine psb_s_apply1v(prec,x,desc_data,info,trans) use psb_base_mod use psb_s_prec_type, psb_protect_name => psb_s_apply1v - implicit none + implicit none type(psb_desc_type),intent(in) :: desc_data class(psb_sprec_type), intent(inout) :: prec real(psb_spk_),intent(inout) :: x(:) @@ -293,36 +293,34 @@ subroutine psb_s_apply1v(prec,x,desc_data,info,trans) name='psb_s_apply1v' info = psb_success_ call psb_erractionsave(err_act) - - ctxt = desc_data%get_context() call psb_info(ctxt, me, np) - if (present(trans)) then + if (present(trans)) then trans_=psb_toupper(trans) else trans_='N' end if - if (.not.allocated(prec%prec)) then + if (.not.allocated(prec%prec)) then info = 1124 call psb_errpush(info,name,a_err="preconditioner") goto 9999 end if allocate(ww(size(x)),w1(size(x)),stat=info) - if (info /= psb_success_) then + if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err='Allocate') - goto 9999 + goto 9999 end if call prec%prec%apply(sone,x,szero,ww,desc_data,info,& & trans_,work=w1) if(info /= psb_success_) goto 9999 x(:) = ww(:) deallocate(ww,W1,stat=info) - if (info /= psb_success_) then + if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err='DeAllocate') - goto 9999 + goto 9999 end if @@ -336,3 +334,159 @@ subroutine psb_s_apply1v(prec,x,desc_data,info,trans) end subroutine psb_s_apply1v +subroutine psb_scprecseti(prec,what,val,info,ilev,ilmax,pos,idx) + use psb_base_mod + use psb_s_prec_type, psb_protect_name => psb_scprecseti + implicit none + + class(psb_sprec_type), intent(inout) :: prec + character(len=*), intent(in) :: what + integer(psb_ipk_), intent(in) :: val + integer(psb_ipk_), intent(out) :: info + ! This optional inputs are backport from the inputs available in AMG4PSBLAS, + ! they are of no actual use here a part from compatibility reasons. + integer(psb_ipk_), optional, intent(in) :: ilev,ilmax,idx + character(len=*), optional, intent(in) :: pos + + ! Local variables + character(len=*), parameter :: name='psb_precseti' + + info = psb_success_ + + ! We need to convert from the 'what' string to the corresponding integer + ! value befor passing the call to the set of the inner method. + select case (psb_toupper(trim(what))) + case ('SUB_FILLIN') + call prec%prec%precset(psb_ilu_fill_in_,val,info) + case ('INV_FILLIN') + call prec%prec%precset(psb_inv_fillin_,val,info) + case default + info = psb_err_invalid_args_combination_ + write(psb_err_unit,*) name,& + & ': Error: uninitialized preconditioner,',& + &' should call prec%init' + return + end select + +end subroutine psb_scprecseti + +subroutine psb_scprecsetr(prec,what,val,info,ilev,ilmax,pos,idx) + use psb_base_mod + use psb_s_prec_type, psb_protect_name => psb_scprecsetr + implicit none + + class(psb_sprec_type), intent(inout) :: prec + character(len=*), intent(in) :: what + real(psb_spk_), intent(in) :: val + integer(psb_ipk_), intent(out) :: info + ! This optional inputs are backport from the inputs available in AMG4PSBLAS, + ! they are of no actual use here a part from compatibility reasons. + integer(psb_ipk_), optional, intent(in) :: ilev,ilmax,idx + character(len=*), optional, intent(in) :: pos + + ! Local variables + character(len=*), parameter :: name='psb_precsetr' + + info = psb_success_ + + ! We need to convert from the 'what' string to the corresponding integer + ! value befor passing the call to the set of the inner method. + select case (psb_toupper(trim(what))) + case('SUB_ILUTHRS') + call prec%prec%precset(psb_fact_eps_,val,info) + case('INV_THRESH') + call prec%prec%precset(psb_inv_thresh_,val,info) + case default + info = psb_err_invalid_args_combination_ + write(psb_err_unit,*) name,& + & ': Error: uninitialized preconditioner,',& + &' should call prec%init' + return + end select + +end subroutine psb_scprecsetr + +subroutine psb_scprecsetc(prec,what,string,info,ilev,ilmax,pos,idx) + use psb_base_mod + use psb_s_prec_type, psb_protect_name => psb_scprecsetc + implicit none + + class(psb_sprec_type), intent(inout) :: prec + character(len=*), intent(in) :: what + character(len=*), intent(in) :: string + integer(psb_ipk_), intent(out) :: info + ! This optional inputs are backport from the inputs available in AMG4PSBLAS, + ! they are of no actual use here a part from compatibility reasons. + integer(psb_ipk_), optional, intent(in) :: ilev,ilmax,idx + character(len=*), optional, intent(in) :: pos + + ! Local variables + character(len=*), parameter :: name='psb_precsetc' + + info = psb_success_ + + ! We need to convert from the 'what' string to the corresponding integer + ! value befor passing the call to the set of the inner method. + select case (psb_toupper(trim(what))) + case ('SUB_SOLVE') + ! We select here the type of solver on the block + select case (psb_toupper(trim(string))) + case("ILU") + call prec%prec%precset(psb_f_type_,psb_f_ilu_k_,info) + call prec%prec%precset(psb_ilu_ialg_,psb_ilu_n_,info) + case("ILUT") + call prec%prec%precset(psb_f_type_,psb_f_ilu_t_,info) + call prec%prec%precset(psb_ilu_ialg_,psb_ilu_t_,info) + case("AINV") + call prec%prec%precset(psb_f_type_,psb_f_ainv_,info) + case("INVK") + call prec%prec%precset(psb_f_type_,psb_f_invk_,info) + case("INVT") + call prec%prec%precset(psb_f_type_,psb_f_invt_,info) + case default + ! Default to ILU(0) factorization + call prec%prec%precset(psb_f_type_,psb_f_ilu_n_,info) + call prec%prec%precset(psb_ilu_ialg_,psb_ilu_n_,info) + end select + case ("ILU_ALG") + select case (psb_toupper(trim(string))) + case ("MILU") + call prec%prec%precset(psb_ilu_ialg_,psb_milu_n_,info) + case default + ! Do nothing + end select + case ("ILUT_SCALE") + select case (psb_toupper(trim(string))) + case ("MAXVAL") + call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_maxval_,info) + case ("DIAG") + call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_diag_,info) + case ("ARWSUM") + call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_arwsum_,info) + case ("ARCSUM") + call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_arcsum_,info) + case ("ACLSUM") + call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_aclsum_,info) + case ("NONE") + call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_none_,info) + case default + call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_none_,info) + end select + case ("AINV_ALG") + select case (psb_toupper(trim(string))) + case("LLK") + call prec%prec%precset(psb_ainv_alg_,psb_ainv_llk_,info) + case("SYM-LLK") + call prec%prec%precset(psb_ainv_alg_,psb_ainv_s_llk_,info) + case("STAB-LLK") + call prec%prec%precset(psb_ainv_alg_,psb_ainv_s_ft_llk_,info) + case("MLK","LMX") + call prec%prec%precset(psb_ainv_alg_,psb_ainv_mlk_,info) + case default + call prec%prec%precset(psb_ainv_alg_,psb_ainv_llk_,info) + end select + case default + + end select + +end subroutine psb_scprecsetc diff --git a/prec/impl/psb_s_sp_drop.f90 b/prec/impl/psb_s_sp_drop.f90 new file mode 100644 index 00000000..bc297d08 --- /dev/null +++ b/prec/impl/psb_s_sp_drop.f90 @@ -0,0 +1,168 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! Moved here from AMG-AINV, original copyright below. +! +! +! AMG-AINV: Approximate Inverse plugin for +! AMG4PSBLAS version 1.0 +! +! (C) Copyright 2020 +! +! Salvatore Filippone University of Rome Tor Vergata +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +subroutine psb_s_sp_drop(idiag,nzrmax,sp_thresh,nz,iz,valz,info) + use psb_base_mod + implicit none + real(psb_spk_), intent(in) :: sp_thresh + integer(psb_ipk_), intent(in) :: idiag, nzrmax + integer(psb_ipk_), intent(inout) :: nz + integer(psb_ipk_), intent(inout) :: iz(:) + real(psb_spk_), intent(inout) :: valz(:) + integer(psb_ipk_), intent(out) :: info + ! + integer(psb_ipk_) :: i, j, idf, nw + real(psb_spk_) :: witem + integer(psb_ipk_) :: widx + real(psb_spk_), allocatable :: xw(:) + integer(psb_ipk_), allocatable :: xwid(:), indx(:) + + + info = psb_success_ + + if (nz > min(size(iz),size(valz))) then + write(0,*) 'Serious size problem ',nz,size(iz),size(valz) + info = -2 + return + end if + allocate(xw(nz),xwid(nz),indx(nz),stat=info) + if (info /= psb_success_) then + write(psb_err_unit,*) ' Memory allocation failure in sp_drop',nz,info + return + endif + + ! Always keep the diagonal element + idf = -1 + do i=1, nz + if (iz(i) == idiag) then + idf = i + witem = valz(i) + widx = iz(i) + valz(i) = valz(1) + iz(i) = iz(1) + valz(1) = witem + iz(1) = widx + exit + end if + end do + + if (idf == -1) then + + xw(1:nz) = valz(1:nz) + call psb_qsort(xw(1:nz),indx(1:nz),dir=psb_asort_down_) + do i=1, nz + xwid(i) = iz(indx(i)) + end do + nw = min(nz,nzrmax) + do + if (nw <= 1) exit + if (abs(xw(nw)) < sp_thresh) then + nw = nw - 1 + else + exit + end if + end do + nw = max(nw, 1) + + else + + nw = nz-1 + + xw(1:nw) = valz(2:nz) + + call psb_qsort(xw(1:nw),indx(1:nw),dir=psb_asort_down_) + nw = min(nw,nzrmax-1) + do + if (nw <= 1) exit + if (abs(xw(nw)) < sp_thresh) then + nw = nw - 1 + else + exit + end if + end do + + do i=1, nw + xwid(i) = iz(1+indx(i)) + end do + nw = nw + 1 + xw(nw) = valz(1) + xwid(nw) = iz(1) + end if + + call psb_msort(xwid(1:nw),indx(1:nw),dir=psb_sort_up_) + + do i=1, nw + valz(i) = xw(indx(i)) + iz(i) = xwid(i) + end do + nz = nw + if (nz>nzrmax) write(0,*) 'in sp_drop: ',nw,nzrmax,nz + deallocate(xw,xwid,indx,stat=info) + if (info /= psb_success_) then + write(psb_err_unit,*) ' Memory deallocation failure in sp_drop',info + return + endif + return +end subroutine psb_s_sp_drop diff --git a/prec/impl/psb_s_sparsify.f90 b/prec/impl/psb_s_sparsify.f90 new file mode 100644 index 00000000..f829fbf2 --- /dev/null +++ b/prec/impl/psb_s_sparsify.f90 @@ -0,0 +1,260 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! Moved here from AMG-AINV, original copyright below. +! +! +! AMG-AINV: Approximate Inverse plugin for +! AMG4PSBLAS version 1.0 +! +! (C) Copyright 2020 +! +! Salvatore Filippone University of Rome Tor Vergata +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +subroutine psb_s_sparsify(idiag,nzrmax,sp_thresh,n,zw,nz,iz,valz,info,istart,iheap,ikr) + use psb_base_mod + implicit none + + real(psb_spk_), intent(in) :: sp_thresh + integer(psb_ipk_), intent(in) :: idiag, n, nzrmax + real(psb_spk_), intent(inout) :: zw(:) + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), intent(out) :: iz(:) + real(psb_spk_), intent(out) :: valz(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: istart + type(psb_i_heap), optional :: iheap + integer(psb_ipk_), optional :: ikr(:) + ! + integer(psb_ipk_) :: i, istart_, last_i, iret,k + real(psb_spk_) :: witem + integer(psb_ipk_) :: widx + real(psb_spk_), allocatable :: xw(:) + integer(psb_ipk_), allocatable :: xwid(:), indx(:) + type(psb_s_idx_heap) :: heap + + + info = psb_success_ + istart_ = 1 + if (present(istart)) istart_ = max(1,istart) + if (.false.) then + nz = 0 + do i=istart_, n + if ((i == idiag).or.(abs(zw(i)) >= sp_thresh)) then + nz = nz + 1 + iz(nz) = i + valz(nz) = zw(i) + end if + end do + + else + allocate(xw(nzrmax),xwid(nzrmax),indx(nzrmax),stat=info) + if (info /= psb_success_) then + return + end if + + call heap%init(info,dir=psb_asort_down_) + + ! Keep at least the diagonal + nz = 0 + + if (present(iheap)) then + if (.not.(present(ikr))) then + write(psb_err_unit,*) 'Error: if IHEAP then also IKR' + info = -1 + return + end if + last_i = -1 + do + call iheap%get_first(i,iret) + if (iret < 0) exit + ! An index may have been put on the heap more than once. + if (i == last_i) cycle + last_i = i + if (i == idiag) then + xw(1) = zw(i) + xwid(1) = i + else if (abs(zw(i)) >= sp_thresh) then + call heap%insert(zw(i),i,info) + end if + zw(i) = dzero + ikr(i) = 0 + end do + + else + + do i=istart_, n + if (i == idiag) then + xw(1) = zw(i) + xwid(1) = i + else if (abs(zw(i)) >= sp_thresh) then + call heap%insert(zw(i),i,info) + end if + zw(i) = dzero + end do + end if + + k = 1 + do + if (k == nzrmax) exit + call heap%get_first(witem,widx,info) + if (info == -1) then + info = psb_success_ + exit + endif + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + return + end if + k = k + 1 + xw(k) = witem + xwid(k) = widx + end do + call heap%free(info) + nz = k + call psb_msort(xwid(1:nz),indx(1:nz),dir=psb_sort_up_) +!!$ write(0,*) 'sparsify output for idiag ',idiag,' :',nz,sp_thresh + do i=1, nz + valz(i) = xw(indx(i)) + iz(i) = xwid(i) +!!$ write(0,*) ' ',iz(i),valz(i) + end do + + end if + + return + +end subroutine psb_s_sparsify + + +subroutine psb_s_sparsify_list(idiag,nzrmax,sp_thresh,n,zw,nz,iz,valz,lhead,listv,ikr,info) + use psb_base_mod + implicit none + + real(psb_spk_), intent(in) :: sp_thresh + integer(psb_ipk_), intent(in) :: idiag, n, nzrmax + real(psb_spk_), intent(inout) :: zw(:) + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), intent(out) :: iz(:) + real(psb_spk_), intent(out) :: valz(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(inout) :: lhead, listv(:) + integer(psb_ipk_) :: ikr(:) + ! + integer(psb_ipk_) :: i, istart_, last_i, iret,k,current, next + real(psb_spk_) :: witem + integer(psb_ipk_) :: widx + real(psb_spk_), allocatable :: xw(:) + integer(psb_ipk_), allocatable :: xwid(:), indx(:) + + + info = psb_success_ + istart_ = 1 + allocate(xw(n),xwid(n),indx(n),stat=info) + + current = lhead + lhead = -1 + i = 0 + do while (current >0) + i = i + 1 + xw(i) = zw(current) + xwid(i) = current + + if (current == idiag) then + ! Bring the diagona into first position + witem = xw(1) + widx = xwid(1) + xw(1) = xw(i) + xwid(1) = xwid(i) + xw(i) = witem + xwid(i) = widx + end if + + zw(current) = dzero + ikr(current) = 0 + + next = listv(current) + listv(current) = -1 + current = next + end do + nz = i + if (nz > 2) call psb_hsort(xw(2:nz),ix=xwid(2:nz),& + & dir=psb_asort_down_,flag=psb_sort_keep_idx_) +!!$ write(0,*) 'Done first msort ' +!!$ write(0,*) ' after first msort for idiag ',idiag,' :',nz,sp_thresh +!!$ do i=1, nz +!!$ write(0,*) ' ',xwid(i),xw(i) +!!$ end do + + i = 2 + do while (i<=nz) + if (abs(xw(i)) < sp_thresh) exit + i = i + 1 + end do +!!$ write(0,*) 'NZ ',nz, i, nzrmax + nz = max(1,min(i-1,nzrmax)) + call psb_msort(xwid(1:nz),ix=indx(1:nz),dir=psb_sort_up_) +!!$ write(0,*) 'Done second msort ' + +!!$ write(0,*) 'sparsify output for idiag ',idiag,' :',nz,i,sp_thresh + do i=1, nz + valz(i) = xw(indx(i)) + iz(i) = xwid(i) +!!$ write(0,*) ' ',iz(i),valz(i) + end do + + return + +end subroutine psb_s_sparsify_list diff --git a/prec/impl/psb_srwclip.f90 b/prec/impl/psb_srwclip.f90 new file mode 100644 index 00000000..f57207d7 --- /dev/null +++ b/prec/impl/psb_srwclip.f90 @@ -0,0 +1,90 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! Moved here from AMG-AINV, original copyright below. +! +! +! AMG-AINV: Approximate Inverse plugin for +! AMG4PSBLAS version 1.0 +! +! (C) Copyright 2020 +! +! Salvatore Filippone University of Rome Tor Vergata +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +subroutine psb_s_rwclip(nz,ia,ja,val,imin,imax,jmin,jmax) + use psb_base_mod + + implicit none + integer(psb_ipk_), intent(inout) :: nz + integer(psb_ipk_), intent(inout) :: ia(*), ja(*) + real(psb_spk_), intent(inout) :: val(*) + integer(psb_ipk_), intent(in) :: imin,imax,jmin,jmax + + integer(psb_ipk_) :: i,j + + j = 0 + do i=1, nz + if ((imin <= ia(i)).and.& + & (ia(i) <= imax).and.& + & (jmin <= ja(i)).and.& + & (ja(i) <= jmax) ) then + j = j + 1 + ia(j) = ia(i) + ja(j) = ja(i) + val(j) = val(i) + end if + end do + nz = j +end subroutine psb_s_rwclip diff --git a/prec/impl/psb_ssparse_biconjg_llk.F90 b/prec/impl/psb_ssparse_biconjg_llk.F90 new file mode 100644 index 00000000..6269cdc8 --- /dev/null +++ b/prec/impl/psb_ssparse_biconjg_llk.F90 @@ -0,0 +1,366 @@ +! +! +! AMG-AINV: Approximate Inverse plugin for +! AMG4PSBLAS version 1.0 +! +! (C) Copyright 2020 +! +! Salvatore Filippone University of Rome Tor Vergata +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +subroutine psb_ssparse_biconjg_llk(n,a,p,z,w,nzrmax,sp_thresh,info) + use psb_base_mod + use psb_s_ainv_tools_mod + use psb_s_biconjg_mod, psb_protect_name => psb_ssparse_biconjg_llk + ! + ! Left-looking variant + ! + ! + implicit none + integer(psb_ipk_), intent(in) :: n + type(psb_s_csr_sparse_mat), intent(in) :: a + type(psb_s_csc_sparse_mat), intent(inout) :: z,w + integer(psb_ipk_), intent(in) :: nzrmax + real(psb_spk_), intent(in) :: sp_thresh + real(psb_spk_), intent(out) :: p(:) + integer(psb_ipk_), intent(out) :: info + + ! Locals + integer(psb_ipk_), allocatable :: ia(:), ja(:), izkr(:), izcr(:) + real(psb_spk_), allocatable :: zval(:),val(:), q(:) + integer(psb_ipk_) :: i,j,k, kc, kr, err_act, nz, nzra, nzrz, ipzi,ipzj,& + & nzzi,nzzj, nzz, ip1, ip2, ipza,ipzz, ipzn, nzzn, ipz1, ipz2,& + & ipj, lastj, nextj, nzw + type(psb_i_heap) :: heap, rheap + type(psb_s_csc_sparse_mat) :: ac + real(psb_spk_) :: alpha + character(len=20) :: name='psb_orth_llk' + logical, parameter :: debug=.false. + + allocate(zval(n),ia(n),val(n),izkr(n),izcr(n),q(n),stat=info) + if (info == psb_success_) call ac%cp_from_fmt(a,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + return + end if + ! + ! izkr(i): flag nonzeros in ZVAL. To minimize traffic into heap. + ! izcr(i): flag rows to be used for the dot products. Used to minimize + ! traffic in rheap. + ! + do i=1,n + izkr(i) = 0 + izcr(i) = 0 + zval(i) = szero + end do + + ! Init z_1=e_1 and p_1=a_11 + p(1) = szero + i = 1 + nz = a%irp(i+1) - a%irp(i) + do j=1,nz + if (a%ja(j) == 1) then + p(1) = a%val(j) + exit + end if + end do + if (abs(p(1)) < s_epstol) & + & p(1) = 1.d-3 + + q(1) = p(1) + ! + ! + call z%allocate(n,n,n*nzrmax) + + z%icp(1) = 1 + z%icp(2) = 2 + z%ia(1) = 1 + z%val(1) = sone + nzz = 1 + + call w%allocate(n,n,n*nzrmax) + w%icp(1) = 1 + w%icp(2) = 2 + w%ia(1) = 1 + w%val(1) = sone + nzw = 1 + + do i = 2, n + if (debug) write(0,*) 'Main loop iteration ',i,n + + ! + ! Update loop on Z. + ! Must be separated from update loop of W because of + ! the conflict on J that would result. + ! + + ! ZVAL = e_i + ! !$ do j=1, i-1 + ! !$ zval(j) = szero + ! !$ end do + zval(i) = sone + izkr(i) = 1 + call heap%init(info) + if (info == psb_success_) call heap%insert(i,info) + + if (info == psb_success_) call rheap%init(info) + + do j = ac%icp(i), ac%icp(i+1)-1 + if (ac%ia(j) < i) then +!!$ write(0,*) i, ' Outer inserting ',ac%ia(j) + if (info == psb_success_) call rheap%insert(ac%ia(j),info) + izcr(ac%ia(j)) = 1 + end if + end do + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_init_heap') + return + end if + + ! Update loop + ! The idea is to keep track of the indices of the nonzeros in zval, + ! so as to only do the dot products on the rows which have nonzeros + ! in their positions; to do this we keep an extra + ! copy of A in CSC, and the row indices to be considered are in rheap. + lastj = -1 + outer: do + inner: do + call rheap%get_first(j,info) + if (debug) write(0,*) 'from get_first: ',j,info + if (info == -1) exit outer ! Empty heap + if (j > lastj) then + lastj = j + exit inner + end if + end do inner + + izcr(j) = 0 + if (j>=i) cycle outer + if (debug) write(0,*) 'update loop, using row: ',j,i + ip1 = a%irp(j) + ip2 = a%irp(j+1) - 1 + do + if (ip2 < ip1 ) exit + if (a%ja(ip2) <= n) exit + ip2 = ip2 -1 + end do + nzra = max(0,ip2 - ip1 + 1) + p(i) = psb_spge_dot(nzra,a%ja(ip1:ip2),a%val(ip1:ip2),zval) + ! ! write(psb_err_unit,*) j,i,p(i) + + alpha = (-p(i)/p(j)) +!!$ write(0,*) 'At step ',i,j,' p(i) ',p(i),alpha + if (.false..or.(abs(alpha) > sp_thresh)) then +!!$ write(0,*) 'At step ',i,j,' range ',z%icp(j), z%icp(j+1)-1, & +!!$ & ' vals ',z%ia(z%icp(j):z%icp(j+1)-1) + do k=z%icp(j), z%icp(j+1)-1 + kr = z%ia(k) + zval(kr) = zval(kr) + alpha*z%val(k) + if (izkr(kr) == 0) then +!!$ write(0,*) ' main inner Inserting ',kr + call heap%insert(kr,info) + if (info /= psb_success_) exit + izkr(kr) = 1 + ! We have just added a new nonzero in KR. Thus, we will + ! need to explicitly compute the dot products on all + ! rows jj).and.(nextj lastj) then + lastj = j + exit innerw + end if + end do innerw + izcr(j) = 0 + if (j>=i) cycle outerw + if (debug) write(0,*) 'update loop, using row: ',j + ip1 = ac%icp(j) + ip2 = ac%icp(j+1) - 1 + do + if (ip2 < ip1 ) exit + if (ac%ia(ip2) <= n) exit + ip2 = ip2 -1 + end do + nzra = max(0,ip2 - ip1 + 1) + q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval) + ! !$ write(psb_err_unit,*) j,i,p(i) + + alpha = (-q(i)/q(j)) + if (.false..or.(abs(alpha) > sp_thresh)) then + + do k=w%icp(j), w%icp(j+1)-1 + kr = w%ia(k) + zval(kr) = zval(kr) + alpha*w%val(k) + if (izkr(kr) == 0) then + call heap%insert(kr,info) + if (info /= psb_success_) exit + izkr(kr) = 1 + ! We have just added a new nonzero in KR. Thus, we will + ! need to explicitly compute the dot products on all + ! rows jj).and.(nextj psb_ssparse_biconjg_llk_noth + + ! + ! Left-looking variant, with NO drop rule on p(i)/p(j) + ! + ! + implicit none + integer(psb_ipk_), intent(in) :: n + type(psb_s_csr_sparse_mat), intent(in) :: a + type(psb_s_csc_sparse_mat), intent(inout) :: z,w + integer(psb_ipk_), intent(in) :: nzrmax + real(psb_spk_), intent(in) :: sp_thresh + real(psb_spk_), intent(out) :: p(:) + integer(psb_ipk_), intent(out) :: info + + ! Locals + integer(psb_ipk_), allocatable :: ia(:), ja(:), izkr(:), izcr(:) + real(psb_spk_), allocatable :: zval(:),val(:), q(:) + integer(psb_ipk_) :: i,j,k, kc, kr, err_act, nz, nzra, nzrz, ipzi,ipzj,& + & nzzi,nzzj, nzz, ip1, ip2, ipza,ipzz, ipzn, nzzn, ipz1, ipz2,& + & ipj, lastj, nextj, nzw + type(psb_i_heap) :: heap, rheap + type(psb_s_csc_sparse_mat) :: ac + real(psb_dpk_) :: alpha + character(len=20) :: name='psb_orth_llk_noth' + logical, parameter :: debug=.false. + + allocate(zval(n),ia(n),val(n),izkr(n),izcr(n),q(n),stat=info) + if (info == psb_success_) call ac%cp_from_fmt(a,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + return + end if + ! + ! izkr(i): flag nonzeros in ZVAL. To minimize traffic into heap. + ! izcr(i): flag rows to be used for the dot products. Used to minimize + ! traffic in rheap. + ! + do i=1,n + izkr(i) = 0 + izcr(i) = 0 + zval(i) = szero + end do + + ! Init z_1=e_1 and p_1=a_11 + p(1) = szero + i = 1 + nz = a%irp(i+1) - a%irp(i) + do j=1,nz + if (a%ja(j) == 1) then + p(1) = a%val(j) + exit + end if + end do + if (abs(p(1)) < s_epstol) & + & p(1) = 1.d-3 + + q(1) = p(1) + ! + ! + call z%allocate(n,n,n*nzrmax) + + z%icp(1) = 1 + z%icp(2) = 2 + z%ia(1) = 1 + z%val(1) = done + nzz = 1 + + call w%allocate(n,n,n*nzrmax) + w%icp(1) = 1 + w%icp(2) = 2 + w%ia(1) = 1 + w%val(1) = done + nzw = 1 + + do i = 2, n + if (debug) write(0,*) 'Main loop iteration ',i,n + + ! + ! Update loop on Z. + ! Must be separated from update loop of W because of + ! the conflict on J that would result. + ! + + ! ZVAL = e_i + ! !$ do j=1, i-1 + ! !$ zval(j) = szero + ! !$ end do + zval(i) = done + izkr(i) = 1 + call heap%init(info) + if (info == psb_success_) call heap%insert(i,info) + + if (info == psb_success_) call rheap%init(info) + do j = ac%icp(i), ac%icp(i+1)-1 + if (ac%ia(j) < i) then + if (info == psb_success_) call rheap%insert(ac%ia(j),info) + izcr(ac%ia(j)) = 1 + end if + end do + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_init_heap') + return + end if + + ! Update loop + ! The idea is to keep track of the indices of the nonzeros in zval, + ! so as to only do the dot products on the rows which have nonzeros + ! in their positions; to do this we keep an extra + ! copy of A in CSC, and the row indices to be considered are in rheap. + lastj = -1 + outer: do + inner: do + call rheap%get_first(j,info) + if (debug) write(0,*) 'from get_first: ',j,info + if (info == -1) exit outer ! Empty heap + if (j > lastj) then + lastj = j + exit inner + end if + end do inner + + izcr(j) = 0 + if (j>=i) cycle outer + if (debug) write(0,*) 'update loop, using row: ',j,i + ip1 = a%irp(j) + ip2 = a%irp(j+1) - 1 + do + if (ip2 < ip1 ) exit + if (a%ja(ip2) <= n) exit + ip2 = ip2 -1 + end do + nzra = max(0,ip2 - ip1 + 1) + p(i) = psb_spge_dot(nzra,a%ja(ip1:ip2),a%val(ip1:ip2),zval) + ! !$ write(psb_err_unit,*) j,i,p(i) + + alpha = (-p(i)/p(j)) + + if (.true.) then + do k=z%icp(j), z%icp(j+1)-1 + kr = z%ia(k) + zval(kr) = zval(kr) + alpha*z%val(k) + if (izkr(kr) == 0) then + + call heap%insert(kr,info) + if (info /= psb_success_) exit + izkr(kr) = 1 + ! We have just added a new nonzero in KR. Thus, we will + ! need to explicitly compute the dot products on all + ! rows jj).and.(nextj lastj) then + lastj = j + exit innerw + end if + end do innerw + izcr(j) = 0 + if (j>=i) cycle outerw + if (debug) write(0,*) 'update loop, using row: ',j + ip1 = ac%icp(j) + ip2 = ac%icp(j+1) - 1 + do + if (ip2 < ip1 ) exit + if (ac%ia(ip2) <= n) exit + ip2 = ip2 -1 + end do + nzra = max(0,ip2 - ip1 + 1) + q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval) + ! !$ write(psb_err_unit,*) j,i,p(i) + + alpha = (-q(i)/q(j)) + if (.true.) then + + do k=w%icp(j), w%icp(j+1)-1 + kr = w%ia(k) + zval(kr) = zval(kr) + alpha*w%val(k) + if (izkr(kr) == 0) then + call heap%insert(kr,info) + if (info /= psb_success_) exit + izkr(kr) = 1 + ! We have just added a new nonzero in KR. Thus, we will + ! need to explicitly compute the dot products on all + ! rows jj).and.(nextj psb_ssparse_biconjg_mlk + ! + ! Left-looking variant + ! + ! + implicit none + integer(psb_ipk_), intent(in) :: n + type(psb_s_csr_sparse_mat), intent(in) :: a + type(psb_s_csc_sparse_mat), intent(inout) :: z,w + integer(psb_ipk_), intent(in) :: nzrmax + real(psb_spk_), intent(in) :: sp_thresh + real(psb_spk_), intent(out) :: p(:) + integer(psb_ipk_), intent(out) :: info + + ! Locals + integer(psb_ipk_), allocatable :: ia(:), ja(:), izkr(:), izcr(:), hlist(:), bfr(:), rwlist(:) + real(psb_spk_), allocatable :: zval(:),val(:), q(:) + integer(psb_ipk_) :: i,j,k, kc, kr, err_act, nz, nzra, nzrz, ipzi,ipzj,& + & nzzi,nzzj, nzz, ip1, ip2, ipza,ipzz, ipzn, nzzn, ipz1, ipz2,& + & ipj, lastj, nextj, nzw, hlhead, li, mj, kkc, ifrst, ilst, rwhead + type(psb_i_heap) :: heap, rheap + type(psb_s_csc_sparse_mat) :: ac + real(psb_spk_) :: alpha + character(len=20) :: name='psb_biconjg_mlk' + logical, parameter :: debug=.false., test_merge=.true. + + allocate(zval(n),ia(n),val(n),izkr(n),izcr(n),q(n), & + & hlist(n),rwlist(n),bfr(n),stat=info) + if (info == psb_success_) call ac%cp_from_fmt(a,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + return + end if + ! + ! izkr(i): flag nonzeros in ZVAL. To minimize traffic into heap. + ! izcr(i): flag rows to be used for the dot products. Used to minimize + ! traffic in rheap. + ! + do i=1,n + izkr(i) = 0 + izcr(i) = 0 + zval(i) = szero + hlist(i) = -1 + rwlist(i) = -1 + end do + + ! Init z_1=e_1 and p_1=a_11 + p(1) = szero + i = 1 + nz = a%irp(i+1) - a%irp(i) + do j=1,nz + if (a%ja(j) == 1) then + p(1) = a%val(j) + exit + end if + end do + if (abs(p(1)) < s_epstol) & + & p(1) = 1.d-3 + + q(1) = p(1) + ! + ! + call z%allocate(n,n,n*nzrmax) + + z%icp(1) = 1 + z%icp(2) = 2 + z%ia(1) = 1 + z%val(1) = sone + nzz = 1 + + call w%allocate(n,n,n*nzrmax) + w%icp(1) = 1 + w%icp(2) = 2 + w%ia(1) = 1 + w%val(1) = sone + nzw = 1 + + + do i = 2, n + if (debug) write(0,*) 'Main loop iteration ',i,n + + ! + ! Update loop on Z. + ! Must be separated from update loop of W because of + ! the conflict on J that would result. + ! + + ! ZVAL = e_i + ! !$ do j=1, i-1 + ! !$ zval(j) = szero + ! !$ end do + zval(i) = sone + izkr(i) = 1 + rwhead = i + + hlhead = -1 + + kkc = 0 + ilst = ac%icp(i)-1 + ifrst = ac%icp(i) + do j = ac%icp(i+1)-1, ac%icp(i), -1 + if (ac%ia(j) < i) then + ilst = j + exit + end if + end do + kkc = ilst-ifrst+1 + + if (.true..or.debug) then +!!$ write(0,*) 'Outer Before insertion : ',hlhead + call printlist(hlhead,hlist) + end if + if (kkc > 0) then +!!$ write(0,*) i,' Outer Inserting : ',kkc,':',ac%ia(ifrst:ilst) + + !call hlmerge(hlhead,hlist,bfr(1:kkc)) + call hlmerge(hlhead,hlist,ac%ia(ifrst:ilst)) + end if + if (debug) then + write(0,*) 'Outer After insertion: ',hlhead + call printlist(hlhead,hlist) + end if + + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='init_lists') + return + end if + + ! Update loop + ! The idea is to keep track of the indices of the nonzeros in zval, + ! so as to only do the dot products on the rows which have nonzeros + ! in their positions; to do this we keep an extra + ! copy of A in CSC, and the row indices to be considered are in rheap. + lastj = -1 + outer: do + mj = hlhead + if (mj > 0) then + hlhead = hlist(mj) + hlist(mj) = -1 + end if + j = mj + if (j < 0) exit outer + + izcr(j) = 0 + if (j>=i) cycle outer + + if (debug) write(0,*) 'update loop, using row: ',j,i,mj + ip1 = a%irp(j) + ip2 = a%irp(j+1) - 1 + do + if (ip2 < ip1 ) exit + if (a%ja(ip2) <= n) exit + ip2 = ip2 -1 + end do + nzra = max(0,ip2 - ip1 + 1) + p(i) = psb_spge_dot(nzra,a%ja(ip1:ip2),a%val(ip1:ip2),zval) + ! !$ write(psb_err_unit,*) j,i,p(i) + + alpha = (-p(i)/p(j)) +!!$ write(0,*) 'At step ',i,j,' p(i) ',p(i),alpha +!!$ write(0,*) ' Current list is : ',hlhead +!!$ call printlist(hlhead,hlist) +!!$ + + + if (.false..or.(abs(alpha) > sp_thresh)) then + ifrst=z%icp(j) + ilst=z%icp(j+1)-1 + call hlmerge(rwhead,rwlist,z%ia(ifrst:ilst)) +!!$ write(0,*) 'At step ',i,j,' range ',z%icp(j), z%icp(j+1)-1, & +!!$ & ' vals ',z%ia(z%icp(j):z%icp(j+1)-1) + + do k=z%icp(j), z%icp(j+1)-1 + kr = z%ia(k) + zval(kr) = zval(kr) + alpha*z%val(k) + + if (izkr(kr) == 0) then +!!$ write(0,*) ' main inner Inserting ',kr +!!$ call hlmerge(rwhead,rwlist,(/kr/)) + izkr(kr) = 1 + ! We have just added a new nonzero in KR. Thus, we will + ! need to explicitly compute the dot products on all + ! rows jj)) then + ifrst = min(ifrst,kc ) + ilst = max(ilst,kc) + end if + end do + kkc = ilst-ifrst+1 + if (debug) then + write(0,*) 'Inner Before insertion: ' + call printlist(hlhead,hlist) + write(0,*) 'Inner Inserting : ',kkc,':',ac%ia(ifrst:ilst) + end if + if (ilst >= ifrst) then +!!$ write(0,*) j,i,' Inner inserting ',ac%ia(ifrst:ilst) + call hlmerge(hlhead,hlist,ac%ia(ifrst:ilst)) + end if + + if (debug) then + write(0,*) 'Inner After insertion: ',hlhead + call printlist(hlhead,hlist) + end if + + end if + if (info /= psb_success_) exit + end do + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_insert_heap') + return + end if + end if + end do outer + call a%csget(i,i,nzra,ia,ja,val,info) + call rwclip(nzra,ia,ja,val,ione,n,ione,n) + p(i) = psb_spge_dot(nzra,ja,val,zval) + if (abs(p(i)) < s_epstol) & + & p(i) = 1.d-3 + +!!$ write(0,*) 'Dropping from a column with: ',i,psb_howmany_heap(heap),sp_thresh + + ! + ! Sparsify current ZVAL and put into ZMAT + ! + call sparsify(i,nzrmax,sp_thresh,n,zval,nzrz,ia,val,rwhead,rwlist,izkr,info) + + if (info /= psb_success_) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='sparsify') + return + end if + call psb_ensure_size(nzz+nzrz, z%ia, info) + call psb_ensure_size(nzz+nzrz, z%val, info) + ipz1 = z%icp(i) + do j=1, nzrz + z%ia(ipz1 + j -1) = ia(j) + z%val(ipz1 + j -1) = val(j) + end do + z%icp(i+1) = ipz1 + nzrz + nzz = nzz + nzrz + + + ! WVAL = e_i + ! !$ do j=1, i-1 + ! !$ zval(j) = szero + ! !$ end do + zval(i) = sone + izkr(i) = 1 + rwhead = i + hlhead = -1 + + kkc = 0 + ilst = a%irp(i)-1 + ifrst = a%irp(i) + do j = a%irp(i+1)-1, a%irp(i), -1 + if (a%ja(j) < i) then + ilst = j + exit + end if + end do + kkc = ilst-ifrst+1 + + if (debug) then + write(0,*) 'Outer Before insertion: ' + call printlist(hlhead,hlist) + write(0,*) 'Outer Inserting : ',kkc,':',a%ja(ifrst:ilst) + end if + if (kkc > 0 ) then + !call hlmerge(hlhead,hlist,bfr(1:kkc)) + call hlmerge(hlhead,hlist,a%ja(ifrst:ilst)) + end if + if (debug) then + write(0,*) 'Outer After insertion: ',hlhead + call printlist(hlhead,hlist) + end if + + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='init_lists') + return + end if + + ! Update loop + ! The idea is to keep track of the indices of the nonzeros in zval, + ! so as to only do the dot products on the rows which have nonzeros + ! in their positions; to do this we keep an extra + ! copy of A in CSC, and the row indices to be considered are in rheap. + lastj = -1 + outerw: do + mj = hlhead + if (hlhead > 0) then + hlhead = hlist(mj) + hlist(mj) = -1 + end if + j = mj + if (j < 0) exit outerw + + izcr(j) = 0 + if (j>=i) cycle outerw + if (debug) write(0,*) 'update loop, using row: ',j + ip1 = ac%icp(j) + ip2 = ac%icp(j+1) - 1 + do + if (ip2 < ip1 ) exit + if (ac%ia(ip2) <= n) exit + ip2 = ip2 -1 + end do + nzra = max(0,ip2 - ip1 + 1) + q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval) + ! !$ write(psb_err_unit,*) j,i,p(i) + + alpha = (-q(i)/q(j)) + if (.false..or.(abs(alpha) > sp_thresh)) then + ifrst=w%icp(j) + ilst=w%icp(j+1)-1 + call hlmerge(rwhead,rwlist,w%ia(ifrst:ilst)) + + do k=w%icp(j), w%icp(j+1)-1 + kr = w%ia(k) + zval(kr) = zval(kr) + alpha*w%val(k) + if (izkr(kr) == 0) then + izkr(kr) = 1 + ! We have just added a new nonzero in KR. Thus, we will + ! need to explicitly compute the dot products on all + ! rows jj)) then + ifrst = min(ifrst,kc ) + ilst = max(ilst,kc) + end if + end do + kkc = ilst-ifrst+1 + if (debug) then + write(0,*) 'Inner Before insertion: ' + call printlist(hlhead,hlist) + write(0,*) 'Inner Inserting : ',kkc,':',a%ja(ifrst:ilst) + end if + + call hlmerge(hlhead,hlist,a%ja(ifrst:ilst)) + + if (debug) then + write(0,*) 'Inner After insertion: ',hlhead + call printlist(hlhead,hlist) + end if + if (debug) write(0,*) 'update loop, adding indices: ',& + & a%ja(a%irp(kr):a%irp(kr+1)-1) + + end if + if (info /= psb_success_) exit + end do + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_insert_heap') + return + end if + end if + end do outerw + ip1 = ac%icp(i) + ip2 = ac%icp(i+1) - 1 + do + if (ip2 < ip1 ) exit + if (ac%ia(ip2) <= n) exit + ip2 = ip2 -1 + end do + nzra = max(0,ip2 - ip1 + 1) + q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval) + if (abs(q(i)) < s_epstol) & + & q(i) = 1.d-3 + +!!$ write(0,*) 'Dropping from a column with: ',i,psb_howmany_heap(heap),sp_thresh + ! + ! Sparsify current ZVAL and put into ZMAT + ! + call sparsify(i,nzrmax,sp_thresh,n,zval,nzrz,ia,val,rwhead,rwlist,izkr,info) + if (info /= psb_success_) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='sparsify') + return + end if + call psb_ensure_size(nzw+nzrz, w%ia, info) + call psb_ensure_size(nzw+nzrz, w%val, info) + ipz1 = w%icp(i) + do j=1, nzrz + w%ia(ipz1 + j -1) = ia(j) + w%val(ipz1 + j -1) = val(j) + end do + w%icp(i+1) = ipz1 + nzrz + nzw = nzw + nzrz + + end do + +contains + + subroutine hlmerge(head,listv,vals) + integer(psb_ipk_), intent(inout) :: head, listv(:) + integer(psb_ipk_), intent(in) :: vals(:) + integer(psb_ipk_) :: i,j,k, lh, lv, nv, vv, flh, ph + + nv = size(vals) + lh = head + flh = -1 + lv = 1 + if ((head < 0).and.(nv > 0)) then + ! Adjust head if empty + head = vals(lv) + lv = lv + 1 + else if ((head > 0) .and. (nv >0)) then + ! Adjust head if first item less than it + if (head > vals(lv)) then + listv(vals(lv)) = head + head = vals(lv) + lv = lv + 1 + end if + end if + + lh = head + ph = lh + do while ((lh > 0) .and. (lv <= nv)) + if (lh == vals(lv)) then + lv = lv + 1 + else if (lh > vals(lv)) then + listv(vals(lv)) = lh + listv(ph) = vals(lv) + lh = vals(lv) + lv = lv + 1 + else + ph = lh + lh = listv(lh) + end if + end do + lh = ph + do while (lv <= nv) + listv(lh) = vals(lv) + lh = listv(lh) + lv = lv + 1 + end do + end subroutine hlmerge + + + subroutine printlist(head,listv) + integer(psb_ipk_), intent(in) :: head, listv(:) + integer(psb_ipk_) :: li + + li = head + do while (li > 0) + write(0,*) 'Item: ', li + li = listv(li) + end do + end subroutine printlist + +end subroutine psb_ssparse_biconjg_mlk diff --git a/prec/impl/psb_ssparse_biconjg_s_ft_llk.F90 b/prec/impl/psb_ssparse_biconjg_s_ft_llk.F90 new file mode 100644 index 00000000..e8287e84 --- /dev/null +++ b/prec/impl/psb_ssparse_biconjg_s_ft_llk.F90 @@ -0,0 +1,414 @@ +! +! +! AMG-AINV: Approximate Inverse plugin for +! AMG4PSBLAS version 1.0 +! +! (C) Copyright 2020 +! +! Salvatore Filippone University of Rome Tor Vergata +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +subroutine psb_ssparse_biconjg_s_ft_llk(n,a,p,z,w,nzrmax,sp_thresh,info) + use psb_base_mod + use psb_s_ainv_tools_mod + use psb_s_biconjg_mod, psb_protect_name => psb_ssparse_biconjg_s_ft_llk + + ! + ! Left-looking variant, stabilized i.e. product by A is applied + ! to compute the diagonal elements. + ! + ! + implicit none + integer(psb_ipk_), intent(in) :: n + type(psb_s_csr_sparse_mat), intent(in) :: a + type(psb_s_csc_sparse_mat), intent(inout) :: z,w + integer(psb_ipk_), intent(in) :: nzrmax + real(psb_spk_), intent(in) :: sp_thresh + real(psb_spk_), intent(out) :: p(:) + integer(psb_ipk_), intent(out) :: info + + ! Locals + integer(psb_ipk_), allocatable :: ia(:), ja(:), izkr(:), izcr(:),iww(:) + real(psb_spk_), allocatable :: zval(:),val(:), q(:), ww(:) + integer(psb_ipk_) :: i,j,k, kc, kr, err_act, nz, nzra, nzrz, ipzi,ipzj, nzww,& + & nzzi,nzzj, nzz, ip1, ip2, ipza,ipzz, ipzn, nzzn, ipz1, ipz2,& + & ipj, lastj, nextj, nzw, nzrw + type(psb_i_heap) :: heap, rheap + type(psb_s_csc_sparse_mat) :: ac + real(psb_spk_) :: alpha, tmpq,tmpq2 + character(len=20) :: name='psb_orth_llk' + logical, parameter :: debug=.false. + + allocate(zval(n),ia(n),val(n),izkr(n),izcr(n),q(n),iww(n),ww(n),stat=info) + if (info == psb_success_) call ac%cp_from_fmt(a,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + return + end if + ! + ! Init pointers to: + ! ljr(i): last occupied column index within row I + ! izcr(i): first occupied row index within column I + ! + do i=1,n + izkr(i) = 0 + izcr(i) = 0 + zval(i) = szero + end do + + ! Init z_1=e_1 and p_1=a_11 + p(1) = szero + i = 1 + nz = a%irp(i+1) - a%irp(i) + do j=1,nz + if (a%ja(j) == 1) then + p(1) = a%val(j) + exit + end if + end do + if (abs(p(1)) < s_epstol) & + & p(1) = 1.d-3 + + q(1) = p(1) + ! + ! + call z%allocate(n,n,n*nzrmax) + + z%icp(1) = 1 + z%icp(2) = 2 + z%ia(1) = 1 + z%val(1) = sone + nzz = 1 + + call w%allocate(n,n,n*nzrmax) + w%icp(1) = 1 + w%icp(2) = 2 + w%ia(1) = 1 + w%val(1) = sone + nzw = 1 + + do i = 2, n + if (debug) write(0,*) 'Main loop iteration ',i,n + + ! + ! Update loop on Z. + ! Must be separated from update loop of W because of + ! the conflict on J that would result. + ! + + ! ZVAL = e_i + ! !$ do j=1, i-1 + ! !$ zval(j) = szero + ! !$ end do + zval(i) = sone + izkr(i) = 1 + call heap%init(info) + if (info == psb_success_) call heap%insert(i,info) + if (info == psb_success_) call rheap%init(info) + do j = ac%icp(i), ac%icp(i+1)-1 + if (ac%ia(j) lastj) then + lastj = j + exit inner + end if + end do inner + izcr(j) = 0 + if (j>=i) exit outer + if (debug) write(0,*) 'update loop, using row: ',j + ip1 = w%icp(j) + ip2 = w%icp(j+1) - 1 + nzra = max(0,ip2 - ip1 + 1) + nzww = 0 + call psb_s_spvspm(sone,a,nzra,w%ia(ip1:ip2),w%val(ip1:ip2),& + & szero,nzww,iww,ww,info) + + p(i) = psb_spge_dot(nzww,iww,ww,zval) + + ipz1 = z%icp(j) + ipz2 = z%icp(j+1) + nzrz = ipz2-ipz1 + alpha = (-p(i)/p(j)) +!!$ write(0,*) ' p(i)/p(j) ',i,j,alpha,p(i),p(j) + if (.false..or.(abs(alpha) > sp_thresh)) then + + do k=ipz1, ipz2-1 + kr = z%ia(k) + zval(kr) = zval(kr) + alpha*z%val(k) + if (izkr(kr) == 0) then + call heap%insert(kr,info) + if (info /= psb_success_) exit + izkr(kr) = 1 + ! We have just added a new nonzero in KR. Thus, we will + ! need to explicitly compute the dot products on all + ! rows jj).and.(nextj lastj) then + lastj = j + exit innerw + end if + end do innerw + izcr(j) = 0 + if (j>=i) exit outerw + if (debug) write(0,*) 'update loop, using row: ',j + if (.false.) then + ip1 = ac%icp(j) + ip2 = ac%icp(j+1) - 1 + do + if (ip2 < ip1 ) exit + if (ac%ia(ip2) <= n) exit + ip2 = ip2 -1 + end do + nzra = max(0,ip2 - ip1 + 1) + q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval) + ! !$ write(psb_err_unit,*) j,i,p(i) + else + ip1 = z%icp(j) + ip2 = z%icp(j+1) - 1 + nzra = max(0,ip2 - ip1 + 1) + nzww = 0 + call psb_s_spmspv(sone,ac,nzra,z%ia(ip1:ip2),z%val(ip1:ip2),& + & szero,nzww,iww,ww,info) + + q(i) = psb_spge_dot(nzww,iww,ww,zval) + end if + + ipz1 = w%icp(j) + ipz2 = w%icp(j+1) + nzrz = ipz2-ipz1 + alpha = (-q(i)/q(j)) +!!$ write(0,*) ' q(i)/q(j) ',i,j,alpha,q(i),q(j) + if (.false..or.(abs(alpha) > sp_thresh)) then + + do k=ipz1, ipz2-1 + kr = w%ia(k) + zval(kr) = zval(kr) + alpha*w%val(k) + if (izkr(kr) == 0) then + call heap%insert(kr,info) + if (info /= psb_success_) exit + izkr(kr) = 1 + ! We have just added a new nonzero in KR. Thus, we will + ! need to explicitly compute the dot products on all + ! rows jj).and.(nextj psb_ssparse_biconjg_s_llk + + ! + ! Left-looking variant SYMMETRIC/HERMITIAN A. You have been warned! + ! + ! + implicit none + integer(psb_ipk_), intent(in) :: n + type(psb_s_csr_sparse_mat), intent(in) :: a + type(psb_s_csc_sparse_mat), intent(inout) :: z,w + integer(psb_ipk_), intent(in) :: nzrmax + real(psb_spk_), intent(in) :: sp_thresh + real(psb_spk_), intent(out) :: p(:) + integer(psb_ipk_), intent(out) :: info + + ! Locals + integer(psb_ipk_), allocatable :: ia(:), ja(:), izkr(:), izcr(:) + real(psb_spk_), allocatable :: zval(:),val(:), q(:) + integer(psb_ipk_) :: i,j,k, kc, kr, err_act, nz, nzra, nzrz, ipzi,ipzj,& + & nzzi,nzzj, nzz, ip1, ip2, ipza,ipzz, ipzn, nzzn, ipz1, ipz2,& + & ipj, lastj, nextj, nzw,kk + type(psb_i_heap) :: heap, rheap + type(psb_s_csc_sparse_mat) :: ac + real(psb_spk_) :: alpha, zvalmax + character(len=20) :: name='psb_orth_llk' + logical, parameter :: debug=.false. + + allocate(zval(n),ia(n),val(n),izkr(n),izcr(n),stat=info) + if (info == psb_success_) call ac%cp_from_fmt(a,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + return + end if + ! + ! izkr(i): flag nonzeros in ZVAL. To minimize traffic into heap. + ! izcr(i): flag rows to be used for the dot products. Used to minimize + ! traffic in rheap. + ! + do i=1,n + izkr(i) = 0 + izcr(i) = 0 + zval(i) = szero + end do + + ! Init z_1=e_1 and p_1=a_11 + p(1) = szero + i = 1 + nz = a%irp(i+1) - a%irp(i) + do j=1,nz + if (a%ja(j) == 1) then + p(1) = a%val(j) + exit + end if + end do + if (abs(p(1)) < s_epstol) & + & p(1) = 1.d-3 + + ! + ! + call z%allocate(n,n,n*nzrmax) + + z%icp(1) = 1 + z%icp(2) = 2 + z%ia(1) = 1 + z%val(1) = sone + nzz = 1 + zvalmax = sone + + do i = 2, n + if (debug) write(0,*) 'Main loop iteration ',i,n + + ! + ! Update loop on Z. + ! Must be separated from update loop of W because of + ! the conflict on J that would result. + ! + + ! ZVAL = e_i + ! !$ do j=1, i-1 + ! !$ zval(j) = szero + ! !$ end do + zval(i) = sone + izkr(i) = 1 + call heap%init(info) + if (info == psb_success_) call heap%insert(i,info) + + if (info == psb_success_) call rheap%init(info) + do j = ac%icp(i), ac%icp(i+1)-1 + if (ac%ia(j) < i) then + if (info == psb_success_) call rheap%insert(ac%ia(j),info) + izcr(ac%ia(j)) = 1 + end if + end do + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_init_heap') + return + end if + + ! Update loop + ! The idea is to keep track of the indices of the nonzeros in zval, + ! so as to only do the dot products on the rows which have nonzeros + ! in their positions; to do this we keep an extra + ! copy of A in CSC, and the row indices to be considered are in rheap. + lastj = -1 + outer: do + inner: do + call rheap%get_first(j,info) + if (debug) write(0,*) 'from get_first: ',j,info + if (info == -1) exit outer ! Empty heap + if (j > lastj) then + lastj = j + exit inner + end if + end do inner + + izcr(j) = 0 + if (j>=i) cycle outer + if (debug) write(0,*) 'update loop, using row: ',j,i + ip1 = a%irp(j) + ip2 = a%irp(j+1) - 1 + do + if (ip2 < ip1 ) exit + if (a%ja(ip2) <= n) exit + ip2 = ip2 -1 + end do + nzra = max(0,ip2 - ip1 + 1) + p(i) = psb_spge_dot(nzra,a%ja(ip1:ip2),a%val(ip1:ip2),zval) + ! !$ write(psb_err_unit,*) j,i,p(i) + + alpha = (-p(i)/p(j)) + + if (.false..or.(abs(alpha) > sp_thresh)) then + do k=z%icp(j), z%icp(j+1)-1 + kr = z%ia(k) + zval(kr) = zval(kr) + alpha*z%val(k) +!!$ if (abs(zval(kr)) > 1e16) then +!!$ write(0,*) i,j,p(i),p(j),alpha,z%val(k),alpha*z%val(k),kr,zval(kr) +!!$ end if + if (izkr(kr) == 0) then + + call heap%insert(kr,info) + if (info /= psb_success_) exit + izkr(kr) = 1 + ! We have just added a new nonzero in KR. Thus, we will + ! need to explicitly compute the dot products on all + ! rows jj).and.(nextj= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),' start' + + iscale_ = psb_ilu_scale_none_ + if (present(iscale)) iscale_ = iscale + weight = zone + ! + ! Check the memory available to hold the W and Z factors + ! and allocate it if needed + ! + nrow_a = a%get_nrows() + nztota = a%get_nzeros() + + if (present(blck)) then + nztota = nztota + blck%get_nzeros() + end if + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & ': out get_nnzeros',nrow_a,nztota,& + & a%get_nrows(),a%get_ncols(),a%get_nzeros() + + + n_row = desc%get_local_rows() + allocate(pq(n_row),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + end if + + nzrmax = fillin + sp_thresh = thresh + + ! + ! Ok, let's start first with Z (i.e. Upper) + ! + call a%csclip(acoo,info,imax=n_row,jmax=n_row) + call acsr%mv_from_coo(acoo,info) + select case(iscale_) + case(psb_ilu_scale_none_) + ! Ok, do nothing. + + case(psb_ilu_scale_maxval_) + weight = acsr%maxval() + weight = zone/weight + call acsr%scal(weight,info) + + case(psb_ilu_scale_arcsum_) + allocate(arws(n_row),acls(n_row),ad(n_row),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + end if + call acsr%arwsum(arws) + call acsr%aclsum(acls) + ad(1:n_row) = sqrt(sqrt(arws(1:n_row)*acls(1:n_row))) + ad(1:n_row) = zone/ad(1:n_row) + call acsr%scal(ad,info,side='L') + call acsr%scal(ad,info,side='R') + case default + call psb_errpush(psb_err_from_subroutine_,name,a_err='wrong iscale') + goto 9999 + end select + + ! + ! Here for the actual workhorses. + ! Only biconjg is surviving for now.... + ! + call psb_sparse_biconjg(alg,n_row,acsr,pq,& + & zmat,wmat,nzrmax,sp_thresh,info) + ! Done. Hopefully.... + + if (info /= psb_success_) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='sparse_biconjg') + goto 9999 + end if + call atmp%mv_from(acsr) + + ! + ! Is this right??? + ! + do i=1, n_row + if (abs(pq(i)) < d_epstol) then + pq(i) = zone + else + pq(i) = zone/pq(i) + end if + end do + + select case(iscale_) + case(psb_ilu_scale_none_) + ! Ok, do nothing. + case(psb_ilu_scale_maxval_) + pq(:) = pq(:)*weight + + case(psb_ilu_scale_arcsum_) + call zmat%scal(ad,info,side='L') + call wmat%scal(ad,info,side='R') + + case default + call psb_errpush(psb_err_from_subroutine_,name,a_err='wrong iscale') + goto 9999 + end select + + call psb_move_alloc(pq,d,info) + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),' end' + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return +end subroutine psb_z_ainv_bld diff --git a/prec/impl/psb_z_bjacprec_impl.f90 b/prec/impl/psb_z_bjacprec_impl.f90 index 70e062a5..3533f1e3 100644 --- a/prec/impl/psb_z_bjacprec_impl.f90 +++ b/prec/impl/psb_z_bjacprec_impl.f90 @@ -1,9 +1,9 @@ -! +! ! Parallel Sparse BLAS version 3.5 ! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! +! Salvatore Filippone +! Alfredo Buttari +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -15,7 +15,7 @@ ! 3. The name of the PSBLAS group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. -! +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -27,13 +27,13 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! -! +! +! subroutine psb_z_bjac_dump(prec,info,prefix,head) use psb_base_mod use psb_z_bjacprec, psb_protect_name => psb_z_bjac_dump - implicit none + implicit none class(psb_z_bjac_prec_type), intent(in) :: prec integer(psb_ipk_), intent(out) :: info character(len=*), intent(in), optional :: prefix,head @@ -43,13 +43,13 @@ subroutine psb_z_bjac_dump(prec,info,prefix,head) character(len=80) :: prefix_ character(len=120) :: fname ! len should be at least 20 more than - ! len of prefix_ + ! len of prefix_ info = 0 ctxt = prec%get_ctxt() call psb_info(ctxt,iam,np) - if (present(prefix)) then + if (present(prefix)) then prefix_ = trim(prefix(1:min(len(prefix),len(prefix_)))) else prefix_ = "dump_fact_z" @@ -74,7 +74,7 @@ end subroutine psb_z_bjac_dump subroutine psb_z_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work) use psb_base_mod use psb_z_bjacprec, psb_protect_name => psb_z_bjac_apply_vect - implicit none + implicit none type(psb_desc_type),intent(in) :: desc_data class(psb_z_bjac_prec_type), intent(inout) :: prec complex(psb_dpk_),intent(in) :: alpha,beta @@ -118,12 +118,12 @@ subroutine psb_z_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work) n_row = desc_data%get_local_rows() n_col = desc_data%get_local_cols() - if (x%get_nrows() < n_row) then + if (x%get_nrows() < n_row) then info = 36; ierr(1) = 2; ierr(2) = n_row; call psb_errpush(info,name,i_err=ierr) goto 9999 end if - if (y%get_nrows() < n_row) then + if (y%get_nrows() < n_row) then info = 36; ierr(1) = 3; ierr(2) = n_row; call psb_errpush(info,name,i_err=ierr) goto 9999 @@ -140,9 +140,9 @@ subroutine psb_z_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work) end if - if (n_col <= size(work)) then + if (n_col <= size(work)) then ww => work(1:n_col) - if ((4*n_col+n_col) <= size(work)) then + if ((4*n_col+n_col) <= size(work)) then aux => work(n_col+1:) else allocate(aux(4*n_col),stat=info) @@ -152,19 +152,19 @@ subroutine psb_z_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work) allocate(ww(n_col),aux(4*n_col),stat=info) endif - if (info /= psb_success_) then + if (info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 + goto 9999 end if do_alloc_wrk = .not.prec%is_allocated_wrk() if (do_alloc_wrk) call prec%allocate_wrk(info,vmold=x%v) associate (wv => prec%wrk(1), wv1 => prec%wrk(2)) - + select case(prec%iprcparm(psb_f_type_)) - case(psb_f_ilu_n_) - + case(psb_f_ilu_n_,psb_f_ilu_k_,psb_f_ilu_t_) + select case(trans_) case('N') call psb_spsm(zone,prec%av(psb_l_pr_),x,zzero,wv,desc_data,info,& @@ -172,31 +172,58 @@ subroutine psb_z_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work) if(info == psb_success_) call psb_spsm(alpha,prec%av(psb_u_pr_),wv,& & beta,y,desc_data,info,& & trans=trans_,scale='U',choice=psb_none_, work=aux) - + case('T') call psb_spsm(zone,prec%av(psb_u_pr_),x,zzero,wv,desc_data,info,& & trans=trans_,scale='L',diag=prec%dv,choice=psb_none_, work=aux) if(info == psb_success_) call psb_spsm(alpha,prec%av(psb_l_pr_),wv,& & beta,y,desc_data,info,& & trans=trans_,scale='U',choice=psb_none_,work=aux) - + case('C') - + call psb_spsm(zone,prec%av(psb_u_pr_),x,zzero,wv,desc_data,info,& & trans=trans_,scale='U',choice=psb_none_, work=aux) - + call wv1%mlt(zone,prec%dv,wv,zzero,info,conjgx=trans_) - + if(info == psb_success_) call psb_spsm(alpha,prec%av(psb_l_pr_),wv1,& & beta,y,desc_data,info,& & trans=trans_,scale='U',choice=psb_none_,work=aux) - + + end select + if (info /= psb_success_) then + ch_err="psb_spsm" + goto 9999 + end if + + case(psb_f_ainv_,psb_f_invt_,psb_f_invk_) + ! Application of approximate inverse preconditioner, just some spmm + + select case(trans_) + case('N') + call psb_spmm(zone,prec%av(psb_l_pr_),x,zzero,wv,desc_data,info,& + & trans=trans_,work=aux,doswap=.false.) + + if (info == psb_success_) call wv1%mlt(zone,prec%dv,wv,zzero,info) + if(info == psb_success_) & + & call psb_spmm(alpha,prec%av(psb_u_pr_),wv1,& + & beta,y,desc_data,info, trans=trans_, work=aux,doswap=.false.) + + case('T','C') + call psb_spmm(zone,prec%av(psb_l_pr_),x,zzero,wv,desc_data,info,& + & trans=trans_,work=aux,doswap=.false.) + if (info == psb_success_) call wv1%mlt(zone,prec%dv,wv,zzero,info) + if (info == psb_success_) & + & call psb_spmm(alpha,prec%av(psb_u_pr_),wv1, & + & beta,y,desc_data,info,trans=trans_,work=aux,doswap=.false.) + end select - if (info /= psb_success_) then + if (info /= psb_success_) then ch_err="psb_spsm" goto 9999 end if - + case default info = psb_err_internal_error_ call psb_errpush(info,name,a_err='Invalid factorization') @@ -204,12 +231,12 @@ subroutine psb_z_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work) end select end associate - + call psb_halo(y,desc_data,info,data=psb_comm_mov_) if (do_alloc_wrk) call prec%free_wrk(info) - if (n_col <= size(work)) then - if ((4*n_col+n_col) <= size(work)) then + if (n_col <= size(work)) then + if ((4*n_col+n_col) <= size(work)) then else deallocate(aux) endif @@ -231,7 +258,7 @@ end subroutine psb_z_bjac_apply_vect subroutine psb_z_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work) use psb_base_mod use psb_z_bjacprec, psb_protect_name => psb_z_bjac_apply - implicit none + implicit none type(psb_desc_type),intent(in) :: desc_data class(psb_z_bjac_prec_type), intent(inout) :: prec complex(psb_dpk_),intent(in) :: alpha,beta @@ -244,6 +271,7 @@ subroutine psb_z_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work) ! Local variables integer(psb_ipk_) :: n_row,n_col complex(psb_dpk_), pointer :: ww(:), aux(:) + type(psb_d_vect_type) :: tx,ty type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np,me integer(psb_ipk_) :: err_act, ierr(5) @@ -273,12 +301,12 @@ subroutine psb_z_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work) n_row = desc_data%get_local_rows() n_col = desc_data%get_local_cols() - if (size(x) < n_row) then + if (size(x) < n_row) then info = 36; ierr(1) = 2; ierr(2) = n_row; call psb_errpush(info,name,i_err=ierr) goto 9999 end if - if (size(y) < n_row) then + if (size(y) < n_row) then info = 36; ierr(1) = 3; ierr(2) = n_row; call psb_errpush(info,name,i_err=ierr) goto 9999 @@ -295,29 +323,29 @@ subroutine psb_z_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work) end if - if (n_col <= size(work)) then + if (n_col <= size(work)) then ww => work(1:n_col) - if ((4*n_col+n_col) <= size(work)) then + if ((4*n_col+n_col) <= size(work)) then aux => work(n_col+1:) else allocate(aux(4*n_col),stat=info) - if (info /= psb_success_) then + if (info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 + goto 9999 end if endif else allocate(ww(n_col),aux(4*n_col),stat=info) - if (info /= psb_success_) then + if (info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 + goto 9999 end if endif select case(prec%iprcparm(psb_f_type_)) - case(psb_f_ilu_n_) + case(psb_f_ilu_n_,psb_f_ilu_k_,psb_f_ilu_t_) select case(trans_) case('N') @@ -344,11 +372,42 @@ subroutine psb_z_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work) & trans=trans_,scale='U',choice=psb_none_,work=aux) end select - if (info /= psb_success_) then + if (info /= psb_success_) then ch_err="psb_spsm" goto 9999 end if + case(psb_f_ainv_,psb_f_invt_,psb_f_invk_) + ! Application of approximate inverse preconditioner, just some spmm + + select case(trans_) + + case('N') + call psb_spmm(zone,prec%av(psb_l_pr_),x,zzero,ww,desc_data,info,& + & trans=trans_,work=aux,doswap=.false.) + ww(1:n_row) = ww(1:n_row) * prec%dv%v%v(1:n_row) + if (info == psb_success_) & + & call psb_spmm(alpha,prec%av(psb_u_pr_),ww,beta,y,desc_data,info,& + & trans=trans_,work=aux,doswap=.false.) + + case('T') + call psb_spmm(zone,prec%av(psb_u_pr_),x,zzero,ww,desc_data,info,& + & trans=trans_,work=aux,doswap=.false.) + ww(1:n_row) = ww(1:n_row) * prec%dv%v%v(1:n_row) + if (info == psb_success_) & + & call psb_spmm(alpha,prec%av(psb_l_pr_),ww,beta,y,desc_data,info,& + & trans=trans_,work=aux,doswap=.false.) + + case('C') + call psb_spmm(zone,prec%av(psb_u_pr_),x,zzero,ww,desc_data,info,& + & trans=trans_,work=aux,doswap=.false.) + ww(1:n_row) = ww(1:n_row) * conjg(prec%dv%v%v(1:n_row)) + if (info == psb_success_) & + & call psb_spmm(alpha,prec%av(psb_l_pr_),ww,beta,y,desc_data,info,& + & trans=trans_,work=aux,doswap=.false.) + + end select + case default info = psb_err_internal_error_ @@ -358,8 +417,8 @@ subroutine psb_z_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work) call psb_halo(y,desc_data,info,data=psb_comm_mov_) - if (n_col <= size(work)) then - if ((4*n_col+n_col) <= size(work)) then + if (n_col <= size(work)) then + if ((4*n_col+n_col) <= size(work)) then else deallocate(aux) endif @@ -397,11 +456,26 @@ subroutine psb_z_bjac_precinit(prec,info) call psb_Errpush(info,name) goto 9999 end if + call psb_realloc(psb_rfpsz,prec%rprcparm,info) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_Errpush(info,name) + goto 9999 + end if prec%iprcparm(:) = 0 prec%iprcparm(psb_p_type_) = psb_bjac_ prec%iprcparm(psb_f_type_) = psb_f_ilu_n_ prec%iprcparm(psb_ilu_fill_in_) = 0 + prec%iprcparm(psb_ilu_ialg_) = psb_ilu_n_ + prec%iprcparm(psb_ilu_scale_) = psb_ilu_scale_none_ + prec%iprcparm(psb_inv_fillin_) = 0 + prec%iprcparm(psb_ainv_alg_) = psb_ainv_llk_ + + + prec%rprcparm(:) = 0 + prec%rprcparm(psb_fact_eps_) = 1E-1_psb_dpk_ + prec%rprcparm(psb_inv_thresh_) = 1E-1_psb_dpk_ call psb_erractionrestore(err_act) @@ -416,7 +490,8 @@ end subroutine psb_z_bjac_precinit subroutine psb_z_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold) use psb_base_mod - use psb_prec_mod, only : psb_ilu_fct + use psb_z_ilu_fact_mod + use psb_z_ainv_fact_mod use psb_z_bjacprec, psb_protect_name => psb_z_bjac_precbld Implicit None @@ -428,12 +503,13 @@ subroutine psb_z_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold) class(psb_z_base_vect_type), intent(in), optional :: vmold class(psb_i_base_vect_type), intent(in), optional :: imold - ! .. Local Scalars .. - integer(psb_ipk_) :: i, m + ! .. Local Scalars .. + integer(psb_ipk_) :: i, m, ialg, fill_in, iscale, inv_fill, iinvalg integer(psb_ipk_) :: ierr(5) character :: trans, unitd - type(psb_z_csr_sparse_mat), allocatable :: lf, uf + type(psb_zspmat_type), allocatable :: lf, uf complex(psb_dpk_), allocatable :: dd(:) + real(psb_dpk_) :: fact_eps, inv_thresh integer(psb_ipk_) :: nztota, err_act, n_row, nrow_a,n_col, nhalo type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np,me @@ -462,19 +538,114 @@ subroutine psb_z_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold) trans = 'N' unitd = 'U' + ! We check if all the information contained in the preconditioner structure + ! are meaningful, otherwise we give an error and get out of the build + ! procedure + ialg = prec%iprcparm(psb_ilu_ialg_) ! Integer for ILU type algorithm + iinvalg = prec%iprcparm(psb_ainv_alg_) ! Integer for AINV type algorithm + iscale = prec%iprcparm(psb_ilu_scale_) ! Integer for scaling of matrix + fact_eps = prec%rprcparm(psb_fact_eps_) ! Drop-tolerance for factorization + inv_thresh = prec%rprcparm(psb_inv_thresh_) ! Drop-tolerance for inverse + fill_in = prec%iprcparm(psb_ilu_fill_in_) ! Fill-In for factorization + inv_fill = prec%iprcparm(psb_inv_fillin_) ! Fill-In for inverse factorization + + ! Check if the type of scaling is known, pay attention that not all the + ! scalings make sense for all the factorization, if something that does not + ! make sense is required the factorization routine will fail in an + ! unnrecoverable way. + if ((iscale == psb_ilu_scale_none_).or.& + (iscale == psb_ilu_scale_maxval_).or.& + (iscale == psb_ilu_scale_diag_).or.& + (iscale == psb_ilu_scale_arwsum_).or.& + (iscale == psb_ilu_scale_aclsum_).or.& + (iscale == psb_ilu_scale_arcsum_)) then + ! Do nothing: admissible request + else + info=psb_err_from_subroutine_ + ch_err='psb_ilu_scale_' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + select case (prec%iprcparm(psb_f_type_)) + case (psb_f_ainv_) + ! Check if the variant for the AINV is known to the library + select case (iinvalg) + case(psb_ainv_llk_,psb_ainv_s_llk_,psb_ainv_s_ft_llk_,psb_ainv_llk_noth_,& + & psb_ainv_mlk_) + ! Do nothing these are okay + case default + info=psb_err_from_subroutine_ + ch_err='psb_ainv_alg_' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end select ! AINV Variant + ! Check if the drop-tolerance make sense + if( inv_thresh > 1) then + info=psb_err_from_subroutine_ + ch_err='psb_inv_thresh_' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + case (psb_f_ilu_t_) + if (fact_eps > 1) then + ! Check if the drop-tolerance make sense + info=psb_err_from_subroutine_ + ch_err='psb_fact_eps_' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + case (psb_f_invt_) + ! Check both tolerances + if (fact_eps > 1) then + info=psb_err_from_subroutine_ + ch_err='psb_fact_eps_' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + if( inv_thresh > 1) then + info=psb_err_from_subroutine_ + ch_err='psb_inv_thresh_' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + case default + end select + + + ! Checks relative to the fill-in parameters + if (prec%iprcparm(psb_f_type_) == psb_f_ilu_n_) then + if(fill_in < 0) then + info=psb_err_from_subroutine_ + ch_err='psb_ilu_fill_in_' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + else if (fill_in == 0) then + ! If the requested level of fill is equal to zero, we default to the + ! specialized ILU(0) routine + prec%iprcparm(psb_f_type_) = psb_f_ilu_n_ + end if + end if + ! If no limit on the fill_in is required we allow every fill, this is needed + ! since this quantity is used to allocate the auxiliary vectors for the + ! factorization + if (inv_fill <= 0) inv_fill = m + + ! Select on the type of factorization to be used select case(prec%iprcparm(psb_f_type_)) - case(psb_f_ilu_n_) + case(psb_f_ilu_n_) + ! ILU(0) Factorization: the total number of nonzeros of the factorized matrix + ! is equal to the one of the input matrix - if (allocated(prec%av)) then - if (size(prec%av) < psb_bp_ilu_avsz) then - do i=1,size(prec%av) + if (allocated(prec%av)) then + if (size(prec%av) < psb_bp_ilu_avsz) then + do i=1,size(prec%av) call prec%av(i)%free() enddo deallocate(prec%av,stat=info) endif end if - if (.not.allocated(prec%av)) then + if (.not.allocated(prec%av)) then allocate(prec%av(psb_max_avsz),stat=info) if (info /= psb_success_) then call psb_errpush(psb_err_alloc_dealloc_,name) @@ -501,27 +672,27 @@ subroutine psb_z_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold) end if allocate(dd(n_row),stat=info) - if (info == psb_success_) then + if (info == psb_success_) then allocate(prec%dv, stat=info) - if (info == 0) then - if (present(vmold)) then - allocate(prec%dv%v,mold=vmold,stat=info) - else - allocate(psb_z_base_vect_type :: prec%dv%v,stat=info) + if (info == 0) then + if (present(vmold)) then + allocate(prec%dv%v,mold=vmold,stat=info) + else + allocate(psb_z_base_vect_type :: prec%dv%v,stat=info) end if end if end if - if (info /= psb_success_) then + if (info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 + goto 9999 endif - ! This is where we have no renumbering, thus no need - call psb_ilu_fct(a,lf,uf,dd,info) + ! This is where we have no renumbering, thus no need + call psb_ilu0_fact(ialg,a,lf,uf,dd,info) if(info == psb_success_) then - call prec%av(psb_l_pr_)%mv_from(lf) - call prec%av(psb_u_pr_)%mv_from(uf) + call prec%av(psb_l_pr_)%mv_from(lf%a) + call prec%av(psb_u_pr_)%mv_from(uf%a) call prec%av(psb_l_pr_)%set_asb() call prec%av(psb_u_pr_)%set_asb() call prec%av(psb_l_pr_)%trim() @@ -530,12 +701,375 @@ subroutine psb_z_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold) ! call move_alloc(dd,prec%d) else info=psb_err_from_subroutine_ - ch_err='psb_ilu_fct' + ch_err='psb_ilu0_fact' call psb_errpush(info,name,a_err=ch_err) goto 9999 end if - case(psb_f_none_) + case(psb_f_ilu_k_) + ! ILU(N) Incomplete LU-factorization with N levels of fill-in. Depending on + ! the type of the variant of the algorithm the may be forgotten or added to + ! the diagonal (MILU) + + if (allocated(prec%av)) then + if (size(prec%av) < psb_bp_ilu_avsz) then + do i=1,size(prec%av) + call prec%av(i)%free() + enddo + deallocate(prec%av,stat=info) + endif + end if + if (.not.allocated(prec%av)) then + allocate(prec%av(psb_max_avsz),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + endif + + nrow_a = desc_a%get_local_rows() + nztota = a%get_nzeros() + + n_col = desc_a%get_local_cols() + nhalo = n_col-nrow_a + n_row = nrow_a + + allocate(lf,uf,stat=info) + if (info == psb_success_) call lf%allocate(n_row,n_row,nztota) + if (info == psb_success_) call uf%allocate(n_row,n_row,nztota) + + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_sp_all' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + allocate(dd(n_row),stat=info) + if (info == psb_success_) then + allocate(prec%dv, stat=info) + if (info == 0) then + if (present(vmold)) then + allocate(prec%dv%v,mold=vmold,stat=info) + else + allocate(psb_z_base_vect_type :: prec%dv%v,stat=info) + end if + end if + end if + + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + endif + ! This is where we have no renumbering, thus no need + call psb_iluk_fact(fill_in,ialg,a,lf,uf,dd,info) + + if(info == psb_success_) then + call prec%av(psb_l_pr_)%mv_from(lf%a) + call prec%av(psb_u_pr_)%mv_from(uf%a) + call prec%av(psb_l_pr_)%set_asb() + call prec%av(psb_u_pr_)%set_asb() + call prec%av(psb_l_pr_)%trim() + call prec%av(psb_u_pr_)%trim() + call prec%dv%bld(dd) + ! call move_alloc(dd,prec%d) + else + info=psb_err_from_subroutine_ + ch_err='psb_iluk_fact' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + case(psb_f_ilu_t_) + ! ILU(N,E) Incomplete LU factorization with thresholding and level of fill + + if (allocated(prec%av)) then + if (size(prec%av) < psb_bp_ilu_avsz) then + do i=1,size(prec%av) + call prec%av(i)%free() + enddo + deallocate(prec%av,stat=info) + endif + end if + if (.not.allocated(prec%av)) then + allocate(prec%av(psb_max_avsz),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + endif + + nrow_a = desc_a%get_local_rows() + nztota = a%get_nzeros() + + n_col = desc_a%get_local_cols() + nhalo = n_col-nrow_a + n_row = nrow_a + + allocate(lf,uf,stat=info) + if (info == psb_success_) call lf%allocate(n_row,n_row,nztota) + if (info == psb_success_) call uf%allocate(n_row,n_row,nztota) + + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_sp_all' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + allocate(dd(n_row),stat=info) + if (info == psb_success_) then + allocate(prec%dv, stat=info) + if (info == 0) then + if (present(vmold)) then + allocate(prec%dv%v,mold=vmold,stat=info) + else + allocate(psb_z_base_vect_type :: prec%dv%v,stat=info) + end if + end if + end if + + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + endif + ! This is where we have no renumbering, thus no need + call psb_ilut_fact(fill_in,fact_eps,a,lf,uf,dd,info,iscale=iscale) + + if(info == psb_success_) then + call prec%av(psb_l_pr_)%mv_from(lf%a) + call prec%av(psb_u_pr_)%mv_from(uf%a) + call prec%av(psb_l_pr_)%set_asb() + call prec%av(psb_u_pr_)%set_asb() + call prec%av(psb_l_pr_)%trim() + call prec%av(psb_u_pr_)%trim() + call prec%dv%bld(dd) + ! call move_alloc(dd,prec%d) + else + info=psb_err_from_subroutine_ + ch_err='psb_ilut_fact' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + case(psb_f_ainv_) + ! Approximate Inverse Factorizations based on variants of the incomplete + ! biconjugation algorithms + if (allocated(prec%av)) then + if (size(prec%av) < psb_bp_ilu_avsz) then + do i=1,size(prec%av) + call prec%av(i)%free() + enddo + deallocate(prec%av,stat=info) + endif + end if + if (.not.allocated(prec%av)) then + allocate(prec%av(psb_max_avsz),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + endif + + nrow_a = desc_a%get_local_rows() + nztota = a%get_nzeros() + + n_col = desc_a%get_local_cols() + nhalo = n_col-nrow_a + n_row = nrow_a + + allocate(lf,uf,stat=info) + if (info == psb_success_) call lf%allocate(n_row,n_row,nztota) + if (info == psb_success_) call uf%allocate(n_row,n_row,nztota) + + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_sp_all' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + allocate(dd(n_row),stat=info) + if (info == psb_success_) then + allocate(prec%dv, stat=info) + if (info == 0) then + if (present(vmold)) then + allocate(prec%dv%v,mold=vmold,stat=info) + else + allocate(psb_z_base_vect_type :: prec%dv%v,stat=info) + end if + end if + end if + + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + endif + ! Computing the factorization + call psb_ainv_fact(a,iinvalg,inv_fill,inv_thresh,lf,dd,uf,desc_a,info,iscale=iscale) + + if(info == psb_success_) then + call prec%av(psb_l_pr_)%mv_from(lf%a) + call prec%av(psb_u_pr_)%mv_from(uf%a) + call prec%av(psb_l_pr_)%set_asb() + call prec%av(psb_u_pr_)%set_asb() + call prec%av(psb_l_pr_)%trim() + call prec%av(psb_u_pr_)%trim() + call prec%dv%bld(dd) + ! call move_alloc(dd,prec%d) + else + info=psb_err_from_subroutine_ + ch_err='psb_ilut_fact' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + case(psb_f_invk_) + ! Approximate Inverse Factorizations based on the sparse inversion of + ! triangular factors of an ILU(n) factorization + if (allocated(prec%av)) then + if (size(prec%av) < psb_bp_ilu_avsz) then + do i=1,size(prec%av) + call prec%av(i)%free() + enddo + deallocate(prec%av,stat=info) + endif + end if + if (.not.allocated(prec%av)) then + allocate(prec%av(psb_max_avsz),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + endif + + nrow_a = desc_a%get_local_rows() + nztota = a%get_nzeros() + + n_col = desc_a%get_local_cols() + nhalo = n_col-nrow_a + n_row = nrow_a + + allocate(lf,uf,stat=info) + if (info == psb_success_) call lf%allocate(n_row,n_row,nztota) + if (info == psb_success_) call uf%allocate(n_row,n_row,nztota) + + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_sp_all' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + allocate(dd(n_row),stat=info) + if (info == psb_success_) then + allocate(prec%dv, stat=info) + if (info == 0) then + if (present(vmold)) then + allocate(prec%dv%v,mold=vmold,stat=info) + else + allocate(psb_z_base_vect_type :: prec%dv%v,stat=info) + end if + end if + end if + + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + endif + ! Computing the factorization + call psb_invk_fact(a,fill_in, inv_fill,lf,dd,uf,desc_a,info) + + if(info == psb_success_) then + call prec%av(psb_l_pr_)%mv_from(lf%a) + call prec%av(psb_u_pr_)%mv_from(uf%a) + call prec%av(psb_l_pr_)%set_asb() + call prec%av(psb_u_pr_)%set_asb() + call prec%av(psb_l_pr_)%trim() + call prec%av(psb_u_pr_)%trim() + call prec%dv%bld(dd) + ! call move_alloc(dd,prec%d) + else + info=psb_err_from_subroutine_ + ch_err='psb_ilut_fact' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + case(psb_f_invt_) + ! Approximate Inverse Factorizations based on the sparse inversion of + ! triangular factors of an ILU(eps) factorization + if (allocated(prec%av)) then + if (size(prec%av) < psb_bp_ilu_avsz) then + do i=1,size(prec%av) + call prec%av(i)%free() + enddo + deallocate(prec%av,stat=info) + endif + end if + if (.not.allocated(prec%av)) then + allocate(prec%av(psb_max_avsz),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + endif + + nrow_a = desc_a%get_local_rows() + nztota = a%get_nzeros() + + n_col = desc_a%get_local_cols() + nhalo = n_col-nrow_a + n_row = nrow_a + + allocate(lf,uf,stat=info) + if (info == psb_success_) call lf%allocate(n_row,n_row,nztota) + if (info == psb_success_) call uf%allocate(n_row,n_row,nztota) + + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_sp_all' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + allocate(dd(n_row),stat=info) + if (info == psb_success_) then + allocate(prec%dv, stat=info) + if (info == 0) then + if (present(vmold)) then + allocate(prec%dv%v,mold=vmold,stat=info) + else + allocate(psb_z_base_vect_type :: prec%dv%v,stat=info) + end if + end if + end if + + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + endif + ! Computing the factorization + call psb_invt_fact(a,fill_in,inv_fill,fact_eps,inv_thresh,lf,dd,uf,& + & desc_a,info) + + if(info == psb_success_) then + call prec%av(psb_l_pr_)%mv_from(lf%a) + call prec%av(psb_u_pr_)%mv_from(uf%a) + call prec%av(psb_l_pr_)%set_asb() + call prec%av(psb_u_pr_)%set_asb() + call prec%av(psb_l_pr_)%trim() + call prec%av(psb_u_pr_)%trim() + call prec%dv%bld(dd) + ! call move_alloc(dd,prec%d) + else + info=psb_err_from_subroutine_ + ch_err='psb_ilut_fact' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + case(psb_f_none_) info=psb_err_from_subroutine_ ch_err='Inconsistent prec psb_f_none_' call psb_errpush(info,name,a_err=ch_err) @@ -548,7 +1082,7 @@ subroutine psb_z_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold) goto 9999 end select - if (present(amold)) then + if (present(amold)) then call prec%av(psb_l_pr_)%cscnv(info,mold=amold) call prec%av(psb_u_pr_)%cscnv(info,mold=amold) end if @@ -567,8 +1101,8 @@ subroutine psb_z_bjac_precseti(prec,what,val,info) Implicit None class(psb_z_bjac_prec_type),intent(inout) :: prec - integer(psb_ipk_), intent(in) :: what - integer(psb_ipk_), intent(in) :: val + integer(psb_ipk_), intent(in) :: what + integer(psb_ipk_), intent(in) :: val integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: err_act, nrow character(len=20) :: name='z_bjac_precset' @@ -576,34 +1110,33 @@ subroutine psb_z_bjac_precseti(prec,what,val,info) call psb_erractionsave(err_act) info = psb_success_ - if (.not.allocated(prec%iprcparm)) then + if (.not.allocated(prec%iprcparm)) then info = 1124 call psb_errpush(info,name,a_err="preconditioner") goto 9999 end if select case(what) - case (psb_f_type_) - if (prec%iprcparm(psb_p_type_) /= psb_bjac_) then - write(psb_err_unit,*) 'WHAT is invalid for current preconditioner ',& - & prec%iprcparm(psb_p_type_),& - & 'ignoring user specification' - return - endif + case (psb_f_type_) prec%iprcparm(psb_f_type_) = val - case (psb_ilu_fill_in_) - if ((prec%iprcparm(psb_p_type_) /= psb_bjac_).or.& - & (prec%iprcparm(psb_f_type_) /= psb_f_ilu_n_)) then - write(psb_err_unit,*) 'WHAT is invalid for current preconditioner ',& - & prec%iprcparm(psb_p_type_),& - & 'ignoring user specification' - return - endif + case (psb_ilu_fill_in_) prec%iprcparm(psb_ilu_fill_in_) = val + case (psb_ilu_ialg_) + prec%iprcparm(psb_ilu_ialg_) = val + + case (psb_ilu_scale_) + prec%iprcparm(psb_ilu_scale_) = val + + case (psb_ainv_alg_) + prec%iprcparm(psb_ainv_alg_) = val + + case(psb_inv_fillin_) + prec%iprcparm(psb_inv_fillin_) = val + case default - write(psb_err_unit,*) 'WHAT is invalid, ignoring user specification' + write(psb_err_unit,'(i0," is invalid, ignoring user specification")') what end select @@ -613,3 +1146,45 @@ subroutine psb_z_bjac_precseti(prec,what,val,info) 9999 call psb_error_handler(err_act) return end subroutine psb_z_bjac_precseti + +subroutine psb_z_bjac_precsetr(prec,what,val,info) + + use psb_base_mod + use psb_z_bjacprec, psb_protect_name => psb_z_bjac_precsetr + Implicit None + + class(psb_z_bjac_prec_type),intent(inout) :: prec + integer(psb_ipk_), intent(in) :: what + real(psb_dpk_), intent(in) :: val + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act, nrow + character(len=20) :: name='z_bjac_precset' + + call psb_erractionsave(err_act) + + info = psb_success_ + if (.not.allocated(prec%rprcparm)) then + info = 1124 + call psb_errpush(info,name,a_err="preconditioner") + goto 9999 + end if + + select case(what) + + case (psb_fact_eps_) + prec%rprcparm(psb_fact_eps_) = val + + case (psb_inv_thresh_) + prec%rprcparm(psb_inv_thresh_) = val + + case default + write(psb_err_unit,'(i0," is invalid, ignoring user specification")') what + + end select + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return +end subroutine psb_z_bjac_precsetr diff --git a/prec/impl/psb_z_ilu0_fact.f90 b/prec/impl/psb_z_ilu0_fact.f90 index 867da777..26322e95 100644 --- a/prec/impl/psb_z_ilu0_fact.f90 +++ b/prec/impl/psb_z_ilu0_fact.f90 @@ -1,9 +1,9 @@ -! +! ! Parallel Sparse BLAS version 3.5 ! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! +! Salvatore Filippone +! Alfredo Buttari +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -15,7 +15,7 @@ ! 3. The name of the PSBLAS group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. -! +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -27,21 +27,21 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! +! ! Moved here from MLD2P4, original copyright below. -! -! -! +! +! +! ! MLD2P4 version 2.2 ! MultiLevel Domain Decomposition Parallel Preconditioners Package ! based on PSBLAS (Parallel Sparse BLAS version 3.5) -! -! (C) Copyright 2008-2018 -! -! Salvatore Filippone -! Pasqua D'Ambra -! Daniela di Serafino -! +! +! (C) Copyright 2008-2018 +! +! Salvatore Filippone +! Pasqua D'Ambra +! Daniela di Serafino +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -53,7 +53,7 @@ ! 3. The name of the MLD2P4 group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. -! +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -65,8 +65,8 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! -! +! +! ! File: psb_zilu0_fact.f90 ! ! Subroutine: psb_zilu0_fact @@ -75,7 +75,7 @@ ! ! This routine computes either the ILU(0) or the MILU(0) factorization of ! the diagonal blocks of a distributed matrix. These factorizations are used -! to build the 'base preconditioner' (block-Jacobi preconditioner/solver, +! to build the 'base preconditioner' (block-Jacobi preconditioner/solver, ! Additive Schwarz preconditioner) corresponding to a given level of a ! multilevel preconditioner. ! @@ -83,10 +83,10 @@ ! Y. Saad, Iterative Methods for Sparse Linear Systems, Second Edition, ! SIAM, 2003, Chapter 10. ! -! The local matrix is stored into a and blck, as specified in the description -! of the arguments below. The storage format for both the L and U factors is CSR. -! The diagonal of the U factor is stored separately (actually, the inverse of the -! diagonal entries is stored; this is then managed in the solve stage associated +! The local matrix is stored into a and blck, as specified in the description +! of the arguments below. The storage format for both the L and U factors is CSR. +! The diagonal of the U factor is stored separately (actually, the inverse of the +! diagonal entries is stored; this is then managed in the solve stage associated ! to the ILU(0)/MILU(0) factorization). ! ! The routine copies and factors "on the fly" from a and blck into l (L factor), @@ -94,7 +94,7 @@ ! ! This implementation of ILU(0)/MILU(0) is faster than the implementation in ! psb_ziluk_fct (the latter routine performs the more general ILU(k)/MILU(k)). -! +! ! ! Arguments: ! ialg - integer, input. @@ -121,7 +121,7 @@ ! factorization. ! Note: its allocation is managed by the calling routine psb_ilu_bld, ! hence it cannot be only intent(out). -! info - integer, output. +! info - integer, output. ! Error code. ! blck - type(psb_zspmat_type), input, optional, target. ! The sparse matrix structure containing the remote rows of the @@ -129,7 +129,7 @@ ! to build an Additive Schwarz base preconditioner with overlap ! greater than 0. If the overlap is 0 or the matrix has been reordered ! (see psb_fact_bld), then blck is empty. -! +! subroutine psb_zilu0_fact(ialg,a,l,u,d,info,blck, upd) use psb_base_mod @@ -157,30 +157,30 @@ subroutine psb_zilu0_fact(ialg,a,l,u,d,info,blck, upd) info = psb_success_ call psb_erractionsave(err_act) - ! + ! ! Point to / allocate memory for the incomplete factorization ! - if (present(blck)) then + if (present(blck)) then blck_ => blck else - allocate(blck_,stat=info) - if (info == psb_success_) call blck_%csall(izero,izero,info,ione) + allocate(blck_,stat=info) + if (info == psb_success_) call blck_%allocate(izero,izero,info,ione,type='CSR') if (info /= psb_success_) then info=psb_err_from_subroutine_ - ch_err='csall' + ch_err='allocate' call psb_errpush(info,name,a_err=ch_err) goto 9999 end if endif - if (present(upd)) then - upd_ = psb_toupper(upd) + if (present(upd)) then + upd_ = psb_toupper(upd) else upd_ = 'F' end if - + m = a%get_nrows() + blck_%get_nrows() if ((m /= l%get_nrows()).or.(m /= u%get_nrows()).or.& - & (m > size(d)) ) then + & (m > size(d)) ) then write(0,*) 'Wrong allocation status for L,D,U? ',& & l%get_nrows(),size(d),u%get_nrows() info = -1 @@ -212,12 +212,12 @@ subroutine psb_zilu0_fact(ialg,a,l,u,d,info,blck, upd) call u%set_triangle() call u%set_unit() call u%set_upper() - + ! ! Nullify pointer / deallocate memory ! - if (present(blck)) then - blck_ => null() + if (present(blck)) then + blck_ => null() else call blck_%free() if(info.ne.0) then @@ -226,7 +226,7 @@ subroutine psb_zilu0_fact(ialg,a,l,u,d,info,blck, upd) call psb_errpush(info,name,a_err=ch_err) goto 9999 end if - deallocate(blck_) + deallocate(blck_) endif call psb_erractionrestore(err_act) @@ -243,9 +243,9 @@ contains ! Version: complex ! Note: internal subroutine of psb_zilu0_fact. ! - ! This routine computes either the ILU(0) or the MILU(0) factorization of the - ! diagonal blocks of a distributed matrix. - ! These factorizations are used to build the 'base preconditioner' + ! This routine computes either the ILU(0) or the MILU(0) factorization of the + ! diagonal blocks of a distributed matrix. + ! These factorizations are used to build the 'base preconditioner' ! (block-Jacobi preconditioner/solver, Additive Schwarz ! preconditioner) corresponding to a given level of a multilevel preconditioner. ! @@ -257,7 +257,7 @@ contains ! ! The routine copies and factors "on the fly" from the sparse matrix structures a ! and b into the arrays lval, uval, d (L, U without its diagonal, diagonal of U). - ! + ! ! ! Arguments: ! ialg - integer, input. @@ -296,7 +296,7 @@ contains ! according to the CSR storage format. ! lirp - integer, dimension(:), input/output. ! The indices identifying the first nonzero entry of each row - ! of the L factor in lval, according to the CSR storage format. + ! of the L factor in lval, according to the CSR storage format. ! uval - complex(psb_dpk_), dimension(:), input/output. ! The U factor in the incomplete factorization. ! The entries of U are stored according to the CSR format. @@ -305,18 +305,18 @@ contains ! according to the CSR storage format. ! uirp - integer, dimension(:), input/output. ! The indices identifying the first nonzero entry of each row - ! of the U factor in uval, according to the CSR storage format. + ! of the U factor in uval, according to the CSR storage format. ! l1 - integer, output. ! The number of nonzero entries in lval. ! l2 - integer, output. ! The number of nonzero entries in uval. - ! info - integer, output. + ! info - integer, output. ! Error code. ! subroutine psb_zilu0_factint(ialg,a,b,& & d,lval,lja,lirp,uval,uja,uirp,l1,l2,upd,info) - implicit none + implicit none ! Arguments integer(psb_ipk_), intent(in) :: ialg @@ -332,7 +332,7 @@ contains complex(psb_dpk_) :: dia,temp integer(psb_ipk_), parameter :: nrb=16 type(psb_z_coo_sparse_mat) :: trw - integer(psb_ipk_) :: int_err(5) + integer(psb_ipk_) :: int_err(5) character(len=20) :: name, ch_err name='psb_zilu0_factint' @@ -346,10 +346,10 @@ contains select case(ialg) case(psb_ilu_n_,psb_milu_n_) - ! Ok + ! Ok case default info=psb_err_input_asize_invalid_i_ - call psb_errpush(info,name,& + call psb_errpush(info,name,& & i_err=(/ione,ialg,izero,izero,izero/)) goto 9999 end select @@ -363,7 +363,7 @@ contains end if m = ma+mb - if (psb_toupper(upd) == 'F' ) then + if (psb_toupper(upd) == 'F' ) then lirp(1) = 1 uirp(1) = 1 l1 = 0 @@ -379,14 +379,14 @@ contains if (i <= ma) then ! ! Copy the i-th local row of the matrix, stored in a, - ! into lval/d(i)/uval + ! into lval/d(i)/uval ! call ilu_copyin(i,ma,a,i,ione,m,l1,lja,lval,& & d(i),l2,uja,uval,ktrw,trw,upd) else ! ! Copy the i-th local row of the matrix, stored in b - ! (as (i-ma)-th row), into lval/d(i)/uval + ! (as (i-ma)-th row), into lval/d(i)/uval ! call ilu_copyin(i-ma,mb,b,i,ione,m,l1,lja,lval,& & d(i),l2,uja,uval,ktrw,trw,upd) @@ -437,7 +437,7 @@ contains ! dia = dia - temp*uval(jj) cycle updateloop - ! + ! else if (j > i) then ! ! search u(i,*) (i-th row of U) for a matching index j @@ -454,23 +454,23 @@ contains end if enddo end if - ! - ! If we get here we missed the cycle updateloop, which means + ! + ! If we get here we missed the cycle updateloop, which means ! that this entry does not match; thus we accumulate on the ! diagonal for MILU(0). ! - if (ialg == psb_milu_n_) then + if (ialg == psb_milu_n_) then dia = dia - temp*uval(jj) end if enddo updateloop enddo - ! + ! ! Check the pivot size - ! + ! if (abs(dia) < d_epstol) then ! ! Too small pivot: unstable factorization - ! + ! info = psb_err_pivot_too_small_ int_err(1) = i write(ch_err,'(g20.10)') abs(dia) @@ -493,7 +493,7 @@ contains else write(0,*) 'Update not implemented ' info = 31 - call psb_errpush(info,name,& + call psb_errpush(info,name,& & i_err=(/ione*13,izero,izero,izero,izero/),a_err=upd) goto 9999 @@ -515,7 +515,7 @@ contains ! Version: complex ! Note: internal subroutine of psb_zilu0_fact ! - ! This routine copies a row of a sparse matrix A, stored in the psb_zspmat_type + ! This routine copies a row of a sparse matrix A, stored in the psb_zspmat_type ! data structure a, into the arrays lval and uval and into the scalar variable ! dia, corresponding to the lower and upper triangles of A and to the diagonal ! entry of the row, respectively. The entries in lval and uval are stored @@ -529,14 +529,14 @@ contains ! ! The routine is used by psb_zilu0_factint in the computation of the ILU(0)/MILU(0) ! factorization of a local sparse matrix. - ! + ! ! TODO: modify the routine to allow copying into output L and U that are ! already filled with indices; this would allow computing an ILU(k) pattern, - ! then use the ILU(0) internal for subsequent calls with the same pattern. + ! then use the ILU(0) internal for subsequent calls with the same pattern. ! ! Arguments: ! i - integer, input. - ! The local index of the row to be extracted from the + ! The local index of the row to be extracted from the ! sparse matrix structure a. ! m - integer, input. ! The number of rows of the local matrix stored into a. @@ -570,13 +570,13 @@ contains ! to the CSR storage format. ! uval - complex(psb_dpk_), dimension(:), input/output. ! The array where the entries of the row corresponding to the - ! upper triangle are copied. + ! upper triangle are copied. ! ktrw - integer, input/output. ! The index identifying the last entry taken from the ! staging buffer trw. See below. ! trw - type(psb_zspmat_type), input/output. ! A staging buffer. If the matrix A is not in CSR format, we use - ! the psb_sp_getblk routine and store its output in trw; when we + ! the psb_sp_getblk routine and store its output in trw; when we ! need to call psb_sp_getblk we do it for a block of rows, and then ! we consume them from trw in successive calls to this routine, ! until we empty the buffer. Thus we will make a call to psb_sp_getblk @@ -608,10 +608,10 @@ contains if (psb_errstatus_fatal()) then info = psb_err_internal_error_; goto 9999 end if - if (psb_toupper(upd) == 'F') then + if (psb_toupper(upd) == 'F') then - select type(aa => a%a) - type is (psb_z_csr_sparse_mat) + select type(aa => a%a) + type is (psb_z_csr_sparse_mat) ! ! Take a fast shortcut if the matrix is stored in CSR format @@ -633,19 +633,19 @@ contains end if enddo - class default + class default ! - ! Otherwise use psb_sp_getblk, slower but able (in principle) of + ! Otherwise use psb_sp_getblk, slower but able (in principle) of ! handling any format. In this case, a block of rows is extracted ! instead of a single row, for performance reasons, and these ! rows are copied one by one into lval, dia, uval, through ! successive calls to ilu_copyin. ! - if ((mod(i,nrb) == 1).or.(nrb == 1)) then + if ((mod(i,nrb) == 1).or.(nrb == 1)) then irb = min(m-i+1,nrb) - call aa%csget(i,i+irb-1,trw,info) + call aa%csget(i,i+irb-1,trw,info) if(info /= psb_success_) then info=psb_err_from_subroutine_ ch_err='csget' @@ -656,7 +656,7 @@ contains end if nz = trw%get_nzeros() - do + do if (ktrw > nz) exit if (trw%ia(ktrw) > i) exit k = trw%ja(ktrw) @@ -680,7 +680,7 @@ contains write(0,*) 'Update not implemented ' info = 31 - call psb_errpush(info,name,& + call psb_errpush(info,name,& & i_err=(/ione*13,izero,izero,izero,izero/),a_err=upd) goto 9999 diff --git a/prec/impl/psb_z_iluk_fact.f90 b/prec/impl/psb_z_iluk_fact.f90 index fe9e92d9..1a398cda 100644 --- a/prec/impl/psb_z_iluk_fact.f90 +++ b/prec/impl/psb_z_iluk_fact.f90 @@ -1,9 +1,9 @@ -! +! ! Parallel Sparse BLAS version 3.5 ! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! +! Salvatore Filippone +! Alfredo Buttari +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -15,7 +15,7 @@ ! 3. The name of the PSBLAS group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. -! +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -27,21 +27,21 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! +! ! Moved here from MLD2P4, original copyright below. -! -! -! +! +! +! ! MLD2P4 version 2.2 ! MultiLevel Domain Decomposition Parallel Preconditioners Package ! based on PSBLAS (Parallel Sparse BLAS version 3.5) -! -! (C) Copyright 2008-2018 -! -! Salvatore Filippone -! Pasqua D'Ambra -! Daniela di Serafino -! +! +! (C) Copyright 2008-2018 +! +! Salvatore Filippone +! Pasqua D'Ambra +! Daniela di Serafino +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -53,7 +53,7 @@ ! 3. The name of the MLD2P4 group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. -! +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -65,8 +65,8 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! -! +! +! ! File: psb_ziluk_fact.f90 ! ! Subroutine: psb_ziluk_fact @@ -74,7 +74,7 @@ ! Contains: psb_ziluk_factint, iluk_copyin, iluk_fact, iluk_copyout. ! ! This routine computes either the ILU(k) or the MILU(k) factorization of the -! diagonal blocks of a distributed matrix. These factorizations are used to +! diagonal blocks of a distributed matrix. These factorizations are used to ! build the 'base preconditioner' (block-Jacobi preconditioner/solver, ! Additive Schwarz preconditioner) corresponding to a certain level of a ! multilevel preconditioner. @@ -88,7 +88,7 @@ ! U factors is CSR. The diagonal of the U factor is stored separately (actually, ! the inverse of the diagonal entries is stored; this is then managed in the solve ! stage associated to the ILU(k)/MILU(k) factorization). -! +! ! ! Arguments: ! fill_in - integer, input. @@ -118,7 +118,7 @@ ! factorization. ! Note: its allocation is managed by the calling routine psb_ilu_bld, ! hence it cannot be only intent(out). -! info - integer, output. +! info - integer, output. ! Error code. ! blck - type(psb_zspmat_type), input, optional, target. ! The sparse matrix structure containing the remote rows of the @@ -126,7 +126,7 @@ ! to build an Additive Schwarz base preconditioner with overlap ! greater than 0. If the overlap is 0 or the matrix has been reordered ! (see psb_fact_bld), then blck does not contain any row. -! +! subroutine psb_ziluk_fact(fill_in,ialg,a,l,u,d,info,blck) use psb_base_mod @@ -143,7 +143,7 @@ subroutine psb_ziluk_fact(fill_in,ialg,a,l,u,d,info,blck) complex(psb_dpk_), intent(inout) :: d(:) ! Local Variables integer(psb_ipk_) :: l1, l2, m, err_act - + type(psb_zspmat_type), pointer :: blck_ type(psb_z_csr_sparse_mat) :: ll, uu character(len=20) :: name, ch_err @@ -152,17 +152,17 @@ subroutine psb_ziluk_fact(fill_in,ialg,a,l,u,d,info,blck) info = psb_success_ call psb_erractionsave(err_act) - ! + ! ! Point to / allocate memory for the incomplete factorization ! - if (present(blck)) then + if (present(blck)) then blck_ => blck else - allocate(blck_,stat=info) - if (info == psb_success_) call blck_%csall(izero,izero,info,ione) + allocate(blck_,stat=info) + if (info == psb_success_) call blck_%allocate(izero,izero,info,ione,type='CSR') if (info /= psb_success_) then info=psb_err_from_subroutine_ - ch_err='csall' + ch_err='allocate' call psb_errpush(info,name,a_err=ch_err) goto 9999 end if @@ -170,7 +170,7 @@ subroutine psb_ziluk_fact(fill_in,ialg,a,l,u,d,info,blck) m = a%get_nrows() + blck_%get_nrows() if ((m /= l%get_nrows()).or.(m /= u%get_nrows()).or.& - & (m > size(d)) ) then + & (m > size(d)) ) then write(0,*) 'Wrong allocation status for L,D,U? ',& & l%get_nrows(),size(d),u%get_nrows() info = -1 @@ -203,15 +203,15 @@ subroutine psb_ziluk_fact(fill_in,ialg,a,l,u,d,info,blck) call u%set_triangle() call u%set_unit() call u%set_upper() - + ! ! Nullify pointer / deallocate memory ! - if (present(blck)) then - blck_ => null() + if (present(blck)) then + blck_ => null() else call blck_%free() - deallocate(blck_,stat=info) + deallocate(blck_,stat=info) if(info.ne.0) then info=psb_err_from_subroutine_ ch_err='psb_sp_free' @@ -280,7 +280,7 @@ contains ! according to the CSR storage format. ! lia2 - integer, dimension(:), input/output. ! The indices identifying the first nonzero entry of each row - ! of the L factor in laspk, according to the CSR storage format. + ! of the L factor in laspk, according to the CSR storage format. ! uval - complex(psb_dpk_), dimension(:), input/output. ! The U factor in the incomplete factorization. ! The entries of U are stored according to the CSR format. @@ -289,12 +289,12 @@ contains ! according to the CSR storage format. ! uirp - integer, dimension(:), input/output. ! The indices identifying the first nonzero entry of each row - ! of the U factor in uval, according to the CSR storage format. + ! of the U factor in uval, according to the CSR storage format. ! l1 - integer, output ! The number of nonzero entries in laspk. ! l2 - integer, output ! The number of nonzero entries in uval. - ! info - integer, output. + ! info - integer, output. ! Error code. ! subroutine psb_ziluk_factint(fill_in,ialg,a,b,& @@ -304,7 +304,7 @@ contains implicit none - ! Arguments + ! Arguments integer(psb_ipk_), intent(in) :: fill_in, ialg type(psb_zspmat_type),intent(in) :: a,b integer(psb_ipk_),intent(inout) :: l1,l2,info @@ -330,14 +330,14 @@ contains select case(ialg) case(psb_ilu_n_,psb_milu_n_) - ! Ok + ! Ok case default info=psb_err_input_asize_invalid_i_ call psb_errpush(info,name,& & i_err=(/itwo,ialg,izero,izero,izero/)) goto 9999 end select - if (fill_in < 0) then + if (fill_in < 0) then info=psb_err_input_asize_invalid_i_ call psb_errpush(info,name, & & i_err=(/ione,fill_in,izero,izero,izero/)) @@ -349,7 +349,7 @@ contains m = ma+mb ! - ! Allocate a temporary buffer for the iluk_copyin function + ! Allocate a temporary buffer for the iluk_copyin function ! call trw%allocate(izero,izero,ione) @@ -361,7 +361,7 @@ contains call psb_errpush(info,name,a_err='psb_sp_all') goto 9999 end if - + l1=0 l2=0 lirp(1) = 1 @@ -381,34 +381,34 @@ contains uplevs(:) = m+1 row(:) = zzero rowlevs(:) = -(m+1) - + ! ! Cycle over the matrix rows ! do i = 1, m - + ! ! At each iteration of the loop we keep in a heap the column indices ! affected by the factorization. The heap is initialized and filled ! in the iluk_copyin routine, and updated during the elimination, in ! the iluk_fact routine. The heap is ideal because at each step we need ! the lowest index, but we also need to insert new items, and the heap - ! allows to do both in log time. + ! allows to do both in log time. ! d(i) = zzero if (i<=ma) then ! - ! Copy into trw the i-th local row of the matrix, stored in a - ! + ! Copy into trw the i-th local row of the matrix, stored in a + ! call iluk_copyin(i,ma,a,ione,m,row,rowlevs,heap,ktrw,trw,info) else ! ! Copy into trw the i-th local row of the matrix, stored in b - ! (as (i-ma)-th row) - ! + ! (as (i-ma)-th row) + ! call iluk_copyin(i-ma,mb,b,ione,m,row,rowlevs,heap,ktrw,trw,info) endif - + ! Do an elimination step on the current row. It turns out we only ! need to keep track of fill levels for the upper triangle, hence we ! do not have a lowlevs variable. @@ -417,7 +417,7 @@ contains & d,uja,uirp,uval,uplevs,nidx,idxs,info) ! ! Copy the row into lval/d(i)/uval - ! + ! if (info == psb_success_) call iluk_copyout(fill_in,ialg,i,m,row,rowlevs,nidx,idxs,& & l1,l2,lja,lirp,lval,d,uja,uirp,uval,uplevs,info) if (info /= psb_success_) then @@ -474,11 +474,11 @@ contains ! ! This routine is used by psb_ziluk_factint in the computation of the ! ILU(k)/MILU(k) factorization of a local sparse matrix. - ! + ! ! ! Arguments: ! i - integer, input. - ! The local index of the row to be extracted from the + ! The local index of the row to be extracted from the ! sparse matrix structure a. ! m - integer, input. ! The number of rows of the local matrix stored into a. @@ -510,7 +510,7 @@ contains ! staging buffer trw. See below. ! trw - type(psb_zspmat_type), input/output. ! A staging buffer. If the matrix A is not in CSR format, we use - ! the psb_sp_getblk routine and store its output in trw; when we + ! the psb_sp_getblk routine and store its output in trw; when we ! need to call psb_sp_getblk we do it for a block of rows, and then ! we consume them from trw in successive calls to this routine, ! until we empty the buffer. Thus we will make a call to psb_sp_getblk @@ -521,8 +521,8 @@ contains use psb_base_mod implicit none - - ! Arguments + + ! Arguments type(psb_zspmat_type), intent(in) :: a type(psb_z_coo_sparse_mat), intent(inout) :: trw integer(psb_ipk_), intent(in) :: i,m,jmin,jmax @@ -542,17 +542,17 @@ contains if (psb_errstatus_fatal()) then info = psb_err_internal_error_; goto 9999 end if - call heap%init(info) + call heap%init(info) - select type (aa=> a%a) - type is (psb_z_csr_sparse_mat) + select type (aa=> a%a) + type is (psb_z_csr_sparse_mat) ! ! Take a fast shortcut if the matrix is stored in CSR format ! - + do j = aa%irp(i), aa%irp(i+1) - 1 k = aa%ja(j) - if ((jmin<=k).and.(k<=jmax)) then + if ((jmin<=k).and.(k<=jmax)) then row(k) = aa%val(j) rowlevs(k) = 0 call heap%insert(k,info) @@ -562,14 +562,14 @@ contains class default ! - ! Otherwise use psb_sp_getblk, slower but able (in principle) of + ! Otherwise use psb_sp_getblk, slower but able (in principle) of ! handling any format. In this case, a block of rows is extracted ! instead of a single row, for performance reasons, and these ! rows are copied one by one into the array row, through successive ! calls to iluk_copyin. ! - if ((mod(i,nrb) == 1).or.(nrb == 1)) then + if ((mod(i,nrb) == 1).or.(nrb == 1)) then irb = min(m-i+1,nrb) call aa%csget(i,i+irb-1,trw,info) if (info /= psb_success_) then @@ -581,11 +581,11 @@ contains ktrw=1 end if nz = trw%get_nzeros() - do + do if (ktrw > nz) exit if (trw%ia(ktrw) > i) exit k = trw%ja(ktrw) - if ((jmin<=k).and.(k<=jmax)) then + if ((jmin<=k).and.(k<=jmax)) then row(k) = trw%val(ktrw) rowlevs(k) = 0 call heap%insert(k,info) @@ -674,10 +674,10 @@ contains use psb_base_mod - implicit none + implicit none ! Arguments - type(psb_i_heap), intent(inout) :: heap + type(psb_i_heap), intent(inout) :: heap integer(psb_ipk_), intent(in) :: i, fill_in integer(psb_ipk_), intent(inout) :: nidx,info integer(psb_ipk_), intent(inout) :: rowlevs(:) @@ -690,7 +690,7 @@ contains complex(psb_dpk_) :: rwk info = psb_success_ - if (.not.allocated(idxs)) then + if (.not.allocated(idxs)) then allocate(idxs(200),stat=info) if (info /= psb_success_) return endif @@ -702,34 +702,34 @@ contains ! do ! Beware: (iret < 0) means that the heap is empty, not an error. - call heap%get_first(k,iret) + call heap%get_first(k,iret) if (iret < 0) return - ! + ! ! Just in case an index has been put on the heap more than once. ! if (k == lastk) cycle - lastk = k + lastk = k nidx = nidx + 1 - if (nidx>size(idxs)) then + if (nidx>size(idxs)) then call psb_realloc(nidx+psb_heap_resize,idxs,info) if (info /= psb_success_) return end if idxs(nidx) = k - - if ((row(k) /= zzero).and.(rowlevs(k) <= fill_in).and.(ki) then + else if (j>i) then ! ! Copy the upper part of the row - ! - if (rowlevs(j) <= fill_in) then - l2 = l2 + 1 - if (size(uval) < l2) then - ! + ! + if (rowlevs(j) <= fill_in) then + l2 = l2 + 1 + if (size(uval) < l2) then + ! ! Figure out a good reallocation size! ! isz = max((l2/i)*m,int(1.2*l2),l2+100) - call psb_realloc(isz,uval,info) - if (info == psb_success_) call psb_realloc(isz,uja,info) + call psb_realloc(isz,uval,info) + if (info == psb_success_) call psb_realloc(isz,uja,info) if (info == psb_success_) call psb_realloc(isz,uplevs,info,pad=(m+1)) - if (info /= psb_success_) then + if (info /= psb_success_) then info=psb_err_from_subroutine_ call psb_errpush(info,name,a_err='Allocate') goto 9999 @@ -941,11 +941,11 @@ contains end if uja(l2) = j uval(l2) = row(j) - uplevs(l2) = rowlevs(j) + uplevs(l2) = rowlevs(j) else if (ialg == psb_milu_n_) then ! ! MILU(k): add discarded entries to the diagonal one - ! + ! d(i) = d(i) + row(j) end if ! @@ -963,13 +963,13 @@ contains lirp(i+1) = l1 + 1 uirp(i+1) = l2 + 1 - ! + ! ! Check the pivot size ! if (abs(d(i)) < d_epstol) then ! ! Too small pivot: unstable factorization - ! + ! info = psb_err_pivot_too_small_ int_err(1) = i write(ch_err,'(g20.10)') d(i) diff --git a/prec/impl/psb_z_ilut_fact.f90 b/prec/impl/psb_z_ilut_fact.f90 index b7e8da05..291dc778 100644 --- a/prec/impl/psb_z_ilut_fact.f90 +++ b/prec/impl/psb_z_ilut_fact.f90 @@ -1,9 +1,9 @@ -! +! ! Parallel Sparse BLAS version 3.5 ! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! +! Salvatore Filippone +! Alfredo Buttari +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -15,7 +15,7 @@ ! 3. The name of the PSBLAS group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. -! +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -27,21 +27,21 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! +! ! Moved here from MLD2P4, original copyright below. -! -! -! +! +! +! ! MLD2P4 version 2.2 ! MultiLevel Domain Decomposition Parallel Preconditioners Package ! based on PSBLAS (Parallel Sparse BLAS version 3.5) -! -! (C) Copyright 2008-2018 -! -! Salvatore Filippone -! Pasqua D'Ambra -! Daniela di Serafino -! +! +! (C) Copyright 2008-2018 +! +! Salvatore Filippone +! Pasqua D'Ambra +! Daniela di Serafino +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -53,7 +53,7 @@ ! 3. The name of the MLD2P4 group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. -! +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -65,8 +65,8 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! -! +! +! ! File: psb_zilut_fact.f90 ! ! Subroutine: psb_zilut_fact @@ -87,7 +87,7 @@ ! CSR. The diagonal of the U factor is stored separately (actually, the ! inverse of the diagonal entries is stored; this is then managed in the ! solve stage associated to the ILU(k,t) factorization). -! +! ! ! Arguments: ! fill_in - integer, input. @@ -114,7 +114,7 @@ ! factorization. ! Note: its allocation is managed by the calling routine psb_ilu_bld, ! hence it cannot be only intent(out). -! info - integer, output. +! info - integer, output. ! Error code. ! blck - type(psb_zspmat_type), input, optional, target. ! The sparse matrix structure containing the remote rows of the @@ -122,9 +122,9 @@ ! to build an Additive Schwarz base preconditioner with overlap ! greater than 0. If the overlap is 0 or the matrix has been reordered ! (see psb_fact_bld), then blck does not contain any row. -! +! subroutine psb_zilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale) - + use psb_base_mod use psb_z_ilu_fact_mod, psb_protect_name => psb_zilut_fact @@ -141,7 +141,7 @@ subroutine psb_zilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale) integer(psb_ipk_), intent(in), optional :: iscale ! Local Variables integer(psb_ipk_) :: l1, l2, m, err_act, iscale_ - + type(psb_zspmat_type), pointer :: blck_ type(psb_z_csr_sparse_mat) :: ll, uu real(psb_dpk_) :: scale @@ -151,34 +151,34 @@ subroutine psb_zilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale) info = psb_success_ call psb_erractionsave(err_act) - if (fill_in < 0) then + if (fill_in < 0) then info=psb_err_input_asize_invalid_i_ call psb_errpush(info,name, & & i_err=(/ione,fill_in,izero,izero,izero/)) goto 9999 end if - ! + ! ! Point to / allocate memory for the incomplete factorization ! - if (present(blck)) then + if (present(blck)) then blck_ => blck else - allocate(blck_,stat=info) - if (info == psb_success_) call blck_%csall(izero,izero,info,ione) + allocate(blck_,stat=info) + if (info == psb_success_) call blck_%allocate(izero,izero,info,ione,type='CSR') if (info /= psb_success_) then info=psb_err_from_subroutine_ - ch_err='csall' + ch_err='allocate' call psb_errpush(info,name,a_err=ch_err) goto 9999 end if endif - if (present(iscale)) then + if (present(iscale)) then iscale_ = iscale else iscale_ = psb_ilu_scale_none_ end if - - select case(iscale_) + + select case(iscale_) case(psb_ilu_scale_none_) scale = sone case(psb_ilu_scale_maxval_) @@ -189,10 +189,10 @@ subroutine psb_zilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale) call psb_errpush(info,name,i_err=(/ione*9,iscale_,izero,izero,izero/)) goto 9999 end select - + m = a%get_nrows() + blck_%get_nrows() if ((m /= l%get_nrows()).or.(m /= u%get_nrows()).or.& - & (m > size(d)) ) then + & (m > size(d)) ) then write(0,*) 'Wrong allocation status for L,D,U? ',& & l%get_nrows(),size(d),u%get_nrows() info = -1 @@ -225,15 +225,15 @@ subroutine psb_zilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale) call u%set_triangle() call u%set_unit() call u%set_upper() - + ! ! Nullify pointer / deallocate memory ! - if (present(blck)) then - blck_ => null() + if (present(blck)) then + blck_ => null() else call blck_%free() - deallocate(blck_,stat=info) + deallocate(blck_,stat=info) if(info.ne.0) then info=psb_err_from_subroutine_ ch_err='psb_sp_free' @@ -298,7 +298,7 @@ contains ! according to the CSR storage format. ! lirp - integer, dimension(:), input/output. ! The indices identifying the first nonzero entry of each row - ! of the L factor in lval, according to the CSR storage format. + ! of the L factor in lval, according to the CSR storage format. ! uval - complex(psb_dpk_), dimension(:), input/output. ! The U factor in the incomplete factorization. ! The entries of U are stored according to the CSR format. @@ -307,12 +307,12 @@ contains ! according to the CSR storage format. ! uirp - integer, dimension(:), input/output. ! The indices identifying the first nonzero entry of each row - ! of the U factor in uval, according to the CSR storage format. + ! of the U factor in uval, according to the CSR storage format. ! l1 - integer, output ! The number of nonzero entries in lval. ! l2 - integer, output ! The number of nonzero entries in uval. - ! info - integer, output. + ! info - integer, output. ! Error code. ! subroutine psb_zilut_factint(fill_in,thres,a,b,& @@ -320,7 +320,7 @@ contains use psb_base_mod - implicit none + implicit none ! Arguments integer(psb_ipk_), intent(in) :: fill_in @@ -355,7 +355,7 @@ contains m = ma+mb ! - ! Allocate a temporary buffer for the ilut_copyin function + ! Allocate a temporary buffer for the ilut_copyin function ! call trw%allocate(izero,izero,ione) if (info == psb_success_) call psb_ensure_size(m+1,lirp,info) @@ -366,7 +366,7 @@ contains call psb_errpush(info,name,a_err='psb_sp_all') goto 9999 end if - + l1=0 l2=0 lirp(1) = 1 @@ -396,10 +396,10 @@ contains ! in the ilut_copyin function, and updated during the elimination, in ! the ilut_fact routine. The heap is ideal because at each step we need ! the lowest index, but we also need to insert new items, and the heap - ! allows to do both in log time. + ! allows to do both in log time. ! d(i) = czero - if (i<=ma) then + if (i<=ma) then call ilut_copyin(i,ma,a,i,ione,m,nlw,nup,jmaxup,nrmi,weight,& & row,heap,ktrw,trw,info) else @@ -414,7 +414,7 @@ contains & d,uja,uirp,uval,nidx,idxs,info) ! ! Copy the row into lval/d(i)/uval - ! + ! if (info == psb_success_) call ilut_copyout(fill_in,thres,i,m,& & nlw,nup,jmaxup,nrmi,row,nidx,idxs,& & l1,l2,lja,lirp,lval,d,uja,uirp,uval,info) @@ -429,7 +429,7 @@ contains ! ! Adjust diagonal accounting for scale factor ! - if (weight /= sone) then + if (weight /= sone) then d(1:m) = d(1:m)*weight end if @@ -469,7 +469,7 @@ contains ! - storing into a heap the column indices of the nonzero entries of the copied ! row; ! - computing the column index of the first entry with maximum absolute value - ! in the part of the row belonging to the upper triangle; + ! in the part of the row belonging to the upper triangle; ! - computing the 2-norm of the row. ! The output array row is such that it contains a full row of A, i.e. it contains ! also the zero entries of the row. This is useful for the elimination step @@ -482,11 +482,11 @@ contains ! ! This routine is used by psb_zilut_factint in the computation of the ILU(k,t) ! factorization of a local sparse matrix. - ! + ! ! ! Arguments: ! i - integer, input. - ! The local index of the row to be extracted from the + ! The local index of the row to be extracted from the ! sparse matrix structure a. ! m - integer, input. ! The number of rows of the local matrix stored into a. @@ -533,7 +533,7 @@ contains ! staging buffer trw. See below. ! trw - type(psb_zspmat_type), input/output. ! A staging buffer. If the matrix A is not in CSR format, we use - ! the psb_sp_getblk routine and store its output in trw; when we + ! the psb_sp_getblk routine and store its output in trw; when we ! need to call psb_sp_getblk we do it for a block of rows, and then ! we consume them from trw in successive calls to this routine, ! until we empty the buffer. Thus we will make a call to psb_sp_getblk @@ -542,7 +542,7 @@ contains subroutine ilut_copyin(i,m,a,jd,jmin,jmax,nlw,nup,jmaxup,& & nrmi,weight,row,heap,ktrw,trw,info) use psb_base_mod - implicit none + implicit none type(psb_zspmat_type), intent(in) :: a type(psb_z_coo_sparse_mat), intent(inout) :: trw integer(psb_ipk_), intent(in) :: i, m,jmin,jmax,jd @@ -584,29 +584,29 @@ contains dmaxup = szero nrmi = szero - select type (aa=> a%a) - type is (psb_z_csr_sparse_mat) + select type (aa=> a%a) + type is (psb_z_csr_sparse_mat) ! ! Take a fast shortcut if the matrix is stored in CSR format ! do j = aa%irp(i), aa%irp(i+1) - 1 k = aa%ja(j) - if ((jmin<=k).and.(k<=jmax)) then - row(k) = aa%val(j)*weight + if ((jmin<=k).and.(k<=jmax)) then + row(k) = aa%val(j)*weight call heap%insert(k,info) if (info /= psb_success_) exit - if (kjd) then + if (kjd) then nup = nup + 1 - if (abs(row(k))>dmaxup) then + if (abs(row(k))>dmaxup) then jmaxup = k dmaxup = abs(row(k)) end if end if end if end do - if (info /= psb_success_) then + if (info /= psb_success_) then info=psb_err_from_subroutine_ call psb_errpush(info,name,a_err='psb_insert_heap') goto 9999 @@ -617,16 +617,16 @@ contains class default - + ! - ! Otherwise use psb_sp_getblk, slower but able (in principle) of + ! Otherwise use psb_sp_getblk, slower but able (in principle) of ! handling any format. In this case, a block of rows is extracted ! instead of a single row, for performance reasons, and these ! rows are copied one by one into the array row, through successive ! calls to ilut_copyin. ! - if ((mod(i,nrb) == 1).or.(nrb == 1)) then + if ((mod(i,nrb) == 1).or.(nrb == 1)) then irb = min(m-i+1,nrb) call aa%csget(i,i+irb-1,trw,info) if (info /= psb_success_) then @@ -639,18 +639,18 @@ contains kin = ktrw nz = trw%get_nzeros() - do + do if (ktrw > nz) exit if (trw%ia(ktrw) > i) exit k = trw%ja(ktrw) - if ((jmin<=k).and.(k<=jmax)) then + if ((jmin<=k).and.(k<=jmax)) then row(k) = trw%val(ktrw)*weight call heap%insert(k,info) if (info /= psb_success_) exit - if (kjd) then + if (kjd) then nup = nup + 1 - if (abs(row(k))>dmaxup) then + if (abs(row(k))>dmaxup) then jmaxup = k dmaxup = abs(row(k)) end if @@ -734,10 +734,10 @@ contains use psb_base_mod - implicit none + implicit none ! Arguments - type(psb_i_heap), intent(inout) :: heap + type(psb_i_heap), intent(inout) :: heap integer(psb_ipk_), intent(in) :: i integer(psb_ipk_), intent(inout) :: nidx,info real(psb_dpk_), intent(in) :: thres,nrmi @@ -753,23 +753,23 @@ contains call psb_ensure_size(200*ione,idxs,info) if (info /= psb_success_) return nidx = 0 - lastk = -1 + lastk = -1 ! ! Do while there are indices to be processed ! do - call heap%get_first(k,iret) + call heap%get_first(k,iret) if (iret < 0) exit - ! + ! ! An index may have been put on the heap more than once. ! if (k == lastk) cycle - lastk = k + lastk = k lowert: if (k nidx) exit if (idxs(idxp) >= i) exit @@ -987,8 +987,8 @@ contains ! if (abs(witem) < thres*nrmi) cycle - nz = nz + 1 - xw(nz) = witem + nz = nz + 1 + xw(nz) = witem xwid(nz) = widx call heap%insert(witem,widx,info) if (info /= psb_success_) then @@ -1000,9 +1000,9 @@ contains ! ! Now we have to take out the first nlw+fill_in entries - ! + ! if (nz <= nlw+fill_in) then - ! + ! ! Just copy everything from xw, and it is already ordered ! else @@ -1014,7 +1014,7 @@ contains call psb_errpush(info,name,a_err='psb_heap_get_first') goto 9999 end if - + xw(k) = witem xwid(k) = widx end do @@ -1029,15 +1029,15 @@ contains ! Copy out the lower part of the row ! do k=1,nz - l1 = l1 + 1 + l1 = l1 + 1 if (size(lval) < l1) then - ! + ! ! Figure out a good reallocation size! - ! + ! isz = (max((l1/i)*m,int(1.2*l1),l1+100)) - call psb_realloc(isz,lval,info) - if (info == psb_success_) call psb_realloc(isz,lja,info) - if (info /= psb_success_) then + call psb_realloc(isz,lval,info) + if (info == psb_success_) call psb_realloc(isz,lja,info) + if (info /= psb_success_) then info=psb_err_from_subroutine_ call psb_errpush(info,name,a_err='Allocate') goto 9999 @@ -1046,25 +1046,25 @@ contains lja(l1) = xwid(k) lval(l1) = xw(indx(k)) end do - + ! ! Make sure idxp points to the diagonal entry ! - if (idxp <= size(idxs)) then - if (idxs(idxp) < i) then - do + if (idxp <= size(idxs)) then + if (idxs(idxp) < i) then + do idxp = idxp + 1 if (idxp > nidx) exit if (idxs(idxp) >= i) exit end do end if end if - if (idxp > size(idxs)) then + if (idxp > size(idxs)) then !!$ write(0,*) 'Warning: missing diagonal element in the row ' else - if (idxs(idxp) > i) then + if (idxs(idxp) > i) then !!$ write(0,*) 'Warning: missing diagonal element in the row ' - else if (idxs(idxp) /= i) then + else if (idxs(idxp) /= i) then !!$ write(0,*) 'Warning: impossible error: diagonal has vanished' else ! @@ -1076,7 +1076,7 @@ contains if (abs(d(i)) < d_epstol) then ! ! Too small pivot: unstable factorization - ! + ! info = psb_err_pivot_too_small_ int_err(1) = i write(ch_err,'(g20.10)') d(i) @@ -1092,7 +1092,7 @@ contains end if ! - ! Now the upper part + ! Now the upper part ! call heap%init(info,dir=psb_asort_down_) @@ -1104,15 +1104,15 @@ contains nz = 0 do - + idxp = idxp + 1 if (idxp > nidx) exit widx = idxs(idxp) - if (widx <= i) then + if (widx <= i) then !!$ write(0,*) 'Warning: lower triangle in upper copy',widx,i,idxp,idxs(idxp) cycle end if - if (widx > m) then + if (widx > m) then !!$ write(0,*) 'Warning: impossible value',widx,i,idxp,idxs(idxp) cycle end if @@ -1120,12 +1120,12 @@ contains ! ! Dropping rule based on the 2-norm. But keep the jmaxup-th entry anyway. ! - if ((widx /= jmaxup) .and. (abs(witem) < thres*nrmi)) then - cycle + if ((widx /= jmaxup) .and. (abs(witem) < thres*nrmi)) then + cycle end if nz = nz + 1 - xw(nz) = witem + xw(nz) = witem xwid(nz) = widx call heap%insert(witem,widx,info) if (info /= psb_success_) then @@ -1133,7 +1133,7 @@ contains call psb_errpush(info,name,a_err='psb_insert_heap') goto 9999 end if - + end do ! @@ -1141,7 +1141,7 @@ contains ! we include entry jmaxup. ! if (nz <= nup+fill_in) then - ! + ! ! Just copy everything from xw ! fndmaxup=.true. @@ -1155,13 +1155,13 @@ contains if (widx == jmaxup) fndmaxup=.true. end do end if - if ((i psb_z_invk_bld + use psb_z_ilu_fact_mod + implicit none + + ! Arguments + type(psb_zspmat_type), intent(in), target :: a + integer(psb_ipk_), intent(in) :: fill1, fill2 + type(psb_zspmat_type), intent(inout) :: lmat, umat + complex(psb_dpk_), allocatable :: d(:) + Type(psb_desc_type), Intent(inout) :: desc + integer(psb_ipk_), intent(out) :: info + type(psb_zspmat_type), intent(in), optional :: blck + ! + integer(psb_ipk_) :: i, nztota, err_act, n_row, nrow_a, n_col + type(psb_zspmat_type) :: atmp + complex(psb_dpk_), allocatable :: pq(:), pd(:) + integer(psb_ipk_), allocatable :: uplevs(:) + integer(psb_ipk_) :: debug_level, debug_unit + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: np,me + integer(psb_ipk_) :: nzrmax + character(len=20) :: name, ch_err + + + info = psb_success_ + name='psb_zinvk_bld' + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_; goto 9999 + end if + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + ctxt = psb_cd_get_context(desc) + call psb_info(ctxt, me, np) + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),' start' + + ! + ! Check the memory available to hold the incomplete L and U factors + ! and allocate it if needed + ! + nrow_a = a%get_nrows() + nztota = a%get_nzeros() + + if (present(blck)) then + nztota = nztota + blck%get_nzeros() + end if + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & ': out get_nnzeros',nrow_a,nztota,& + & a%get_nrows(),a%get_ncols(),a%get_nzeros() + + + n_row = psb_cd_get_local_rows(desc) + n_col = psb_cd_get_local_cols(desc) + allocate(pd(n_row),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + end if + + + call lmat%allocate(n_row,n_row,info,nz=nztota) + if (info == psb_success_) call umat%allocate(n_row,n_row,info,nz=nztota) + + + call psb_iluk_fact(fill1,psb_ilu_n_,a,lmat,umat,pd,info,blck=blck) + !,uplevs=uplevs) + !call psb_ziluk_fact(fill1,psb_ilu_n_,a,lmat,umat,pd,info,blck=blck) + + if (info == psb_success_) call atmp%allocate(n_row,n_row,info,nz=nztota) + if(info/=0) then + info=psb_err_from_subroutine_ + ch_err='psb_sp_all' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + + ! + ! Compute the aprox U^-1 and L^-1 + ! + call psb_sparse_invk(n_row,umat,atmp,fill2,info) + if (info == psb_success_) call psb_move_alloc(atmp,umat,info) + if (info == psb_success_) call lmat%transp() + if (info == psb_success_) call psb_sparse_invk(n_row,lmat,atmp,fill2,info) + if (info == psb_success_) call psb_move_alloc(atmp,lmat,info) + if (info == psb_success_) call lmat%transp() + ! Done. Hopefully.... + + if (info /= psb_success_) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='invt') + goto 9999 + end if + + call psb_move_alloc(pd,d,info) + call lmat%set_asb() + call lmat%trim() + call umat%set_asb() + call umat%trim() + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),' end' + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return +end subroutine psb_z_invk_bld + +subroutine psb_zsparse_invk(n,a,z,fill_in,info,inlevs) + + use psb_base_mod + use psb_z_invk_fact_mod, psb_protect_name => psb_zsparse_invk + + integer(psb_ipk_), intent(in) :: n + type(psb_zspmat_type), intent(in) :: a + type(psb_zspmat_type), intent(inout) :: z + integer(psb_ipk_), intent(in) :: fill_in + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: inlevs(:) + ! + integer(psb_ipk_) :: i,j,k, err_act, nz, nzra, nzrz, ipz1,ipz2, nzz, ip1, ip2, l2 + integer(psb_ipk_), allocatable :: ia(:), ja(:), iz(:), jz(:) + complex(psb_dpk_), allocatable :: zw(:), val(:), valz(:) + integer(psb_ipk_), allocatable :: uplevs(:), rowlevs(:), idxs(:) + complex(psb_dpk_), allocatable :: row(:) + type(psb_z_coo_sparse_mat) :: trw + type(psb_z_csr_sparse_mat) :: acsr, zcsr + integer(psb_ipk_) :: ktrw, nidx + type(psb_i_heap) :: heap + + real(psb_dpk_) :: alpha + character(len=20) :: name='psb_sp_invk' + + info = psb_success_ + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_; goto 9999 + end if + + if (.not.(a%is_triangle().and.a%is_unit().and.a%is_upper())) then + write(psb_err_unit,*) 'Wrong A ' + info = psb_err_internal_error_ + call psb_errpush(psb_err_internal_error_,name,a_err='wrong A') + goto 9999 + end if + call a%cp_to(acsr) + call trw%allocate(izero,izero,ione) + if (info == psb_success_) allocate(zw(n),iz(n),valz(n),& + & row(n),rowlevs(n),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + end if + + allocate(uplevs(acsr%get_nzeros()),stat=info) + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='Allocate') + goto 9999 + end if + uplevs(:) = 0 + row(:) = zzero + rowlevs(:) = -(n+1) + + call zcsr%allocate(n,n,n*fill_in) + call zcsr%set_triangle() + call zcsr%set_unit(.false.) + call zcsr%set_upper() + call psb_ensure_size(n+1, idxs, info) + + + ! + ! + zcsr%irp(1) = 1 + nzz = 0 + + l2 = 0 + outer: do i = 1, n-1 + ! ZW = e_i + call psb_invk_copyin(i,n,acsr,ione,n,row,rowlevs,heap,ktrw,trw,info,& + & sign=-done,inlevs=inlevs) + row(i) = zone + rowlevs(i) = 0 + + ! Update loop + call psb_invk_inv(fill_in,i,row,rowlevs,heap,& + & acsr%ja,acsr%irp,acsr%val,uplevs,nidx,idxs,info) + + call psb_invk_copyout(fill_in,i,n,row,rowlevs,nidx,idxs,& + & l2,zcsr%ja,zcsr%irp,zcsr%val,info) + + nzz = l2 + end do outer + if (info /= psb_success_) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='mainloop') + goto 9999 + end if + ipz1 = nzz+1 + call psb_ensure_size(ipz1,zcsr%val,info) + call psb_ensure_size(ipz1,zcsr%ja,info) + zcsr%val(ipz1) = zone + zcsr%ja(ipz1) = n + zcsr%irp(n+1) = ipz1+1 + call zcsr%set_sorted() + call z%mv_from(zcsr) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return +end subroutine psb_zsparse_invk + +subroutine psb_z_invk_copyin(i,m,a,jmin,jmax,row,rowlevs,heap,ktrw,trw,info,sign,inlevs) + + use psb_base_mod + use psb_z_invk_fact_mod, psb_protect_name => psb_z_invk_copyin + + implicit none + + ! Arguments + type(psb_z_csr_sparse_mat), intent(in) :: a + type(psb_z_coo_sparse_mat), intent(inout) :: trw + integer(psb_ipk_), intent(in) :: i,m,jmin,jmax + integer(psb_ipk_), intent(inout) :: ktrw,info + integer(psb_ipk_), intent(inout) :: rowlevs(:) + complex(psb_dpk_), intent(inout) :: row(:) + type(psb_i_heap), intent(inout) :: heap + real(psb_dpk_), optional, intent(in) :: sign + integer(psb_ipk_), intent(in), optional :: inlevs(:) + + ! Local variables + integer(psb_ipk_) :: k,j,irb,err_act, nz + integer(psb_ipk_), parameter :: nrb=16 + real(psb_dpk_) :: sign_ + character(len=20), parameter :: name='invk_copyin' + character(len=20) :: ch_err + + info = psb_success_ + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_; goto 9999 + end if + + call heap%init(info) + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_init_heap') + goto 9999 + end if + + if (present(sign)) then + sign_ = sign + else + sign_ = done + end if + + + ! + ! Take a fast shortcut if the matrix is stored in CSR format + ! + if (present(inlevs)) then + do j = a%irp(i), a%irp(i+1) - 1 + k = a%ja(j) + if ((jmin<=k).and.(k<=jmax)) then + row(k) = sign_ * a%val(j) + rowlevs(k) = inlevs(j) + call heap%insert(k,info) + end if + end do + else + do j = a%irp(i), a%irp(i+1) - 1 + k = a%ja(j) + if ((jmin<=k).and.(k<=jmax)) then + row(k) = sign_ * a%val(j) + rowlevs(k) = 0 + call heap%insert(k,info) + end if + end do + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return +end subroutine psb_z_invk_copyin + + +subroutine psb_z_invk_copyout(fill_in,i,m,row,rowlevs,nidx,idxs,& + & l2,uia1,uia2,uaspk,info) + + use psb_base_mod + use psb_z_invk_fact_mod, psb_protect_name => psb_z_invk_copyout + + implicit none + + ! Arguments + integer(psb_ipk_), intent(in) :: fill_in, i, m, nidx + integer(psb_ipk_), intent(inout) :: l2, info + integer(psb_ipk_), intent(inout) :: rowlevs(:), idxs(:) + integer(psb_ipk_), allocatable, intent(inout) :: uia1(:), uia2(:) + complex(psb_dpk_), allocatable, intent(inout) :: uaspk(:) + complex(psb_dpk_), intent(inout) :: row(:) + + ! Local variables + integer(psb_ipk_) :: j,isz,err_act,int_err(5),idxp + character(len=20), parameter :: name='psb_ziluk_factint' + character(len=20) :: ch_err + + info = psb_success_ + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_; goto 9999 + end if + + do idxp=1,nidx + + j = idxs(idxp) + + if (j>=i) then + ! + ! Copy the upper part of the row + ! + if (rowlevs(j) <= fill_in) then + l2 = l2 + 1 + if (size(uaspk) < l2) then + ! + ! Figure out a good reallocation size! + ! + isz = max(int(1.2*l2),l2+100) + call psb_realloc(isz,uaspk,info) + if (info == psb_success_) call psb_realloc(isz,uia1,info) + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='Allocate') + goto 9999 + end if + end if + uia1(l2) = j + uaspk(l2) = row(j) + end if + ! + ! Re-initialize row(j) and rowlevs(j) + ! + row(j) = zzero + rowlevs(j) = -(m+1) + end if + end do + + uia2(i+1) = l2 + 1 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return +end subroutine psb_z_invk_copyout + +subroutine psb_zinvk_inv(fill_in,i,row,rowlevs,heap,ja,irp,val,uplevs,& + & nidx,idxs,info) + + use psb_base_mod + use psb_z_invk_fact_mod, psb_protect_name => psb_zinvk_inv + + implicit none + + ! Arguments + type(psb_i_heap), intent(inout) :: heap + integer(psb_ipk_), intent(in) :: i, fill_in + integer(psb_ipk_), intent(inout) :: nidx,info + integer(psb_ipk_), intent(inout) :: rowlevs(:) + integer(psb_ipk_), allocatable, intent(inout) :: idxs(:) + integer(psb_ipk_), intent(in) :: ja(:),irp(:),uplevs(:) + complex(psb_dpk_), intent(in) :: val(:) + complex(psb_dpk_), intent(inout) :: row(:) + + ! Local variables + integer(psb_ipk_) :: k,j,lrwk,jj,lastk, iret + real(psb_dpk_) :: rwk + + + info = psb_success_ + + call psb_ensure_size(200, idxs, info) + if (info /= psb_success_) return + nidx = 1 + idxs(1) = i + lastk = i + + ! + ! Do while there are indices to be processed + ! + do + ! Beware: (iret < 0) means that the heap is empty, not an error. + call heap%get_first(k,iret) + if (iret < 0) then +!!$ write(psb_err_unit,*) 'IINVK: ',i,' returning at ',lastk + return + end if + + ! + ! Just in case an index has been put on the heap more than once. + ! + if (k == lastk) cycle + + lastk = k + nidx = nidx + 1 + if (nidx>size(idxs)) then + call psb_realloc(nidx+psb_heap_resize,idxs,info) + if (info /= psb_success_) return + end if + idxs(nidx) = k + + if ((row(k) /= zzero).and.(rowlevs(k) <= fill_in)) then + ! + ! Note: since U is scaled while copying it out (see iluk_copyout), + ! we can use rwk in the update below + ! + rwk = row(k) + lrwk = rowlevs(k) + + do jj=irp(k),irp(k+1)-1 + j = ja(jj) + if (j<=k) then + info = -i + return + endif + ! + ! Insert the index into the heap for further processing. + ! The fill levels are initialized to a negative value. If we find + ! one, it means that it is an as yet untouched index, so we need + ! to insert it; otherwise it is already on the heap, there is no + ! need to insert it more than once. + ! + if (rowlevs(j)<0) then + call heap%insert(j,info) + if (info /= psb_success_) return + rowlevs(j) = abs(rowlevs(j)) + end if + ! + ! Update row(j) and the corresponding fill level + ! + row(j) = row(j) - rwk * val(jj) + rowlevs(j) = min(rowlevs(j),lrwk+uplevs(jj)+1) + end do + + end if + end do + +end subroutine psb_zinvk_inv diff --git a/prec/impl/psb_z_invt_fact.f90 b/prec/impl/psb_z_invt_fact.f90 new file mode 100644 index 00000000..bed713ae --- /dev/null +++ b/prec/impl/psb_z_invt_fact.f90 @@ -0,0 +1,739 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! Moved here from AMG4PSBLAS, original copyright below. +! +! +! AMG-AINV: Approximate Inverse plugin for +! AMG4PSBLAS version 1.0 +! +! (C) Copyright 2020 +! +! Salvatore Filippone University of Rome Tor Vergata +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! + +subroutine psb_z_invt_bld(a,fillin,invfill,thresh,invthresh,& + & lmat,d,umat,desc,info,blck) + + use psb_base_mod + use psb_z_invt_fact_mod, psb_protect_name => psb_z_invt_bld + use psb_z_ilu_fact_mod + implicit none + + ! Arguments + type(psb_zspmat_type), intent(in), target :: a + integer(psb_ipk_), intent(in) :: fillin,invfill + real(psb_dpk_), intent(in) :: thresh + real(psb_dpk_), intent(in) :: invthresh + type(psb_zspmat_type), intent(inout) :: lmat, umat + complex(psb_dpk_), allocatable :: d(:) + Type(psb_desc_type), Intent(inout) :: desc + integer(psb_ipk_), intent(out) :: info + type(psb_zspmat_type), intent(in), optional :: blck + ! + integer(psb_ipk_) :: i, nztota, err_act, n_row, nrow_a, n_col + type(psb_zspmat_type) :: atmp + complex(psb_dpk_), allocatable :: pq(:), pd(:), w(:) + integer(psb_ipk_) :: debug_level, debug_unit + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: np,me + integer(psb_ipk_) :: nzrmax + real(psb_dpk_) :: sp_thresh + character(len=20) :: name, ch_err, fname + + + info = psb_success_ + name='psb_zinvt_fact' + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_; goto 9999 + end if + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + ctxt = psb_cd_get_context(desc) + call psb_info(ctxt, me, np) + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),' start' + + ! + ! Check the memory available to hold the incomplete L and U factors + ! and allocate it if needed + ! + nrow_a = a%get_nrows() + nztota = a%get_nzeros() + + if (present(blck)) then + nztota = nztota + blck%get_nzeros() + end if + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & ': out get_nnzeros',nrow_a,nztota,& + & a%get_nrows(),a%get_ncols(),a%get_nzeros() + + + n_row = psb_cd_get_local_rows(desc) + n_col = psb_cd_get_local_cols(desc) + allocate(pd(n_row),w(n_row),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + end if + + nzrmax = fillin + sp_thresh = thresh + + call lmat%allocate(n_row,n_row,info,nz=nztota) + if (info == psb_success_) call umat%allocate(n_row,n_row,info,nz=nztota) + + if (info == 0) call psb_ilut_fact(nzrmax,sp_thresh,& + & a,lmat,umat,pd,info,blck=blck,iscale=psb_ilu_scale_maxval_) + + if (info == psb_success_) call atmp%allocate(n_row,n_row,info,nz=nztota) + if(info/=0) then + info=psb_err_from_subroutine_ + ch_err='psb_sp_all' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if (.false.) then +!!$ if (debug_level >= psb_debug_inner_) then + write(fname,'(a,i0,a)') 'invt-lo-',me,'.mtx' + call lmat%print(fname,head="INVTLOW") + write(fname,'(a,i0,a)') 'invt-up-',me,'.mtx' + call umat%print(fname,head="INVTUPP") + end if + + ! + ! Compute the approx U^-1 and L^-1 + ! + nzrmax = invfill + call psb_zsparse_invt(n_row,umat,atmp,nzrmax,invthresh,info) + if (info == psb_success_) call psb_move_alloc(atmp,umat,info) + if (info == psb_success_) call lmat%transp() + if (info == psb_success_) call psb_zsparse_invt(n_row,lmat,atmp,nzrmax,invthresh,info) + if (info == psb_success_) call psb_move_alloc(atmp,lmat,info) + if (info == psb_success_) call lmat%transp() + ! Done. Hopefully.... + + if (info /= psb_success_) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='invt') + goto 9999 + end if + + call psb_move_alloc(pd,d,info) + call lmat%set_asb() + call lmat%trim() + call umat%set_asb() + call umat%trim() + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),' end' + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return +end subroutine psb_z_invt_bld + +subroutine psb_zsparse_invt(n,a,z,nzrmax,sp_thresh,info) + + use psb_base_mod + use psb_z_invt_fact_mod, psb_protect_name => psb_zsparse_invt + + implicit none + integer(psb_ipk_), intent(in) :: n + type(psb_zspmat_type), intent(in) :: a + type(psb_zspmat_type), intent(inout) :: z + integer(psb_ipk_), intent(in) :: nzrmax + real(psb_dpk_), intent(in) :: sp_thresh + integer(psb_ipk_), intent(out) :: info + ! + integer(psb_ipk_) :: i,j,k, err_act, nz, nzra, nzrz, ipz1,ipz2, nzz, ip1, ip2, l2 + integer(psb_ipk_), allocatable :: ia(:), ja(:), iz(:),jz(:) + complex(psb_dpk_), allocatable :: zw(:), val(:), valz(:) + integer(psb_ipk_), allocatable :: uplevs(:), rowlevs(:),idxs(:) + complex(psb_dpk_), allocatable :: row(:) + type(psb_z_coo_sparse_mat) :: trw + type(psb_z_csr_sparse_mat) :: acsr, zcsr + integer(psb_ipk_) :: ktrw, nidx, nlw,nup,jmaxup + type(psb_i_heap) :: heap + real(psb_dpk_) :: alpha, nrmi + character(len=20) :: name='psb_sp_invt' + + info = psb_success_ + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_; goto 9999 + end if + + if (.not.(a%is_triangle().and.a%is_unit().and.a%is_upper())) then + write(psb_err_unit,*) 'Wrong A ' + info = psb_err_internal_error_ + call psb_errpush(psb_err_internal_error_,name,a_err='wrong A') + goto 9999 + end if + call a%cp_to(acsr) + call trw%allocate(izero,izero,ione) + if (info == psb_success_) allocate(zw(n),iz(n),valz(n),& + & row(n),rowlevs(n),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + end if + + call zcsr%allocate(n,n,n*nzrmax) + call zcsr%set_triangle() + call zcsr%set_unit(.false.) + call zcsr%set_upper() + ! + ! + nzz = 0 + row(:) = zzero + rowlevs(:) = 0 + l2 = 0 + zcsr%irp(1) = 1 + + outer: do i = 1, n-1 + ! ZW = e_i + call psb_z_invt_copyin(i,n,acsr,i,ione,n,nlw,nup,jmaxup,nrmi,row,& + & heap,rowlevs,ktrw,trw,info,sign=-done) + if (info /= 0) exit + row(i) = zone + ! Adjust norm + if (nrmi < done) then + nrmi = sqrt(done + nrmi**2) + else + nrmi = nrmi*sqrt(zone+zone/(nrmi**2)) + end if + + call psb_invt_inv(sp_thresh,i,nrmi,row,heap,rowlevs,& + & acsr%ja,acsr%irp,acsr%val,nidx,idxs,info) + if (info /= 0) exit +!!$ write(0,*) 'Calling copyout ',nzrmax,nlw,nup,nidx,l2 + call psb_z_invt_copyout(nzrmax,sp_thresh,i,n,nlw,nup,jmaxup,nrmi,row,& + & nidx,idxs,l2,zcsr%ja,zcsr%irp,zcsr%val,info) + if (info /= 0) exit + nzz = l2 + end do outer + if (info /= psb_success_) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='mainloop') + goto 9999 + end if + + ipz1 = nzz+1 + call psb_ensure_size(ipz1,zcsr%val,info) + call psb_ensure_size(ipz1,zcsr%ja,info) + zcsr%val(ipz1) = zone + zcsr%ja(ipz1) = n + zcsr%irp(n+1) = ipz1+1 + + call z%mv_from(zcsr) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return +end subroutine psb_zsparse_invt + +subroutine psb_z_invt_copyin(i,m,a,jd,jmin,jmax,nlw,nup,jmaxup,nrmi,row,heap,& + & irwt,ktrw,trw,info,sign) + use psb_base_mod + use psb_d_invt_fact_mod, psb_protect_name => psb_d_invt_copyin + implicit none + type(psb_z_csr_sparse_mat), intent(in) :: a + type(psb_z_coo_sparse_mat), intent(inout) :: trw + integer(psb_ipk_), intent(in) :: i, m,jmin,jmax,jd + integer(psb_ipk_), intent(inout) :: ktrw,nlw,nup,jmaxup,info + integer(psb_ipk_), intent(inout) :: irwt(:) + real(psb_dpk_), intent(inout) :: nrmi + complex(psb_dpk_), intent(inout) :: row(:) + type(psb_i_heap), intent(inout) :: heap + real(psb_dpk_), intent(in), optional :: sign + ! + integer(psb_ipk_) :: k,j,irb,kin,nz, err_act + integer(psb_ipk_), parameter :: nrb=16 + real(psb_dpk_) :: dmaxup, sign_ + real(psb_dpk_), external :: dnrm2 + character(len=20), parameter :: name='invt_copyin' + + info = psb_success_ + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_; goto 9999 + end if + + call heap%init(info) + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_init_heap') + goto 9999 + end if + sign_ = done + if (present(sign)) sign_ = sign + ! + ! nrmi is the norm of the current sparse row (for the time being, + ! we use the 2-norm). + ! NOTE: the 2-norm below includes also elements that are outside + ! [jmin:jmax] strictly. Is this really important? TO BE CHECKED. + ! + + nlw = 0 + nup = 0 + jmaxup = 0 + dmaxup = zzero + nrmi = zzero + + do j = a%irp(i), a%irp(i+1) - 1 + k = a%ja(j) + if ((jmin<=k).and.(k<=jmax)) then + row(k) = sign_ * a%val(j) + call heap%insert(k,info) + irwt(k) = 1 + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_insert_heap') + goto 9999 + end if + end if + if (kjd) then + nup = nup + 1 + if (abs(row(k))>dmaxup) then + jmaxup = k + dmaxup = abs(row(k)) + end if + end if + end do + nz = a%irp(i+1) - a%irp(i) + nrmi = dnrm2(nz,a%val(a%irp(i):),ione) + + return + +9999 call psb_error_handler(err_act) + return +end subroutine psb_z_invt_copyin + +subroutine psb_z_invt_copyout(fill_in,thres,i,m,nlw,nup,jmaxup,nrmi,row, & + & nidx,idxs,l2,ja,irp,val,info) + + use psb_base_mod + use psb_z_invt_fact_mod, psb_protect_name => psb_z_invt_copyout + + implicit none + + ! Arguments + integer(psb_ipk_), intent(in) :: fill_in,i,m,nidx,nlw,nup,jmaxup + integer(psb_ipk_), intent(in) :: idxs(:) + integer(psb_ipk_), intent(inout) :: l2, info + integer(psb_ipk_), allocatable, intent(inout) :: ja(:),irp(:) + real(psb_dpk_), intent(in) :: thres,nrmi + complex(psb_dpk_),allocatable, intent(inout) :: val(:) + complex(psb_dpk_), intent(inout) :: row(:) + + ! Local variables + real(psb_dpk_),allocatable :: xw(:) + integer(psb_ipk_), allocatable :: xwid(:), indx(:) + real(psb_dpk_) :: witem, wmin + integer(psb_ipk_) :: widx + integer(psb_ipk_) :: k,isz,err_act,int_err(5),idxp, nz + type(psb_d_idx_heap) :: heap + character(len=20), parameter :: name='invt_copyout' + character(len=20) :: ch_err + logical :: fndmaxup + + info = psb_success_ + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_; goto 9999 + end if + + ! + ! Here we need to apply also the dropping rule base on the fill-in. + ! We do it by putting into a heap the elements that are not dropped + ! by using the 2-norm rule, and then copying them out. + ! + ! The heap goes down on the entry absolute value, so the first item + ! is the largest absolute value. + ! +!!$ write(0,*) 'invt_copyout ',nidx,nup+fill_in + call heap%init(info,dir=psb_asort_down_) + + if (info == psb_success_) allocate(xwid(nidx),xw(nidx),indx(nidx),stat=info) + if (info /= psb_success_) then + info=psb_err_alloc_request_ + call psb_errpush(info,name,i_err=(/3*nidx/),& + & a_err='real(psb_dpk_)') + goto 9999 + end if + + ! + ! First the lower part + ! + + nz = 0 + idxp = 0 + + do + + idxp = idxp + 1 + if (idxp > nidx) exit + if (idxs(idxp) >= i) exit + widx = idxs(idxp) + witem = row(widx) + ! + ! Dropping rule based on the 2-norm + ! + if (abs(witem) < thres*nrmi) cycle + + nz = nz + 1 + xw(nz) = witem + xwid(nz) = widx + call heap%insert(witem,widx,info) + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_insert_heap') + goto 9999 + end if + end do + + if (nz > 1) then + write(psb_err_unit,*) 'Warning: lower triangle from invt???? ' + end if + + + if (idxp <= size(idxs)) then + if (idxs(idxp) < i) then + do + idxp = idxp + 1 + if (idxp > nidx) exit + if (idxs(idxp) >= i) exit + end do + end if + end if + idxp = idxp - 1 + nz = 0 + wmin=HUGE(wmin) + if (.false.) then + do + + idxp = idxp + 1 + if (idxp > nidx) exit + widx = idxs(idxp) + if (widx < i) then + write(psb_err_unit,*) 'Warning: lower triangle in upper copy',widx,i,idxp,idxs(idxp) + cycle + end if + if (widx > m) then + cycle + end if + witem = row(widx) + ! + ! Dropping rule based on the 2-norm. But keep the jmaxup-th entry anyway. + ! + if ((widx /= jmaxup) .and. (widx /= i) .and. (abs(witem) < thres*nrmi)) then + cycle + end if + if ((widx/=jmaxup).and.(nz > nup+fill_in)) then + if (abs(witem) < wmin) cycle + endif + wmin = min(abs(witem),wmin) + nz = nz + 1 + xw(nz) = witem + xwid(nz) = widx + call heap%insert(witem,widx,info) + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_insert_heap') + goto 9999 + end if + + end do + + ! + ! Now we have to take out the first nup-fill_in entries. But make sure + ! we include entry jmaxup. + ! + if (nz <= nup+fill_in) then + ! + ! Just copy everything from xw + ! + fndmaxup=.true. + else + fndmaxup = .false. + nz = nup+fill_in + do k=1,nz + call heap%get_first(witem,widx,info) + xw(k) = witem + xwid(k) = widx + if (widx == jmaxup) fndmaxup=.true. + end do + end if + if ((i nidx) exit + widx = idxs(idxp) + if (widx < i) then + write(psb_err_unit,*) 'Warning: lower triangle in upper copy',widx,i,idxp,idxs(idxp) + cycle + end if + if (widx > m) then + cycle + end if + witem = row(widx) + ! + ! Dropping rule based on the 2-norm. But keep the jmaxup-th entry anyway. + ! + if ((widx /= i) .and. (abs(witem) < thres*nrmi)) then + cycle + end if + if (nz > nup+fill_in) then + if (abs(witem) < wmin) cycle + endif + wmin = min(abs(witem),wmin) + nz = nz + 1 + xw(nz) = witem + xwid(nz) = widx + call heap%insert(witem,widx,info) + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_insert_heap') + goto 9999 + end if + + end do + + ! + ! Now we have to take out the first nup-fill_in entries. But make sure + ! we include entry jmaxup. + ! + if (nz > nup+fill_in) then + nz = nup+fill_in + do k=1,nz + call heap%get_first(witem,widx,info) + xw(k) = witem + xwid(k) = widx + end do + end if + end if + + ! + ! Now we put things back into ascending column order + ! + call psb_msort(xwid(1:nz),indx(1:nz),dir=psb_sort_up_) + + ! + ! Copy out the upper part of the row + ! + do k=1,nz + l2 = l2 + 1 + if (size(val) < l2) then + ! + ! Figure out a good reallocation size! + ! + isz = max(int(1.2*l2),l2+100) + call psb_realloc(isz,val,info) + if (info == psb_success_) call psb_realloc(isz,ja,info) + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='Allocate') + goto 9999 + end if + end if + ja(l2) = xwid(k) + val(l2) = xw(indx(k)) + end do + + ! + ! Set row to zero + ! + do idxp=1,nidx + row(idxs(idxp)) = zzero + end do + + irp(i+1) = l2 + 1 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return +end subroutine psb_z_invt_copyout + +subroutine psb_z_invt_inv(thres,i,nrmi,row,heap,irwt,ja,irp,val,nidx,idxs,info) + + use psb_base_mod + use psb_z_invt_fact_mod, psb_protect_name => psb_z_invt_inv + + implicit none + + ! Arguments + type(psb_i_heap), intent(inout) :: heap + integer(psb_ipk_), intent(in) :: i + integer(psb_ipk_), intent(inout) :: nidx,info + integer(psb_ipk_), intent(inout) :: irwt(:) + real(psb_dpk_), intent(in) :: thres,nrmi + integer(psb_ipk_), allocatable, intent(inout) :: idxs(:) + integer(psb_ipk_), intent(in) :: ja(:),irp(:) + complex(psb_dpk_), intent(in) :: val(:) + complex(psb_dpk_), intent(inout) :: row(:) + + ! Local Variables + integer(psb_ipk_) :: k,j,jj,lastk,iret + real(psb_dpk_) :: rwk, alpha + + info = psb_success_ + + call psb_ensure_size(200, idxs, info) + if (info /= psb_success_) return + nidx = 1 + idxs(1) = i + lastk = i + irwt(i) = 1 +!!$ write(0,*) 'Drop Threshold ',thres*nrmi + ! + ! Do while there are indices to be processed + ! + do + + call heap%get_first(k,iret) + if (iret < 0) exit + + ! + ! An index may have been put on the heap more than once. + ! Should not happen, but just in case. + ! + if (k == lastk) cycle + lastk = k + + ! + ! Dropping rule based on the threshold: compare the absolute + ! value of each updated entry of row with thres * 2-norm of row. + ! + rwk = row(k) + + if (abs(rwk) < thres*nrmi) then + ! + ! Drop the entry. + ! + row(k) = dzero + irwt(k) = 0 + cycle + else + ! + ! Note: since U is scaled while copying it out (see ilut_copyout), + ! we can use rwk in the update below. + ! + do jj=irp(k),irp(k+1)-1 + j = ja(jj) + if (j<=k) then + info = -i + return + endif + ! + ! Update row(j) and, if it is not to be discarded, insert + ! its index into the heap for further processing. + ! + row(j) = row(j) - rwk * val(jj) + if (irwt(j) == 0) then + if (abs(row(j)) < thres*nrmi) then + ! + ! Drop the entry. + ! + row(j) = dzero + else + ! + ! Do the insertion. + ! + call heap%insert(j,info) + if (info /= psb_success_) return + irwt(j) = 1 + end if + end if + end do + end if + + ! + ! If we get here it is an index we need to keep on copyout. + ! + + nidx = nidx + 1 + call psb_ensure_size(nidx,idxs,info,addsz=psb_heap_resize) + if (info /= psb_success_) return + idxs(nidx) = k + irwt(k) = 0 + end do + + irwt(i) = 0 +end subroutine psb_z_invt_inv diff --git a/prec/impl/psb_z_prec_type_impl.f90 b/prec/impl/psb_z_prec_type_impl.f90 index 9cbd32ca..00f0b05e 100644 --- a/prec/impl/psb_z_prec_type_impl.f90 +++ b/prec/impl/psb_z_prec_type_impl.f90 @@ -1,9 +1,9 @@ -! +! ! Parallel Sparse BLAS version 3.5 ! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! +! Salvatore Filippone +! Alfredo Buttari +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -15,7 +15,7 @@ ! 3. The name of the PSBLAS group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. -! +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -27,14 +27,14 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! -! -! +! +! +! ! Parallel Sparse BLAS version 3.5 ! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! +! Salvatore Filippone +! Alfredo Buttari +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -46,7 +46,7 @@ !!$ 3. The name of the PSBLAS group or the names of its contributors may !!$ not be used to endorse or promote products derived from this !!$ software without specific written permission. -!!$ +!!$ !!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS !!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED !!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -58,14 +58,14 @@ !!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) !!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE !!$ POSSIBILITY OF SUCH DAMAGE. -!!$ -!!$ +!!$ +!!$ !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! subroutine psb_z_apply2_vect(prec,x,y,desc_data,info,trans,work) use psb_base_mod use psb_z_prec_type, psb_protect_name => psb_z_apply2_vect - implicit none + implicit none type(psb_desc_type),intent(in) :: desc_data class(psb_zprec_type), intent(inout) :: prec type(psb_z_vect_type),intent(inout) :: x @@ -74,7 +74,7 @@ subroutine psb_z_apply2_vect(prec,x,y,desc_data,info,trans,work) character(len=1), optional :: trans complex(psb_dpk_),intent(inout), optional, target :: work(:) - character :: trans_ + character :: trans_ complex(psb_dpk_), pointer :: work_(:) type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np,me @@ -88,25 +88,25 @@ subroutine psb_z_apply2_vect(prec,x,y,desc_data,info,trans,work) ctxt = desc_data%get_context() call psb_info(ctxt, me, np) - if (present(trans)) then + if (present(trans)) then trans_=psb_toupper(trans) else trans_='N' end if - if (present(work)) then + if (present(work)) then work_ => work else allocate(work_(4*desc_data%get_local_cols()),stat=info) - if (info /= psb_success_) then + if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err='Allocate') - goto 9999 + goto 9999 end if end if - if (.not.allocated(prec%prec)) then + if (.not.allocated(prec%prec)) then info = 1124 call psb_errpush(info,name,a_err="preconditioner") goto 9999 @@ -115,13 +115,13 @@ subroutine psb_z_apply2_vect(prec,x,y,desc_data,info,trans,work) call prec%prec%apply(zone,x,zzero,y,desc_data,info,& & trans=trans_,work=work_) - if (present(work)) then + if (present(work)) then else deallocate(work_,stat=info) - if (info /= psb_success_) then + if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err='DeAllocate') - goto 9999 + goto 9999 end if end if @@ -136,7 +136,7 @@ end subroutine psb_z_apply2_vect subroutine psb_z_apply1_vect(prec,x,desc_data,info,trans,work) use psb_base_mod use psb_z_prec_type, psb_protect_name => psb_z_apply1_vect - implicit none + implicit none type(psb_desc_type),intent(in) :: desc_data class(psb_zprec_type), intent(inout) :: prec type(psb_z_vect_type),intent(inout) :: x @@ -145,7 +145,7 @@ subroutine psb_z_apply1_vect(prec,x,desc_data,info,trans,work) complex(psb_dpk_),intent(inout), optional, target :: work(:) type(psb_z_vect_type) :: ww - character :: trans_ + character :: trans_ complex(psb_dpk_), pointer :: work_(:) type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np,me @@ -159,25 +159,25 @@ subroutine psb_z_apply1_vect(prec,x,desc_data,info,trans,work) ctxt = desc_data%get_context() call psb_info(ctxt, me, np) - if (present(trans)) then + if (present(trans)) then trans_=psb_toupper(trans) else trans_='N' end if - if (present(work)) then + if (present(work)) then work_ => work else allocate(work_(4*desc_data%get_local_cols()),stat=info) - if (info /= psb_success_) then + if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err='Allocate') - goto 9999 + goto 9999 end if end if - if (.not.allocated(prec%prec)) then + if (.not.allocated(prec%prec)) then info = 1124 call psb_errpush(info,name,a_err="preconditioner") goto 9999 @@ -188,13 +188,13 @@ subroutine psb_z_apply1_vect(prec,x,desc_data,info,trans,work) & trans=trans_,work=work_) if (info == 0) call psb_geaxpby(zone,ww,zzero,x,desc_data,info) call psb_gefree(ww,desc_data,info) - if (present(work)) then + if (present(work)) then else deallocate(work_,stat=info) - if (info /= psb_success_) then + if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err='DeAllocate') - goto 9999 + goto 9999 end if end if @@ -209,7 +209,7 @@ end subroutine psb_z_apply1_vect subroutine psb_z_apply2v(prec,x,y,desc_data,info,trans,work) use psb_base_mod use psb_z_prec_type, psb_protect_name => psb_z_apply2v - implicit none + implicit none type(psb_desc_type),intent(in) :: desc_data class(psb_zprec_type), intent(inout) :: prec complex(psb_dpk_),intent(inout) :: x(:) @@ -218,7 +218,7 @@ subroutine psb_z_apply2v(prec,x,y,desc_data,info,trans,work) character(len=1), optional :: trans complex(psb_dpk_),intent(inout), optional, target :: work(:) - character :: trans_ + character :: trans_ complex(psb_dpk_), pointer :: work_(:) type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np,me @@ -232,37 +232,37 @@ subroutine psb_z_apply2v(prec,x,y,desc_data,info,trans,work) ctxt = desc_data%get_context() call psb_info(ctxt, me, np) - if (present(trans)) then + if (present(trans)) then trans_=trans else trans_='N' end if - if (present(work)) then + if (present(work)) then work_ => work else allocate(work_(4*desc_data%get_local_cols()),stat=info) - if (info /= psb_success_) then + if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err='Allocate') - goto 9999 + goto 9999 end if end if - if (.not.allocated(prec%prec)) then + if (.not.allocated(prec%prec)) then info = 1124 call psb_errpush(info,name,a_err="preconditioner") goto 9999 end if call prec%prec%apply(zone,x,zzero,y,desc_data,info,trans_,work=work_) - if (present(work)) then + if (present(work)) then else deallocate(work_,stat=info) - if (info /= psb_success_) then + if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err='DeAllocate') - goto 9999 + goto 9999 end if end if @@ -277,7 +277,7 @@ end subroutine psb_z_apply2v subroutine psb_z_apply1v(prec,x,desc_data,info,trans) use psb_base_mod use psb_z_prec_type, psb_protect_name => psb_z_apply1v - implicit none + implicit none type(psb_desc_type),intent(in) :: desc_data class(psb_zprec_type), intent(inout) :: prec complex(psb_dpk_),intent(inout) :: x(:) @@ -293,36 +293,34 @@ subroutine psb_z_apply1v(prec,x,desc_data,info,trans) name='psb_z_apply1v' info = psb_success_ call psb_erractionsave(err_act) - - ctxt = desc_data%get_context() call psb_info(ctxt, me, np) - if (present(trans)) then + if (present(trans)) then trans_=psb_toupper(trans) else trans_='N' end if - if (.not.allocated(prec%prec)) then + if (.not.allocated(prec%prec)) then info = 1124 call psb_errpush(info,name,a_err="preconditioner") goto 9999 end if allocate(ww(size(x)),w1(size(x)),stat=info) - if (info /= psb_success_) then + if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err='Allocate') - goto 9999 + goto 9999 end if call prec%prec%apply(zone,x,zzero,ww,desc_data,info,& & trans_,work=w1) if(info /= psb_success_) goto 9999 x(:) = ww(:) deallocate(ww,W1,stat=info) - if (info /= psb_success_) then + if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err='DeAllocate') - goto 9999 + goto 9999 end if @@ -336,3 +334,159 @@ subroutine psb_z_apply1v(prec,x,desc_data,info,trans) end subroutine psb_z_apply1v +subroutine psb_zcprecseti(prec,what,val,info,ilev,ilmax,pos,idx) + use psb_base_mod + use psb_z_prec_type, psb_protect_name => psb_zcprecseti + implicit none + + class(psb_zprec_type), intent(inout) :: prec + character(len=*), intent(in) :: what + integer(psb_ipk_), intent(in) :: val + integer(psb_ipk_), intent(out) :: info + ! This optional inputs are backport from the inputs available in AMG4PSBLAS, + ! they are of no actual use here a part from compatibility reasons. + integer(psb_ipk_), optional, intent(in) :: ilev,ilmax,idx + character(len=*), optional, intent(in) :: pos + + ! Local variables + character(len=*), parameter :: name='psb_precseti' + + info = psb_success_ + + ! We need to convert from the 'what' string to the corresponding integer + ! value befor passing the call to the set of the inner method. + select case (psb_toupper(trim(what))) + case ('SUB_FILLIN') + call prec%prec%precset(psb_ilu_fill_in_,val,info) + case ('INV_FILLIN') + call prec%prec%precset(psb_inv_fillin_,val,info) + case default + info = psb_err_invalid_args_combination_ + write(psb_err_unit,*) name,& + & ': Error: uninitialized preconditioner,',& + &' should call prec%init' + return + end select + +end subroutine psb_zcprecseti + +subroutine psb_zcprecsetr(prec,what,val,info,ilev,ilmax,pos,idx) + use psb_base_mod + use psb_z_prec_type, psb_protect_name => psb_zcprecsetr + implicit none + + class(psb_zprec_type), intent(inout) :: prec + character(len=*), intent(in) :: what + real(psb_dpk_), intent(in) :: val + integer(psb_ipk_), intent(out) :: info + ! This optional inputs are backport from the inputs available in AMG4PSBLAS, + ! they are of no actual use here a part from compatibility reasons. + integer(psb_ipk_), optional, intent(in) :: ilev,ilmax,idx + character(len=*), optional, intent(in) :: pos + + ! Local variables + character(len=*), parameter :: name='psb_precsetr' + + info = psb_success_ + + ! We need to convert from the 'what' string to the corresponding integer + ! value befor passing the call to the set of the inner method. + select case (psb_toupper(trim(what))) + case('SUB_ILUTHRS') + call prec%prec%precset(psb_fact_eps_,val,info) + case('INV_THRESH') + call prec%prec%precset(psb_inv_thresh_,val,info) + case default + info = psb_err_invalid_args_combination_ + write(psb_err_unit,*) name,& + & ': Error: uninitialized preconditioner,',& + &' should call prec%init' + return + end select + +end subroutine psb_zcprecsetr + +subroutine psb_zcprecsetc(prec,what,string,info,ilev,ilmax,pos,idx) + use psb_base_mod + use psb_z_prec_type, psb_protect_name => psb_zcprecsetc + implicit none + + class(psb_zprec_type), intent(inout) :: prec + character(len=*), intent(in) :: what + character(len=*), intent(in) :: string + integer(psb_ipk_), intent(out) :: info + ! This optional inputs are backport from the inputs available in AMG4PSBLAS, + ! they are of no actual use here a part from compatibility reasons. + integer(psb_ipk_), optional, intent(in) :: ilev,ilmax,idx + character(len=*), optional, intent(in) :: pos + + ! Local variables + character(len=*), parameter :: name='psb_precsetc' + + info = psb_success_ + + ! We need to convert from the 'what' string to the corresponding integer + ! value befor passing the call to the set of the inner method. + select case (psb_toupper(trim(what))) + case ('SUB_SOLVE') + ! We select here the type of solver on the block + select case (psb_toupper(trim(string))) + case("ILU") + call prec%prec%precset(psb_f_type_,psb_f_ilu_k_,info) + call prec%prec%precset(psb_ilu_ialg_,psb_ilu_n_,info) + case("ILUT") + call prec%prec%precset(psb_f_type_,psb_f_ilu_t_,info) + call prec%prec%precset(psb_ilu_ialg_,psb_ilu_t_,info) + case("AINV") + call prec%prec%precset(psb_f_type_,psb_f_ainv_,info) + case("INVK") + call prec%prec%precset(psb_f_type_,psb_f_invk_,info) + case("INVT") + call prec%prec%precset(psb_f_type_,psb_f_invt_,info) + case default + ! Default to ILU(0) factorization + call prec%prec%precset(psb_f_type_,psb_f_ilu_n_,info) + call prec%prec%precset(psb_ilu_ialg_,psb_ilu_n_,info) + end select + case ("ILU_ALG") + select case (psb_toupper(trim(string))) + case ("MILU") + call prec%prec%precset(psb_ilu_ialg_,psb_milu_n_,info) + case default + ! Do nothing + end select + case ("ILUT_SCALE") + select case (psb_toupper(trim(string))) + case ("MAXVAL") + call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_maxval_,info) + case ("DIAG") + call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_diag_,info) + case ("ARWSUM") + call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_arwsum_,info) + case ("ARCSUM") + call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_arcsum_,info) + case ("ACLSUM") + call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_aclsum_,info) + case ("NONE") + call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_none_,info) + case default + call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_none_,info) + end select + case ("AINV_ALG") + select case (psb_toupper(trim(string))) + case("LLK") + call prec%prec%precset(psb_ainv_alg_,psb_ainv_llk_,info) + case("SYM-LLK") + call prec%prec%precset(psb_ainv_alg_,psb_ainv_s_llk_,info) + case("STAB-LLK") + call prec%prec%precset(psb_ainv_alg_,psb_ainv_s_ft_llk_,info) + case("MLK","LMX") + call prec%prec%precset(psb_ainv_alg_,psb_ainv_mlk_,info) + case default + call prec%prec%precset(psb_ainv_alg_,psb_ainv_llk_,info) + end select + case default + + end select + +end subroutine psb_zcprecsetc diff --git a/prec/impl/psb_z_sp_drop.f90 b/prec/impl/psb_z_sp_drop.f90 new file mode 100644 index 00000000..754c76cc --- /dev/null +++ b/prec/impl/psb_z_sp_drop.f90 @@ -0,0 +1,168 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! Moved here from AMG-AINV, original copyright below. +! +! +! AMG-AINV: Approximate Inverse plugin for +! AMG4PSBLAS version 1.0 +! +! (C) Copyright 2020 +! +! Salvatore Filippone University of Rome Tor Vergata +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +subroutine psb_z_sp_drop(idiag,nzrmax,sp_thresh,nz,iz,valz,info) + use psb_base_mod + implicit none + real(psb_dpk_), intent(in) :: sp_thresh + integer(psb_ipk_), intent(in) :: idiag, nzrmax + integer(psb_ipk_), intent(inout) :: nz + integer(psb_ipk_), intent(inout) :: iz(:) + complex(psb_dpk_), intent(inout) :: valz(:) + integer(psb_ipk_), intent(out) :: info + ! + integer(psb_ipk_) :: i, j, idf, nw + complex(psb_dpk_) :: witem + integer(psb_ipk_) :: widx + complex(psb_dpk_), allocatable :: xw(:) + integer(psb_ipk_), allocatable :: xwid(:), indx(:) + + + info = psb_success_ + + if (nz > min(size(iz),size(valz))) then + write(0,*) 'Serious size problem ',nz,size(iz),size(valz) + info = -2 + return + end if + allocate(xw(nz),xwid(nz),indx(nz),stat=info) + if (info /= psb_success_) then + write(psb_err_unit,*) ' Memory allocation failure in sp_drop',nz,info + return + endif + + ! Always keep the diagonal element + idf = -1 + do i=1, nz + if (iz(i) == idiag) then + idf = i + witem = valz(i) + widx = iz(i) + valz(i) = valz(1) + iz(i) = iz(1) + valz(1) = witem + iz(1) = widx + exit + end if + end do + + if (idf == -1) then + + xw(1:nz) = valz(1:nz) + call psb_qsort(xw(1:nz),indx(1:nz),dir=psb_asort_down_) + do i=1, nz + xwid(i) = iz(indx(i)) + end do + nw = min(nz,nzrmax) + do + if (nw <= 1) exit + if (abs(xw(nw)) < sp_thresh) then + nw = nw - 1 + else + exit + end if + end do + nw = max(nw, 1) + + else + + nw = nz-1 + + xw(1:nw) = valz(2:nz) + + call psb_qsort(xw(1:nw),indx(1:nw),dir=psb_asort_down_) + nw = min(nw,nzrmax-1) + do + if (nw <= 1) exit + if (abs(xw(nw)) < sp_thresh) then + nw = nw - 1 + else + exit + end if + end do + + do i=1, nw + xwid(i) = iz(1+indx(i)) + end do + nw = nw + 1 + xw(nw) = valz(1) + xwid(nw) = iz(1) + end if + + call psb_msort(xwid(1:nw),indx(1:nw),dir=psb_sort_up_) + + do i=1, nw + valz(i) = xw(indx(i)) + iz(i) = xwid(i) + end do + nz = nw + if (nz>nzrmax) write(0,*) 'in sp_drop: ',nw,nzrmax,nz + deallocate(xw,xwid,indx,stat=info) + if (info /= psb_success_) then + write(psb_err_unit,*) ' Memory deallocation failure in sp_drop',info + return + endif + return +end subroutine psb_z_sp_drop diff --git a/prec/impl/psb_z_sparsify.f90 b/prec/impl/psb_z_sparsify.f90 new file mode 100644 index 00000000..fe29230f --- /dev/null +++ b/prec/impl/psb_z_sparsify.f90 @@ -0,0 +1,260 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! Moved here from AMG-AINV, original copyright below. +! +! +! AMG-AINV: Approximate Inverse plugin for +! AMG4PSBLAS version 1.0 +! +! (C) Copyright 2020 +! +! Salvatore Filippone University of Rome Tor Vergata +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +subroutine psb_z_sparsify(idiag,nzrmax,sp_thresh,n,zw,nz,iz,valz,info,istart,iheap,ikr) + use psb_base_mod + implicit none + + real(psb_dpk_), intent(in) :: sp_thresh + integer(psb_ipk_), intent(in) :: idiag, n, nzrmax + complex(psb_dpk_), intent(inout) :: zw(:) + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), intent(out) :: iz(:) + complex(psb_dpk_), intent(out) :: valz(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: istart + type(psb_i_heap), optional :: iheap + integer(psb_ipk_), optional :: ikr(:) + ! + integer(psb_ipk_) :: i, istart_, last_i, iret,k + complex(psb_dpk_) :: witem + integer(psb_ipk_) :: widx + complex(psb_dpk_), allocatable :: xw(:) + integer(psb_ipk_), allocatable :: xwid(:), indx(:) + type(psb_z_idx_heap) :: heap + + + info = psb_success_ + istart_ = 1 + if (present(istart)) istart_ = max(1,istart) + if (.false.) then + nz = 0 + do i=istart_, n + if ((i == idiag).or.(abs(zw(i)) >= sp_thresh)) then + nz = nz + 1 + iz(nz) = i + valz(nz) = zw(i) + end if + end do + + else + allocate(xw(nzrmax),xwid(nzrmax),indx(nzrmax),stat=info) + if (info /= psb_success_) then + return + end if + + call heap%init(info,dir=psb_asort_down_) + + ! Keep at least the diagonal + nz = 0 + + if (present(iheap)) then + if (.not.(present(ikr))) then + write(psb_err_unit,*) 'Error: if IHEAP then also IKR' + info = -1 + return + end if + last_i = -1 + do + call iheap%get_first(i,iret) + if (iret < 0) exit + ! An index may have been put on the heap more than once. + if (i == last_i) cycle + last_i = i + if (i == idiag) then + xw(1) = zw(i) + xwid(1) = i + else if (abs(zw(i)) >= sp_thresh) then + call heap%insert(zw(i),i,info) + end if + zw(i) = dzero + ikr(i) = 0 + end do + + else + + do i=istart_, n + if (i == idiag) then + xw(1) = zw(i) + xwid(1) = i + else if (abs(zw(i)) >= sp_thresh) then + call heap%insert(zw(i),i,info) + end if + zw(i) = dzero + end do + end if + + k = 1 + do + if (k == nzrmax) exit + call heap%get_first(witem,widx,info) + if (info == -1) then + info = psb_success_ + exit + endif + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + return + end if + k = k + 1 + xw(k) = witem + xwid(k) = widx + end do + call heap%free(info) + nz = k + call psb_msort(xwid(1:nz),indx(1:nz),dir=psb_sort_up_) +!!$ write(0,*) 'sparsify output for idiag ',idiag,' :',nz,sp_thresh + do i=1, nz + valz(i) = xw(indx(i)) + iz(i) = xwid(i) +!!$ write(0,*) ' ',iz(i),valz(i) + end do + + end if + + return + +end subroutine psb_z_sparsify + + +subroutine psb_z_sparsify_list(idiag,nzrmax,sp_thresh,n,zw,nz,iz,valz,lhead,listv,ikr,info) + use psb_base_mod + implicit none + + real(psb_dpk_), intent(in) :: sp_thresh + integer(psb_ipk_), intent(in) :: idiag, n, nzrmax + complex(psb_dpk_), intent(inout) :: zw(:) + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), intent(out) :: iz(:) + complex(psb_dpk_), intent(out) :: valz(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(inout) :: lhead, listv(:) + integer(psb_ipk_) :: ikr(:) + ! + integer(psb_ipk_) :: i, istart_, last_i, iret,k,current, next + complex(psb_dpk_) :: witem + integer(psb_ipk_) :: widx + complex(psb_dpk_), allocatable :: xw(:) + integer(psb_ipk_), allocatable :: xwid(:), indx(:) + + + info = psb_success_ + istart_ = 1 + allocate(xw(n),xwid(n),indx(n),stat=info) + + current = lhead + lhead = -1 + i = 0 + do while (current >0) + i = i + 1 + xw(i) = zw(current) + xwid(i) = current + + if (current == idiag) then + ! Bring the diagona into first position + witem = xw(1) + widx = xwid(1) + xw(1) = xw(i) + xwid(1) = xwid(i) + xw(i) = witem + xwid(i) = widx + end if + + zw(current) = dzero + ikr(current) = 0 + + next = listv(current) + listv(current) = -1 + current = next + end do + nz = i + if (nz > 2) call psb_hsort(xw(2:nz),ix=xwid(2:nz),& + & dir=psb_asort_down_,flag=psb_sort_keep_idx_) +!!$ write(0,*) 'Done first msort ' +!!$ write(0,*) ' after first msort for idiag ',idiag,' :',nz,sp_thresh +!!$ do i=1, nz +!!$ write(0,*) ' ',xwid(i),xw(i) +!!$ end do + + i = 2 + do while (i<=nz) + if (abs(xw(i)) < sp_thresh) exit + i = i + 1 + end do +!!$ write(0,*) 'NZ ',nz, i, nzrmax + nz = max(1,min(i-1,nzrmax)) + call psb_msort(xwid(1:nz),ix=indx(1:nz),dir=psb_sort_up_) +!!$ write(0,*) 'Done second msort ' + +!!$ write(0,*) 'sparsify output for idiag ',idiag,' :',nz,i,sp_thresh + do i=1, nz + valz(i) = xw(indx(i)) + iz(i) = xwid(i) +!!$ write(0,*) ' ',iz(i),valz(i) + end do + + return + +end subroutine psb_z_sparsify_list diff --git a/prec/impl/psb_zrwclip.f90 b/prec/impl/psb_zrwclip.f90 new file mode 100644 index 00000000..574ebcf8 --- /dev/null +++ b/prec/impl/psb_zrwclip.f90 @@ -0,0 +1,90 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! Moved here from AMG-AINV, original copyright below. +! +! +! AMG-AINV: Approximate Inverse plugin for +! AMG4PSBLAS version 1.0 +! +! (C) Copyright 2020 +! +! Salvatore Filippone University of Rome Tor Vergata +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +subroutine psb_z_rwclip(nz,ia,ja,val,imin,imax,jmin,jmax) + use psb_base_mod + + implicit none + integer(psb_ipk_), intent(inout) :: nz + integer(psb_ipk_), intent(inout) :: ia(*), ja(*) + complex(psb_dpk_), intent(inout) :: val(*) + integer(psb_ipk_), intent(in) :: imin,imax,jmin,jmax + + integer(psb_ipk_) :: i,j + + j = 0 + do i=1, nz + if ((imin <= ia(i)).and.& + & (ia(i) <= imax).and.& + & (jmin <= ja(i)).and.& + & (ja(i) <= jmax) ) then + j = j + 1 + ia(j) = ia(i) + ja(j) = ja(i) + val(j) = val(i) + end if + end do + nz = j +end subroutine psb_z_rwclip diff --git a/prec/impl/psb_zsparse_biconjg_llk.F90 b/prec/impl/psb_zsparse_biconjg_llk.F90 new file mode 100644 index 00000000..2d3a90cb --- /dev/null +++ b/prec/impl/psb_zsparse_biconjg_llk.F90 @@ -0,0 +1,366 @@ +! +! +! AMG-AINV: Approximate Inverse plugin for +! AMG4PSBLAS version 1.0 +! +! (C) Copyright 2020 +! +! Salvatore Filippone University of Rome Tor Vergata +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +subroutine psb_zsparse_biconjg_llk(n,a,p,z,w,nzrmax,sp_thresh,info) + use psb_base_mod + use psb_z_ainv_tools_mod + use psb_z_biconjg_mod, psb_protect_name => psb_zsparse_biconjg_llk + ! + ! Left-looking variant + ! + ! + implicit none + integer(psb_ipk_), intent(in) :: n + type(psb_z_csr_sparse_mat), intent(in) :: a + type(psb_z_csc_sparse_mat), intent(inout) :: z,w + integer(psb_ipk_), intent(in) :: nzrmax + real(psb_dpk_), intent(in) :: sp_thresh + complex(psb_dpk_), intent(out) :: p(:) + integer(psb_ipk_), intent(out) :: info + + ! Locals + integer(psb_ipk_), allocatable :: ia(:), ja(:), izkr(:), izcr(:) + complex(psb_dpk_), allocatable :: zval(:),val(:), q(:) + integer(psb_ipk_) :: i,j,k, kc, kr, err_act, nz, nzra, nzrz, ipzi,ipzj,& + & nzzi,nzzj, nzz, ip1, ip2, ipza,ipzz, ipzn, nzzn, ipz1, ipz2,& + & ipj, lastj, nextj, nzw + type(psb_i_heap) :: heap, rheap + type(psb_z_csc_sparse_mat) :: ac + complex(psb_dpk_) :: alpha + character(len=20) :: name='psb_orth_llk' + logical, parameter :: debug=.false. + + allocate(zval(n),ia(n),val(n),izkr(n),izcr(n),q(n),stat=info) + if (info == psb_success_) call ac%cp_from_fmt(a,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + return + end if + ! + ! izkr(i): flag nonzeros in ZVAL. To minimize traffic into heap. + ! izcr(i): flag rows to be used for the dot products. Used to minimize + ! traffic in rheap. + ! + do i=1,n + izkr(i) = 0 + izcr(i) = 0 + zval(i) = zzero + end do + + ! Init z_1=e_1 and p_1=a_11 + p(1) = zzero + i = 1 + nz = a%irp(i+1) - a%irp(i) + do j=1,nz + if (a%ja(j) == 1) then + p(1) = a%val(j) + exit + end if + end do + if (abs(p(1)) < d_epstol) & + & p(1) = 1.d-3 + + q(1) = p(1) + ! + ! + call z%allocate(n,n,n*nzrmax) + + z%icp(1) = 1 + z%icp(2) = 2 + z%ia(1) = 1 + z%val(1) = zone + nzz = 1 + + call w%allocate(n,n,n*nzrmax) + w%icp(1) = 1 + w%icp(2) = 2 + w%ia(1) = 1 + w%val(1) = zone + nzw = 1 + + do i = 2, n + if (debug) write(0,*) 'Main loop iteration ',i,n + + ! + ! Update loop on Z. + ! Must be separated from update loop of W because of + ! the conflict on J that would result. + ! + + ! ZVAL = e_i + ! !$ do j=1, i-1 + ! !$ zval(j) = zzero + ! !$ end do + zval(i) = zone + izkr(i) = 1 + call heap%init(info) + if (info == psb_success_) call heap%insert(i,info) + + if (info == psb_success_) call rheap%init(info) + + do j = ac%icp(i), ac%icp(i+1)-1 + if (ac%ia(j) < i) then +!!$ write(0,*) i, ' Outer inserting ',ac%ia(j) + if (info == psb_success_) call rheap%insert(ac%ia(j),info) + izcr(ac%ia(j)) = 1 + end if + end do + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_init_heap') + return + end if + + ! Update loop + ! The idea is to keep track of the indices of the nonzeros in zval, + ! so as to only do the dot products on the rows which have nonzeros + ! in their positions; to do this we keep an extra + ! copy of A in CSC, and the row indices to be considered are in rheap. + lastj = -1 + outer: do + inner: do + call rheap%get_first(j,info) + if (debug) write(0,*) 'from get_first: ',j,info + if (info == -1) exit outer ! Empty heap + if (j > lastj) then + lastj = j + exit inner + end if + end do inner + + izcr(j) = 0 + if (j>=i) cycle outer + if (debug) write(0,*) 'update loop, using row: ',j,i + ip1 = a%irp(j) + ip2 = a%irp(j+1) - 1 + do + if (ip2 < ip1 ) exit + if (a%ja(ip2) <= n) exit + ip2 = ip2 -1 + end do + nzra = max(0,ip2 - ip1 + 1) + p(i) = psb_spge_dot(nzra,a%ja(ip1:ip2),a%val(ip1:ip2),zval) + ! ! write(psb_err_unit,*) j,i,p(i) + + alpha = (-p(i)/p(j)) +!!$ write(0,*) 'At step ',i,j,' p(i) ',p(i),alpha + if (.false..or.(abs(alpha) > sp_thresh)) then +!!$ write(0,*) 'At step ',i,j,' range ',z%icp(j), z%icp(j+1)-1, & +!!$ & ' vals ',z%ia(z%icp(j):z%icp(j+1)-1) + do k=z%icp(j), z%icp(j+1)-1 + kr = z%ia(k) + zval(kr) = zval(kr) + alpha*z%val(k) + if (izkr(kr) == 0) then +!!$ write(0,*) ' main inner Inserting ',kr + call heap%insert(kr,info) + if (info /= psb_success_) exit + izkr(kr) = 1 + ! We have just added a new nonzero in KR. Thus, we will + ! need to explicitly compute the dot products on all + ! rows jj).and.(nextj lastj) then + lastj = j + exit innerw + end if + end do innerw + izcr(j) = 0 + if (j>=i) cycle outerw + if (debug) write(0,*) 'update loop, using row: ',j + ip1 = ac%icp(j) + ip2 = ac%icp(j+1) - 1 + do + if (ip2 < ip1 ) exit + if (ac%ia(ip2) <= n) exit + ip2 = ip2 -1 + end do + nzra = max(0,ip2 - ip1 + 1) + q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval) + ! !$ write(psb_err_unit,*) j,i,p(i) + + alpha = (-q(i)/q(j)) + if (.false..or.(abs(alpha) > sp_thresh)) then + + do k=w%icp(j), w%icp(j+1)-1 + kr = w%ia(k) + zval(kr) = zval(kr) + alpha*w%val(k) + if (izkr(kr) == 0) then + call heap%insert(kr,info) + if (info /= psb_success_) exit + izkr(kr) = 1 + ! We have just added a new nonzero in KR. Thus, we will + ! need to explicitly compute the dot products on all + ! rows jj).and.(nextj psb_zsparse_biconjg_llk_noth + + ! + ! Left-looking variant, with NO drop rule on p(i)/p(j) + ! + ! + implicit none + integer(psb_ipk_), intent(in) :: n + type(psb_z_csr_sparse_mat), intent(in) :: a + type(psb_z_csc_sparse_mat), intent(inout) :: z,w + integer(psb_ipk_), intent(in) :: nzrmax + real(psb_dpk_), intent(in) :: sp_thresh + complex(psb_dpk_), intent(out) :: p(:) + integer(psb_ipk_), intent(out) :: info + + ! Locals + integer(psb_ipk_), allocatable :: ia(:), ja(:), izkr(:), izcr(:) + complex(psb_dpk_), allocatable :: zval(:),val(:), q(:) + integer(psb_ipk_) :: i,j,k, kc, kr, err_act, nz, nzra, nzrz, ipzi,ipzj,& + & nzzi,nzzj, nzz, ip1, ip2, ipza,ipzz, ipzn, nzzn, ipz1, ipz2,& + & ipj, lastj, nextj, nzw + type(psb_i_heap) :: heap, rheap + type(psb_z_csc_sparse_mat) :: ac + real(psb_dpk_) :: alpha + character(len=20) :: name='psb_orth_llk_noth' + logical, parameter :: debug=.false. + + allocate(zval(n),ia(n),val(n),izkr(n),izcr(n),q(n),stat=info) + if (info == psb_success_) call ac%cp_from_fmt(a,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + return + end if + ! + ! izkr(i): flag nonzeros in ZVAL. To minimize traffic into heap. + ! izcr(i): flag rows to be used for the dot products. Used to minimize + ! traffic in rheap. + ! + do i=1,n + izkr(i) = 0 + izcr(i) = 0 + zval(i) = zzero + end do + + ! Init z_1=e_1 and p_1=a_11 + p(1) = zzero + i = 1 + nz = a%irp(i+1) - a%irp(i) + do j=1,nz + if (a%ja(j) == 1) then + p(1) = a%val(j) + exit + end if + end do + if (abs(p(1)) < d_epstol) & + & p(1) = 1.d-3 + + q(1) = p(1) + ! + ! + call z%allocate(n,n,n*nzrmax) + + z%icp(1) = 1 + z%icp(2) = 2 + z%ia(1) = 1 + z%val(1) = done + nzz = 1 + + call w%allocate(n,n,n*nzrmax) + w%icp(1) = 1 + w%icp(2) = 2 + w%ia(1) = 1 + w%val(1) = done + nzw = 1 + + do i = 2, n + if (debug) write(0,*) 'Main loop iteration ',i,n + + ! + ! Update loop on Z. + ! Must be separated from update loop of W because of + ! the conflict on J that would result. + ! + + ! ZVAL = e_i + ! !$ do j=1, i-1 + ! !$ zval(j) = zzero + ! !$ end do + zval(i) = done + izkr(i) = 1 + call heap%init(info) + if (info == psb_success_) call heap%insert(i,info) + + if (info == psb_success_) call rheap%init(info) + do j = ac%icp(i), ac%icp(i+1)-1 + if (ac%ia(j) < i) then + if (info == psb_success_) call rheap%insert(ac%ia(j),info) + izcr(ac%ia(j)) = 1 + end if + end do + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_init_heap') + return + end if + + ! Update loop + ! The idea is to keep track of the indices of the nonzeros in zval, + ! so as to only do the dot products on the rows which have nonzeros + ! in their positions; to do this we keep an extra + ! copy of A in CSC, and the row indices to be considered are in rheap. + lastj = -1 + outer: do + inner: do + call rheap%get_first(j,info) + if (debug) write(0,*) 'from get_first: ',j,info + if (info == -1) exit outer ! Empty heap + if (j > lastj) then + lastj = j + exit inner + end if + end do inner + + izcr(j) = 0 + if (j>=i) cycle outer + if (debug) write(0,*) 'update loop, using row: ',j,i + ip1 = a%irp(j) + ip2 = a%irp(j+1) - 1 + do + if (ip2 < ip1 ) exit + if (a%ja(ip2) <= n) exit + ip2 = ip2 -1 + end do + nzra = max(0,ip2 - ip1 + 1) + p(i) = psb_spge_dot(nzra,a%ja(ip1:ip2),a%val(ip1:ip2),zval) + ! !$ write(psb_err_unit,*) j,i,p(i) + + alpha = (-p(i)/p(j)) + + if (.true.) then + do k=z%icp(j), z%icp(j+1)-1 + kr = z%ia(k) + zval(kr) = zval(kr) + alpha*z%val(k) + if (izkr(kr) == 0) then + + call heap%insert(kr,info) + if (info /= psb_success_) exit + izkr(kr) = 1 + ! We have just added a new nonzero in KR. Thus, we will + ! need to explicitly compute the dot products on all + ! rows jj).and.(nextj lastj) then + lastj = j + exit innerw + end if + end do innerw + izcr(j) = 0 + if (j>=i) cycle outerw + if (debug) write(0,*) 'update loop, using row: ',j + ip1 = ac%icp(j) + ip2 = ac%icp(j+1) - 1 + do + if (ip2 < ip1 ) exit + if (ac%ia(ip2) <= n) exit + ip2 = ip2 -1 + end do + nzra = max(0,ip2 - ip1 + 1) + q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval) + ! !$ write(psb_err_unit,*) j,i,p(i) + + alpha = (-q(i)/q(j)) + if (.true.) then + + do k=w%icp(j), w%icp(j+1)-1 + kr = w%ia(k) + zval(kr) = zval(kr) + alpha*w%val(k) + if (izkr(kr) == 0) then + call heap%insert(kr,info) + if (info /= psb_success_) exit + izkr(kr) = 1 + ! We have just added a new nonzero in KR. Thus, we will + ! need to explicitly compute the dot products on all + ! rows jj).and.(nextj psb_zsparse_biconjg_mlk + ! + ! Left-looking variant + ! + ! + implicit none + integer(psb_ipk_), intent(in) :: n + type(psb_z_csr_sparse_mat), intent(in) :: a + type(psb_z_csc_sparse_mat), intent(inout) :: z,w + integer(psb_ipk_), intent(in) :: nzrmax + real(psb_dpk_), intent(in) :: sp_thresh + complex(psb_dpk_), intent(out) :: p(:) + integer(psb_ipk_), intent(out) :: info + + ! Locals + integer(psb_ipk_), allocatable :: ia(:), ja(:), izkr(:), izcr(:), hlist(:), bfr(:), rwlist(:) + complex(psb_dpk_), allocatable :: zval(:),val(:), q(:) + integer(psb_ipk_) :: i,j,k, kc, kr, err_act, nz, nzra, nzrz, ipzi,ipzj,& + & nzzi,nzzj, nzz, ip1, ip2, ipza,ipzz, ipzn, nzzn, ipz1, ipz2,& + & ipj, lastj, nextj, nzw, hlhead, li, mj, kkc, ifrst, ilst, rwhead + type(psb_i_heap) :: heap, rheap + type(psb_z_csc_sparse_mat) :: ac + complex(psb_dpk_) :: alpha + character(len=20) :: name='psb_biconjg_mlk' + logical, parameter :: debug=.false., test_merge=.true. + + allocate(zval(n),ia(n),val(n),izkr(n),izcr(n),q(n), & + & hlist(n),rwlist(n),bfr(n),stat=info) + if (info == psb_success_) call ac%cp_from_fmt(a,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + return + end if + ! + ! izkr(i): flag nonzeros in ZVAL. To minimize traffic into heap. + ! izcr(i): flag rows to be used for the dot products. Used to minimize + ! traffic in rheap. + ! + do i=1,n + izkr(i) = 0 + izcr(i) = 0 + zval(i) = zzero + hlist(i) = -1 + rwlist(i) = -1 + end do + + ! Init z_1=e_1 and p_1=a_11 + p(1) = zzero + i = 1 + nz = a%irp(i+1) - a%irp(i) + do j=1,nz + if (a%ja(j) == 1) then + p(1) = a%val(j) + exit + end if + end do + if (abs(p(1)) < d_epstol) & + & p(1) = 1.d-3 + + q(1) = p(1) + ! + ! + call z%allocate(n,n,n*nzrmax) + + z%icp(1) = 1 + z%icp(2) = 2 + z%ia(1) = 1 + z%val(1) = zone + nzz = 1 + + call w%allocate(n,n,n*nzrmax) + w%icp(1) = 1 + w%icp(2) = 2 + w%ia(1) = 1 + w%val(1) = zone + nzw = 1 + + + do i = 2, n + if (debug) write(0,*) 'Main loop iteration ',i,n + + ! + ! Update loop on Z. + ! Must be separated from update loop of W because of + ! the conflict on J that would result. + ! + + ! ZVAL = e_i + ! !$ do j=1, i-1 + ! !$ zval(j) = zzero + ! !$ end do + zval(i) = zone + izkr(i) = 1 + rwhead = i + + hlhead = -1 + + kkc = 0 + ilst = ac%icp(i)-1 + ifrst = ac%icp(i) + do j = ac%icp(i+1)-1, ac%icp(i), -1 + if (ac%ia(j) < i) then + ilst = j + exit + end if + end do + kkc = ilst-ifrst+1 + + if (.true..or.debug) then +!!$ write(0,*) 'Outer Before insertion : ',hlhead + call printlist(hlhead,hlist) + end if + if (kkc > 0) then +!!$ write(0,*) i,' Outer Inserting : ',kkc,':',ac%ia(ifrst:ilst) + + !call hlmerge(hlhead,hlist,bfr(1:kkc)) + call hlmerge(hlhead,hlist,ac%ia(ifrst:ilst)) + end if + if (debug) then + write(0,*) 'Outer After insertion: ',hlhead + call printlist(hlhead,hlist) + end if + + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='init_lists') + return + end if + + ! Update loop + ! The idea is to keep track of the indices of the nonzeros in zval, + ! so as to only do the dot products on the rows which have nonzeros + ! in their positions; to do this we keep an extra + ! copy of A in CSC, and the row indices to be considered are in rheap. + lastj = -1 + outer: do + mj = hlhead + if (mj > 0) then + hlhead = hlist(mj) + hlist(mj) = -1 + end if + j = mj + if (j < 0) exit outer + + izcr(j) = 0 + if (j>=i) cycle outer + + if (debug) write(0,*) 'update loop, using row: ',j,i,mj + ip1 = a%irp(j) + ip2 = a%irp(j+1) - 1 + do + if (ip2 < ip1 ) exit + if (a%ja(ip2) <= n) exit + ip2 = ip2 -1 + end do + nzra = max(0,ip2 - ip1 + 1) + p(i) = psb_spge_dot(nzra,a%ja(ip1:ip2),a%val(ip1:ip2),zval) + ! !$ write(psb_err_unit,*) j,i,p(i) + + alpha = (-p(i)/p(j)) +!!$ write(0,*) 'At step ',i,j,' p(i) ',p(i),alpha +!!$ write(0,*) ' Current list is : ',hlhead +!!$ call printlist(hlhead,hlist) +!!$ + + + if (.false..or.(abs(alpha) > sp_thresh)) then + ifrst=z%icp(j) + ilst=z%icp(j+1)-1 + call hlmerge(rwhead,rwlist,z%ia(ifrst:ilst)) +!!$ write(0,*) 'At step ',i,j,' range ',z%icp(j), z%icp(j+1)-1, & +!!$ & ' vals ',z%ia(z%icp(j):z%icp(j+1)-1) + + do k=z%icp(j), z%icp(j+1)-1 + kr = z%ia(k) + zval(kr) = zval(kr) + alpha*z%val(k) + + if (izkr(kr) == 0) then +!!$ write(0,*) ' main inner Inserting ',kr +!!$ call hlmerge(rwhead,rwlist,(/kr/)) + izkr(kr) = 1 + ! We have just added a new nonzero in KR. Thus, we will + ! need to explicitly compute the dot products on all + ! rows jj)) then + ifrst = min(ifrst,kc ) + ilst = max(ilst,kc) + end if + end do + kkc = ilst-ifrst+1 + if (debug) then + write(0,*) 'Inner Before insertion: ' + call printlist(hlhead,hlist) + write(0,*) 'Inner Inserting : ',kkc,':',ac%ia(ifrst:ilst) + end if + if (ilst >= ifrst) then +!!$ write(0,*) j,i,' Inner inserting ',ac%ia(ifrst:ilst) + call hlmerge(hlhead,hlist,ac%ia(ifrst:ilst)) + end if + + if (debug) then + write(0,*) 'Inner After insertion: ',hlhead + call printlist(hlhead,hlist) + end if + + end if + if (info /= psb_success_) exit + end do + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_insert_heap') + return + end if + end if + end do outer + call a%csget(i,i,nzra,ia,ja,val,info) + call rwclip(nzra,ia,ja,val,ione,n,ione,n) + p(i) = psb_spge_dot(nzra,ja,val,zval) + if (abs(p(i)) < d_epstol) & + & p(i) = 1.d-3 + +!!$ write(0,*) 'Dropping from a column with: ',i,psb_howmany_heap(heap),sp_thresh + + ! + ! Sparsify current ZVAL and put into ZMAT + ! + call sparsify(i,nzrmax,sp_thresh,n,zval,nzrz,ia,val,rwhead,rwlist,izkr,info) + + if (info /= psb_success_) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='sparsify') + return + end if + call psb_ensure_size(nzz+nzrz, z%ia, info) + call psb_ensure_size(nzz+nzrz, z%val, info) + ipz1 = z%icp(i) + do j=1, nzrz + z%ia(ipz1 + j -1) = ia(j) + z%val(ipz1 + j -1) = val(j) + end do + z%icp(i+1) = ipz1 + nzrz + nzz = nzz + nzrz + + + ! WVAL = e_i + ! !$ do j=1, i-1 + ! !$ zval(j) = zzero + ! !$ end do + zval(i) = zone + izkr(i) = 1 + rwhead = i + hlhead = -1 + + kkc = 0 + ilst = a%irp(i)-1 + ifrst = a%irp(i) + do j = a%irp(i+1)-1, a%irp(i), -1 + if (a%ja(j) < i) then + ilst = j + exit + end if + end do + kkc = ilst-ifrst+1 + + if (debug) then + write(0,*) 'Outer Before insertion: ' + call printlist(hlhead,hlist) + write(0,*) 'Outer Inserting : ',kkc,':',a%ja(ifrst:ilst) + end if + if (kkc > 0 ) then + !call hlmerge(hlhead,hlist,bfr(1:kkc)) + call hlmerge(hlhead,hlist,a%ja(ifrst:ilst)) + end if + if (debug) then + write(0,*) 'Outer After insertion: ',hlhead + call printlist(hlhead,hlist) + end if + + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='init_lists') + return + end if + + ! Update loop + ! The idea is to keep track of the indices of the nonzeros in zval, + ! so as to only do the dot products on the rows which have nonzeros + ! in their positions; to do this we keep an extra + ! copy of A in CSC, and the row indices to be considered are in rheap. + lastj = -1 + outerw: do + mj = hlhead + if (hlhead > 0) then + hlhead = hlist(mj) + hlist(mj) = -1 + end if + j = mj + if (j < 0) exit outerw + + izcr(j) = 0 + if (j>=i) cycle outerw + if (debug) write(0,*) 'update loop, using row: ',j + ip1 = ac%icp(j) + ip2 = ac%icp(j+1) - 1 + do + if (ip2 < ip1 ) exit + if (ac%ia(ip2) <= n) exit + ip2 = ip2 -1 + end do + nzra = max(0,ip2 - ip1 + 1) + q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval) + ! !$ write(psb_err_unit,*) j,i,p(i) + + alpha = (-q(i)/q(j)) + if (.false..or.(abs(alpha) > sp_thresh)) then + ifrst=w%icp(j) + ilst=w%icp(j+1)-1 + call hlmerge(rwhead,rwlist,w%ia(ifrst:ilst)) + + do k=w%icp(j), w%icp(j+1)-1 + kr = w%ia(k) + zval(kr) = zval(kr) + alpha*w%val(k) + if (izkr(kr) == 0) then + izkr(kr) = 1 + ! We have just added a new nonzero in KR. Thus, we will + ! need to explicitly compute the dot products on all + ! rows jj)) then + ifrst = min(ifrst,kc ) + ilst = max(ilst,kc) + end if + end do + kkc = ilst-ifrst+1 + if (debug) then + write(0,*) 'Inner Before insertion: ' + call printlist(hlhead,hlist) + write(0,*) 'Inner Inserting : ',kkc,':',a%ja(ifrst:ilst) + end if + + call hlmerge(hlhead,hlist,a%ja(ifrst:ilst)) + + if (debug) then + write(0,*) 'Inner After insertion: ',hlhead + call printlist(hlhead,hlist) + end if + if (debug) write(0,*) 'update loop, adding indices: ',& + & a%ja(a%irp(kr):a%irp(kr+1)-1) + + end if + if (info /= psb_success_) exit + end do + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_insert_heap') + return + end if + end if + end do outerw + ip1 = ac%icp(i) + ip2 = ac%icp(i+1) - 1 + do + if (ip2 < ip1 ) exit + if (ac%ia(ip2) <= n) exit + ip2 = ip2 -1 + end do + nzra = max(0,ip2 - ip1 + 1) + q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval) + if (abs(q(i)) < d_epstol) & + & q(i) = 1.d-3 + +!!$ write(0,*) 'Dropping from a column with: ',i,psb_howmany_heap(heap),sp_thresh + ! + ! Sparsify current ZVAL and put into ZMAT + ! + call sparsify(i,nzrmax,sp_thresh,n,zval,nzrz,ia,val,rwhead,rwlist,izkr,info) + if (info /= psb_success_) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='sparsify') + return + end if + call psb_ensure_size(nzw+nzrz, w%ia, info) + call psb_ensure_size(nzw+nzrz, w%val, info) + ipz1 = w%icp(i) + do j=1, nzrz + w%ia(ipz1 + j -1) = ia(j) + w%val(ipz1 + j -1) = val(j) + end do + w%icp(i+1) = ipz1 + nzrz + nzw = nzw + nzrz + + end do + +contains + + subroutine hlmerge(head,listv,vals) + integer(psb_ipk_), intent(inout) :: head, listv(:) + integer(psb_ipk_), intent(in) :: vals(:) + integer(psb_ipk_) :: i,j,k, lh, lv, nv, vv, flh, ph + + nv = size(vals) + lh = head + flh = -1 + lv = 1 + if ((head < 0).and.(nv > 0)) then + ! Adjust head if empty + head = vals(lv) + lv = lv + 1 + else if ((head > 0) .and. (nv >0)) then + ! Adjust head if first item less than it + if (head > vals(lv)) then + listv(vals(lv)) = head + head = vals(lv) + lv = lv + 1 + end if + end if + + lh = head + ph = lh + do while ((lh > 0) .and. (lv <= nv)) + if (lh == vals(lv)) then + lv = lv + 1 + else if (lh > vals(lv)) then + listv(vals(lv)) = lh + listv(ph) = vals(lv) + lh = vals(lv) + lv = lv + 1 + else + ph = lh + lh = listv(lh) + end if + end do + lh = ph + do while (lv <= nv) + listv(lh) = vals(lv) + lh = listv(lh) + lv = lv + 1 + end do + end subroutine hlmerge + + + subroutine printlist(head,listv) + integer(psb_ipk_), intent(in) :: head, listv(:) + integer(psb_ipk_) :: li + + li = head + do while (li > 0) + write(0,*) 'Item: ', li + li = listv(li) + end do + end subroutine printlist + +end subroutine psb_zsparse_biconjg_mlk diff --git a/prec/impl/psb_zsparse_biconjg_s_ft_llk.F90 b/prec/impl/psb_zsparse_biconjg_s_ft_llk.F90 new file mode 100644 index 00000000..541a755c --- /dev/null +++ b/prec/impl/psb_zsparse_biconjg_s_ft_llk.F90 @@ -0,0 +1,414 @@ +! +! +! AMG-AINV: Approximate Inverse plugin for +! AMG4PSBLAS version 1.0 +! +! (C) Copyright 2020 +! +! Salvatore Filippone University of Rome Tor Vergata +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +subroutine psb_zsparse_biconjg_s_ft_llk(n,a,p,z,w,nzrmax,sp_thresh,info) + use psb_base_mod + use psb_z_ainv_tools_mod + use psb_z_biconjg_mod, psb_protect_name => psb_zsparse_biconjg_s_ft_llk + + ! + ! Left-looking variant, stabilized i.e. product by A is applied + ! to compute the diagonal elements. + ! + ! + implicit none + integer(psb_ipk_), intent(in) :: n + type(psb_z_csr_sparse_mat), intent(in) :: a + type(psb_z_csc_sparse_mat), intent(inout) :: z,w + integer(psb_ipk_), intent(in) :: nzrmax + real(psb_dpk_), intent(in) :: sp_thresh + complex(psb_dpk_), intent(out) :: p(:) + integer(psb_ipk_), intent(out) :: info + + ! Locals + integer(psb_ipk_), allocatable :: ia(:), ja(:), izkr(:), izcr(:),iww(:) + complex(psb_dpk_), allocatable :: zval(:),val(:), q(:), ww(:) + integer(psb_ipk_) :: i,j,k, kc, kr, err_act, nz, nzra, nzrz, ipzi,ipzj, nzww,& + & nzzi,nzzj, nzz, ip1, ip2, ipza,ipzz, ipzn, nzzn, ipz1, ipz2,& + & ipj, lastj, nextj, nzw, nzrw + type(psb_i_heap) :: heap, rheap + type(psb_z_csc_sparse_mat) :: ac + complex(psb_dpk_) :: alpha, tmpq,tmpq2 + character(len=20) :: name='psb_orth_llk' + logical, parameter :: debug=.false. + + allocate(zval(n),ia(n),val(n),izkr(n),izcr(n),q(n),iww(n),ww(n),stat=info) + if (info == psb_success_) call ac%cp_from_fmt(a,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + return + end if + ! + ! Init pointers to: + ! ljr(i): last occupied column index within row I + ! izcr(i): first occupied row index within column I + ! + do i=1,n + izkr(i) = 0 + izcr(i) = 0 + zval(i) = zzero + end do + + ! Init z_1=e_1 and p_1=a_11 + p(1) = zzero + i = 1 + nz = a%irp(i+1) - a%irp(i) + do j=1,nz + if (a%ja(j) == 1) then + p(1) = a%val(j) + exit + end if + end do + if (abs(p(1)) < d_epstol) & + & p(1) = 1.d-3 + + q(1) = p(1) + ! + ! + call z%allocate(n,n,n*nzrmax) + + z%icp(1) = 1 + z%icp(2) = 2 + z%ia(1) = 1 + z%val(1) = zone + nzz = 1 + + call w%allocate(n,n,n*nzrmax) + w%icp(1) = 1 + w%icp(2) = 2 + w%ia(1) = 1 + w%val(1) = zone + nzw = 1 + + do i = 2, n + if (debug) write(0,*) 'Main loop iteration ',i,n + + ! + ! Update loop on Z. + ! Must be separated from update loop of W because of + ! the conflict on J that would result. + ! + + ! ZVAL = e_i + ! !$ do j=1, i-1 + ! !$ zval(j) = zzero + ! !$ end do + zval(i) = zone + izkr(i) = 1 + call heap%init(info) + if (info == psb_success_) call heap%insert(i,info) + if (info == psb_success_) call rheap%init(info) + do j = ac%icp(i), ac%icp(i+1)-1 + if (ac%ia(j) lastj) then + lastj = j + exit inner + end if + end do inner + izcr(j) = 0 + if (j>=i) exit outer + if (debug) write(0,*) 'update loop, using row: ',j + ip1 = w%icp(j) + ip2 = w%icp(j+1) - 1 + nzra = max(0,ip2 - ip1 + 1) + nzww = 0 + call psb_z_spvspm(zone,a,nzra,w%ia(ip1:ip2),w%val(ip1:ip2),& + & zzero,nzww,iww,ww,info) + + p(i) = psb_spge_dot(nzww,iww,ww,zval) + + ipz1 = z%icp(j) + ipz2 = z%icp(j+1) + nzrz = ipz2-ipz1 + alpha = (-p(i)/p(j)) +!!$ write(0,*) ' p(i)/p(j) ',i,j,alpha,p(i),p(j) + if (.false..or.(abs(alpha) > sp_thresh)) then + + do k=ipz1, ipz2-1 + kr = z%ia(k) + zval(kr) = zval(kr) + alpha*z%val(k) + if (izkr(kr) == 0) then + call heap%insert(kr,info) + if (info /= psb_success_) exit + izkr(kr) = 1 + ! We have just added a new nonzero in KR. Thus, we will + ! need to explicitly compute the dot products on all + ! rows jj).and.(nextj lastj) then + lastj = j + exit innerw + end if + end do innerw + izcr(j) = 0 + if (j>=i) exit outerw + if (debug) write(0,*) 'update loop, using row: ',j + if (.false.) then + ip1 = ac%icp(j) + ip2 = ac%icp(j+1) - 1 + do + if (ip2 < ip1 ) exit + if (ac%ia(ip2) <= n) exit + ip2 = ip2 -1 + end do + nzra = max(0,ip2 - ip1 + 1) + q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval) + ! !$ write(psb_err_unit,*) j,i,p(i) + else + ip1 = z%icp(j) + ip2 = z%icp(j+1) - 1 + nzra = max(0,ip2 - ip1 + 1) + nzww = 0 + call psb_z_spmspv(zone,ac,nzra,z%ia(ip1:ip2),z%val(ip1:ip2),& + & zzero,nzww,iww,ww,info) + + q(i) = psb_spge_dot(nzww,iww,ww,zval) + end if + + ipz1 = w%icp(j) + ipz2 = w%icp(j+1) + nzrz = ipz2-ipz1 + alpha = (-q(i)/q(j)) +!!$ write(0,*) ' q(i)/q(j) ',i,j,alpha,q(i),q(j) + if (.false..or.(abs(alpha) > sp_thresh)) then + + do k=ipz1, ipz2-1 + kr = w%ia(k) + zval(kr) = zval(kr) + alpha*w%val(k) + if (izkr(kr) == 0) then + call heap%insert(kr,info) + if (info /= psb_success_) exit + izkr(kr) = 1 + ! We have just added a new nonzero in KR. Thus, we will + ! need to explicitly compute the dot products on all + ! rows jj).and.(nextj psb_zsparse_biconjg_s_llk + + ! + ! Left-looking variant SYMMETRIC/HERMITIAN A. You have been warned! + ! + ! + implicit none + integer(psb_ipk_), intent(in) :: n + type(psb_z_csr_sparse_mat), intent(in) :: a + type(psb_z_csc_sparse_mat), intent(inout) :: z,w + integer(psb_ipk_), intent(in) :: nzrmax + real(psb_dpk_), intent(in) :: sp_thresh + complex(psb_dpk_), intent(out) :: p(:) + integer(psb_ipk_), intent(out) :: info + + ! Locals + integer(psb_ipk_), allocatable :: ia(:), ja(:), izkr(:), izcr(:) + complex(psb_dpk_), allocatable :: zval(:),val(:), q(:) + integer(psb_ipk_) :: i,j,k, kc, kr, err_act, nz, nzra, nzrz, ipzi,ipzj,& + & nzzi,nzzj, nzz, ip1, ip2, ipza,ipzz, ipzn, nzzn, ipz1, ipz2,& + & ipj, lastj, nextj, nzw,kk + type(psb_i_heap) :: heap, rheap + type(psb_z_csc_sparse_mat) :: ac + complex(psb_dpk_) :: alpha, zvalmax + character(len=20) :: name='psb_orth_llk' + logical, parameter :: debug=.false. + + allocate(zval(n),ia(n),val(n),izkr(n),izcr(n),stat=info) + if (info == psb_success_) call ac%cp_from_fmt(a,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + return + end if + ! + ! izkr(i): flag nonzeros in ZVAL. To minimize traffic into heap. + ! izcr(i): flag rows to be used for the dot products. Used to minimize + ! traffic in rheap. + ! + do i=1,n + izkr(i) = 0 + izcr(i) = 0 + zval(i) = zzero + end do + + ! Init z_1=e_1 and p_1=a_11 + p(1) = zzero + i = 1 + nz = a%irp(i+1) - a%irp(i) + do j=1,nz + if (a%ja(j) == 1) then + p(1) = a%val(j) + exit + end if + end do + if (abs(p(1)) < d_epstol) & + & p(1) = 1.d-3 + + ! + ! + call z%allocate(n,n,n*nzrmax) + + z%icp(1) = 1 + z%icp(2) = 2 + z%ia(1) = 1 + z%val(1) = zone + nzz = 1 + zvalmax = zone + + do i = 2, n + if (debug) write(0,*) 'Main loop iteration ',i,n + + ! + ! Update loop on Z. + ! Must be separated from update loop of W because of + ! the conflict on J that would result. + ! + + ! ZVAL = e_i + ! !$ do j=1, i-1 + ! !$ zval(j) = zzero + ! !$ end do + zval(i) = zone + izkr(i) = 1 + call heap%init(info) + if (info == psb_success_) call heap%insert(i,info) + + if (info == psb_success_) call rheap%init(info) + do j = ac%icp(i), ac%icp(i+1)-1 + if (ac%ia(j) < i) then + if (info == psb_success_) call rheap%insert(ac%ia(j),info) + izcr(ac%ia(j)) = 1 + end if + end do + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_init_heap') + return + end if + + ! Update loop + ! The idea is to keep track of the indices of the nonzeros in zval, + ! so as to only do the dot products on the rows which have nonzeros + ! in their positions; to do this we keep an extra + ! copy of A in CSC, and the row indices to be considered are in rheap. + lastj = -1 + outer: do + inner: do + call rheap%get_first(j,info) + if (debug) write(0,*) 'from get_first: ',j,info + if (info == -1) exit outer ! Empty heap + if (j > lastj) then + lastj = j + exit inner + end if + end do inner + + izcr(j) = 0 + if (j>=i) cycle outer + if (debug) write(0,*) 'update loop, using row: ',j,i + ip1 = a%irp(j) + ip2 = a%irp(j+1) - 1 + do + if (ip2 < ip1 ) exit + if (a%ja(ip2) <= n) exit + ip2 = ip2 -1 + end do + nzra = max(0,ip2 - ip1 + 1) + p(i) = psb_spge_dot(nzra,a%ja(ip1:ip2),a%val(ip1:ip2),zval) + ! !$ write(psb_err_unit,*) j,i,p(i) + + alpha = (-p(i)/p(j)) + + if (.false..or.(abs(alpha) > sp_thresh)) then + do k=z%icp(j), z%icp(j+1)-1 + kr = z%ia(k) + zval(kr) = zval(kr) + alpha*z%val(k) +!!$ if (abs(zval(kr)) > 1e16) then +!!$ write(0,*) i,j,p(i),p(j),alpha,z%val(k),alpha*z%val(k),kr,zval(kr) +!!$ end if + if (izkr(kr) == 0) then + + call heap%insert(kr,info) + if (info /= psb_success_) exit + izkr(kr) = 1 + ! We have just added a new nonzero in KR. Thus, we will + ! need to explicitly compute the dot products on all + ! rows jj).and.(nextj precout) - type is (psb_c_bjac_prec_type) + type is (psb_c_bjac_prec_type) call pout%set_ctxt(prec%get_ctxt()) - if (allocated(prec%av)) then + if (allocated(prec%av)) then allocate(pout%av(size(prec%av)),stat=info) - do i=1,size(prec%av) + do i=1,size(prec%av) if (info /= psb_success_) exit call prec%av(i)%clone(pout%av(i),info) enddo if (info /= psb_success_) goto 9999 end if - if (allocated(prec%dv)) then + if (allocated(prec%dv)) then allocate(pout%dv,stat=info) if (info == 0) call prec%dv%clone(pout%dv,info) end if @@ -316,7 +330,7 @@ contains subroutine psb_c_bjac_allocate_wrk(prec,info,vmold,desc) use psb_base_mod implicit none - + ! Arguments class(psb_c_bjac_prec_type), intent(inout) :: prec integer(psb_ipk_), intent(out) :: info @@ -326,11 +340,11 @@ contains ! Local variables integer(psb_ipk_) :: err_act, i character(len=20) :: name - + info=psb_success_ name = 'psb_c_allocate_wrk' call psb_erractionsave(err_act) - + if (psb_get_errstatus().ne.0) goto 9999 if (allocated(prec%wrk)) then if (size(prec%wrk)<2) then @@ -343,11 +357,11 @@ contains if (info /= 0) then info = psb_err_internal_error_; call psb_errpush(info,name,a_err="deallocate"); goto 9999 end if - + if (.not.allocated(prec%wrk)) then - if (.not.present(desc)) then + if (.not.present(desc)) then info = psb_err_internal_error_; call psb_errpush(info,name,a_err="no desc?"); goto 9999 - end if + end if allocate(prec%wrk(2),stat=info) do i=1, 2 if (info == 0) call psb_geall(prec%wrk(i),desc,info) @@ -357,19 +371,19 @@ contains if (info /= 0) then info = psb_err_internal_error_; call psb_errpush(info,name,a_err="allocate"); goto 9999 end if - + call psb_erractionrestore(err_act) return - + 9999 call psb_error_handler(err_act) return - + end subroutine psb_c_bjac_allocate_wrk subroutine psb_c_bjac_free_wrk(prec,info) use psb_base_mod implicit none - + ! Arguments class(psb_c_bjac_prec_type), intent(inout) :: prec integer(psb_ipk_), intent(out) :: info @@ -378,14 +392,14 @@ contains integer(psb_ipk_) :: err_act integer(psb_ipk_) :: i character(len=20) :: name - + info=psb_success_ name = 'psb_c_allocate_wrk' call psb_erractionsave(err_act) - + if (psb_get_errstatus().ne.0) goto 9999 - info = psb_success_ + info = psb_success_ if (allocated(prec%wrk)) then do i=1,size(prec%wrk) if (info == 0) call prec%wrk(i)%free(info) @@ -395,29 +409,29 @@ contains if (info /= 0) then info = psb_err_internal_error_; call psb_errpush(info,name,a_err="deallocate"); goto 9999 end if - + call psb_erractionrestore(err_act) return - + 9999 call psb_error_handler(err_act) return - + end subroutine psb_c_bjac_free_wrk function psb_c_bjac_is_allocated_wrk(prec) result(res) use psb_base_mod implicit none - + ! Arguments class(psb_c_bjac_prec_type), intent(in) :: prec logical :: res - ! In the base version we can say yes, because + ! In the base version we can say yes, because ! there is nothing to allocate res = allocated(prec%wrk) - + end function psb_c_bjac_is_allocated_wrk - + end module psb_c_bjacprec diff --git a/prec/psb_c_invk_fact_mod.f90 b/prec/psb_c_invk_fact_mod.f90 new file mode 100644 index 00000000..620a8adf --- /dev/null +++ b/prec/psb_c_invk_fact_mod.f90 @@ -0,0 +1,165 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! Moved here from AMG4PSBLAS, original copyright below. +! +! +! AMG-AINV: Approximate Inverse plugin for +! AMG4PSBLAS version 1.0 +! +! (C) Copyright 2020 +! +! Salvatore Filippone University of Rome Tor Vergata +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: psb_c_invk_fact_mod.f90 +! +! Module: psb_c_invk_fact_mod +! +! This module defines some interfaces used internally by the implementation of +! psb_c_invk_solver, but not visible to the end user. +! +! +module psb_c_invk_fact_mod + + use psb_base_mod + use psb_prec_const_mod + + interface psb_invk_fact + subroutine psb_c_invk_bld(a,fill1, fill2,lmat,d,umat,desc,info,blck) + ! import + import psb_cspmat_type, psb_ipk_, psb_spk_, psb_desc_type + ! Arguments + type(psb_cspmat_type), intent(in), target :: a + integer(psb_ipk_), intent(in) :: fill1, fill2 + type(psb_cspmat_type), intent(inout) :: lmat, umat + complex(psb_spk_), allocatable :: d(:) + Type(psb_desc_type), Intent(inout) :: desc + integer(psb_ipk_), intent(out) :: info + type(psb_cspmat_type), intent(in), optional :: blck + end subroutine + end interface + + ! Inner workings + interface psb_sparse_invk + subroutine psb_csparse_invk(n,a,z,fill_in,info,inlevs) + ! Import + import psb_ipk_, psb_cspmat_type + ! Arguments + integer(psb_ipk_), intent(in) :: n + type(psb_cspmat_type), intent(in) :: a + type(psb_cspmat_type), intent(inout) :: z + integer(psb_ipk_), intent(in) :: fill_in + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: inlevs(:) + end subroutine psb_csparse_invk + end interface + + interface psb_invk_inv + subroutine psb_cinvk_inv(fill_in,i,row,rowlevs,heap,uia1,uia2,uaspk,uplevs,& + & nidx,idxs,info) + + use psb_base_mod, only : psb_cspmat_type, psb_spk_, psb_i_heap, psb_ipk_ + implicit none + + ! Arguments + type(psb_i_heap), intent(inout) :: heap + integer(psb_ipk_), intent(in) :: i, fill_in + integer(psb_ipk_), intent(inout) :: nidx,info + integer(psb_ipk_), intent(inout) :: rowlevs(:) + integer(psb_ipk_), allocatable, intent(inout) :: idxs(:) + integer(psb_ipk_), intent(in) :: uia1(:),uia2(:),uplevs(:) + complex(psb_spk_), intent(in) :: uaspk(:) + complex(psb_spk_), intent(inout) :: row(:) + + + end subroutine psb_cinvk_inv + end interface + + interface psb_invk_copyin + subroutine psb_c_invk_copyin(i,m,a,jmin,jmax,row,rowlevs,heap,& + & ktrw,trw,info,sign,inlevs) + ! Import + use psb_base_mod, only : psb_c_csr_sparse_mat, psb_c_coo_sparse_mat,& + & psb_spk_, psb_i_heap, psb_ipk_ + implicit none + ! Arguments + type(psb_c_csr_sparse_mat), intent(in) :: a + type(psb_c_coo_sparse_mat), intent(inout) :: trw + integer(psb_ipk_), intent(in) :: i,m,jmin,jmax + integer(psb_ipk_), intent(inout) :: ktrw,info + integer(psb_ipk_), intent(inout) :: rowlevs(:) + complex(psb_spk_), intent(inout) :: row(:) + type(psb_i_heap), intent(inout) :: heap + real(psb_spk_), optional, intent(in) :: sign + integer(psb_ipk_), intent(in), optional :: inlevs(:) + end subroutine psb_c_invk_copyin + end interface + + interface psb_invk_copyout + subroutine psb_c_invk_copyout(fill_in,i,m,row,rowlevs,nidx,idxs,& + & l2,uia1,uia2,uaspk,info) + use psb_base_mod, only : psb_cspmat_type, psb_spk_, psb_i_heap, psb_ipk_ + implicit none + ! Arguments + integer(psb_ipk_), intent(in) :: fill_in, i, m, nidx + integer(psb_ipk_), intent(inout) :: l2, info + integer(psb_ipk_), intent(inout) :: rowlevs(:), idxs(:) + integer(psb_ipk_), allocatable, intent(inout) :: uia1(:), uia2(:) + complex(psb_spk_), allocatable, intent(inout) :: uaspk(:) + complex(psb_spk_), intent(inout) :: row(:) + end subroutine psb_c_invk_copyout + end interface + +end module diff --git a/prec/psb_c_invt_fact_mod.f90 b/prec/psb_c_invt_fact_mod.f90 new file mode 100644 index 00000000..841c39b1 --- /dev/null +++ b/prec/psb_c_invt_fact_mod.f90 @@ -0,0 +1,168 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! Moved here from AMG4PSBLAS, original copyright below. +! +! +! AMG-AINV: Approximate Inverse plugin for +! AMG4PSBLAS version 1.0 +! +! (C) Copyright 2020 +! +! Salvatore Filippone University of Rome Tor Vergata +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: psb_c_invt_fact_mod.f90 +! +! Module: psb_c_invt_fact_mod +! +! This module defines some interfaces used internally by the implementation of +! psb_c_invt_solver, but not visible to the end user. +! +! +module psb_c_invt_fact_mod + + use psb_base_mod + use psb_prec_const_mod + + interface psb_invt_fact + subroutine psb_c_invt_bld(a,fillin,invfill,thresh,invthresh,& + & lmat,d,umat,desc,info,blck) + ! Import + import psb_cspmat_type, psb_spk_, psb_ipk_, psb_desc_type + ! Arguments + type(psb_cspmat_type), intent(in), target :: a + integer(psb_ipk_), intent(in) :: fillin,invfill + real(psb_spk_), intent(in) :: thresh + real(psb_spk_), intent(in) :: invthresh + type(psb_cspmat_type), intent(inout) :: lmat, umat + complex(psb_spk_), allocatable :: d(:) + Type(psb_desc_type), Intent(inout) :: desc + integer(psb_ipk_), intent(out) :: info + type(psb_cspmat_type), intent(in), optional :: blck + end subroutine psb_c_invt_bld + end interface + + ! Interfaces for the inner workings + + interface + subroutine psb_csparse_invt(n,a,z,nzrmax,sp_thresh,info) + ! Import + import psb_cspmat_type, psb_spk_, psb_ipk_ + ! Arguments + integer(psb_ipk_), intent(in) :: n + type(psb_cspmat_type), intent(in) :: a + type(psb_cspmat_type), intent(inout) :: z + integer(psb_ipk_), intent(in) :: nzrmax + real(psb_spk_), intent(in) :: sp_thresh + integer(psb_ipk_), intent(out) :: info + end subroutine psb_csparse_invt + end interface + + interface + subroutine psb_c_invt_copyin(i,m,a,jd,jmin,jmax,nlw,nup,jmaxup,nrmi,row,heap,& + & irwt,ktrw,trw,info,sign) + ! Import + import psb_c_csr_sparse_mat, psb_c_coo_sparse_mat, psb_spk_, & + & psb_ipk_, psb_i_heap + ! Arguments + type(psb_c_csr_sparse_mat), intent(in) :: a + type(psb_c_coo_sparse_mat), intent(inout) :: trw + integer(psb_ipk_), intent(in) :: i, m,jmin,jmax,jd + integer(psb_ipk_), intent(inout) :: ktrw,nlw,nup,jmaxup,info + integer(psb_ipk_), intent(inout) :: irwt(:) + real(psb_spk_), intent(inout) :: nrmi + complex(psb_spk_), intent(inout) :: row(:) + type(psb_i_heap), intent(inout) :: heap + real(psb_spk_), intent(in), optional :: sign + end subroutine psb_c_invt_copyin + end interface + + interface + subroutine psb_c_invt_copyout(fill_in,thres,i,m,nlw,nup,jmaxup,nrmi,row, & + & nidx,idxs,l2,ja,irp,val,info) + ! Import + import psb_spk_, psb_ipk_ + ! Arguments + integer(psb_ipk_), intent(in) :: fill_in,i,m,nidx,nlw,nup,jmaxup + integer(psb_ipk_), intent(in) :: idxs(:) + integer(psb_ipk_), intent(inout) :: l2, info + integer(psb_ipk_), allocatable, intent(inout) :: ja(:),irp(:) + real(psb_spk_), intent(in) :: thres,nrmi + complex(psb_spk_),allocatable, intent(inout) :: val(:) + complex(psb_spk_), intent(inout) :: row(:) + end subroutine psb_c_invt_copyout + end interface + + interface psb_invt_inv + subroutine psb_c_invt_inv(thres,i,nrmi,row,heap,irwt,ja,irp,val, & + & nidx,idxs,info) + ! import + import psb_spk_, psb_i_heap, psb_ipk_ + ! Arguments + type(psb_i_heap), intent(inout) :: heap + integer(psb_ipk_), intent(in) :: i + integer(psb_ipk_), intent(inout) :: nidx,info + integer(psb_ipk_), intent(inout) :: irwt(:) + real(psb_spk_), intent(in) :: thres,nrmi + integer(psb_ipk_), allocatable, intent(inout) :: idxs(:) + integer(psb_ipk_), intent(in) :: ja(:),irp(:) + complex(psb_spk_), intent(in) :: val(:) + complex(psb_spk_), intent(inout) :: row(:) + end subroutine + end interface + +contains + +end module psb_c_invt_fact_mod diff --git a/prec/psb_c_prec_type.f90 b/prec/psb_c_prec_type.f90 index cd5e64e4..38080cee 100644 --- a/prec/psb_c_prec_type.f90 +++ b/prec/psb_c_prec_type.f90 @@ -1,9 +1,9 @@ -! +! ! Parallel Sparse BLAS version 3.5 ! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! +! Salvatore Filippone +! Alfredo Buttari +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -15,7 +15,7 @@ ! 3. The name of the PSBLAS group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. -! +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -27,8 +27,8 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! -! +! +! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !! Module to define PREC_DATA, !! !! structure for preconditioning. !! @@ -54,6 +54,10 @@ module psb_c_prec_type procedure, pass(prec) :: build => psb_cprecbld procedure, pass(prec) :: init => psb_cprecinit procedure, pass(prec) :: descr => psb_cfile_prec_descr + procedure, pass(prec) :: cseti => psb_ccprecseti + procedure, pass(prec) :: csetc => psb_ccprecsetc + procedure, pass(prec) :: csetr => psb_ccprecsetr + generic, public :: set => cseti, csetc, csetr procedure, pass(prec) :: allocate_wrk => psb_c_allocate_wrk procedure, pass(prec) :: free_wrk => psb_c_free_wrk procedure, pass(prec) :: is_allocated_wrk => psb_c_is_allocated_wrk @@ -102,7 +106,7 @@ module psb_c_prec_type module procedure psb_cprec_sizeof end interface - interface + interface subroutine psb_c_apply2_vect(prec,x,y,desc_data,info,trans,work) import :: psb_ipk_, psb_desc_type, psb_cprec_type, psb_c_vect_type, psb_spk_ type(psb_desc_type),intent(in) :: desc_data @@ -114,8 +118,8 @@ module psb_c_prec_type complex(psb_spk_),intent(inout), optional, target :: work(:) end subroutine psb_c_apply2_vect end interface - - interface + + interface subroutine psb_c_apply1_vect(prec,x,desc_data,info,trans,work) import :: psb_ipk_, psb_desc_type, psb_cprec_type, psb_c_vect_type, psb_spk_ type(psb_desc_type),intent(in) :: desc_data @@ -126,7 +130,7 @@ module psb_c_prec_type complex(psb_spk_),intent(inout), optional, target :: work(:) end subroutine psb_c_apply1_vect end interface - + interface subroutine psb_c_apply2v(prec,x,y,desc_data,info,trans,work) import :: psb_ipk_, psb_desc_type, psb_cprec_type, psb_c_vect_type, psb_spk_ @@ -139,8 +143,8 @@ module psb_c_prec_type complex(psb_spk_),intent(inout), optional, target :: work(:) end subroutine psb_c_apply2v end interface - - interface + + interface subroutine psb_c_apply1v(prec,x,desc_data,info,trans) import :: psb_ipk_, psb_desc_type, psb_cprec_type, psb_c_vect_type, psb_spk_ type(psb_desc_type),intent(in) :: desc_data @@ -150,56 +154,89 @@ module psb_c_prec_type character(len=1), optional :: trans end subroutine psb_c_apply1v end interface - + + interface + subroutine psb_ccprecseti(prec,what,val,info,ilev,ilmax,pos,idx) + import :: psb_cprec_type, psb_cspmat_type, psb_desc_type, psb_spk_, & + & psb_ipk_ + class(psb_cprec_type), intent(inout) :: prec + character(len=*), intent(in) :: what + integer(psb_ipk_), intent(in) :: val + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), optional, intent(in) :: ilev,ilmax,idx + character(len=*), optional, intent(in) :: pos + end subroutine psb_ccprecseti + subroutine psb_ccprecsetr(prec,what,val,info,ilev,ilmax,pos,idx) + import :: psb_cprec_type, psb_cspmat_type, psb_desc_type, psb_spk_, & + & psb_ipk_ + class(psb_cprec_type), intent(inout) :: prec + character(len=*), intent(in) :: what + real(psb_spk_), intent(in) :: val + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), optional, intent(in) :: ilev,ilmax,idx + character(len=*), optional, intent(in) :: pos + end subroutine psb_ccprecsetr + subroutine psb_ccprecsetc(prec,what,string,info,ilev,ilmax,pos,idx) + import :: psb_cprec_type, psb_cspmat_type, psb_desc_type, psb_spk_, & + & psb_ipk_ + class(psb_cprec_type), intent(inout) :: prec + character(len=*), intent(in) :: what + character(len=*), intent(in) :: string + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), optional, intent(in) :: ilev,ilmax,idx + character(len=*), optional, intent(in) :: pos + end subroutine psb_ccprecsetc +end interface + contains subroutine psb_cfile_prec_descr(prec,iout, root) use psb_base_mod - implicit none + implicit none class(psb_cprec_type), intent(in) :: prec integer(psb_ipk_), intent(in), optional :: iout integer(psb_ipk_), intent(in), optional :: root integer(psb_ipk_) :: iout_,info - character(len=20) :: name='prec_descr' - - if (present(iout)) then + character(len=20) :: name='prec_descr' + + if (present(iout)) then iout_ = iout else - iout_ = 6 + iout_ = 6 end if - if (.not.allocated(prec%prec)) then + if (.not.allocated(prec%prec)) then info = 1124 call psb_errpush(info,name,a_err="preconditioner") end if call prec%prec%descr(iout=iout,root=root) - + end subroutine psb_cfile_prec_descr subroutine psb_c_prec_dump(prec,info,prefix,head) - implicit none + implicit none type(psb_cprec_type), intent(in) :: prec integer(psb_ipk_), intent(out) :: info character(len=*), intent(in), optional :: prefix,head - ! len of prefix_ + ! len of prefix_ info = 0 - if (.not.allocated(prec%prec)) then + if (.not.allocated(prec%prec)) then info = -1 write(psb_err_unit,*) 'Trying to dump a non-built preconditioner' return end if - + call prec%prec%dump(info,prefix,head) - - + + end subroutine psb_c_prec_dump subroutine psb_c_allocate_wrk(prec,info,vmold,desc) use psb_base_mod implicit none - + ! Arguments class(psb_cprec_type), intent(inout) :: prec integer(psb_ipk_), intent(out) :: info @@ -209,33 +246,33 @@ contains ! Local variables integer(psb_ipk_) :: err_act character(len=20) :: name - + info=psb_success_ name = 'psb_c_allocate_wrk' call psb_erractionsave(err_act) - + if (psb_get_errstatus().ne.0) goto 9999 - if (.not.allocated(prec%prec)) then + if (.not.allocated(prec%prec)) then info = -1 write(psb_err_unit,*) 'Trying to allocate wrk to a non-built preconditioner' return end if - + call prec%prec%allocate_wrk(info,vmold=vmold,desc=desc) - + call psb_erractionrestore(err_act) return - + 9999 call psb_error_handler(err_act) return - + end subroutine psb_c_allocate_wrk subroutine psb_c_free_wrk(prec,info) use psb_base_mod implicit none - + ! Arguments class(psb_cprec_type), intent(inout) :: prec integer(psb_ipk_), intent(out) :: info @@ -243,47 +280,47 @@ contains ! Local variables integer(psb_ipk_) :: err_act character(len=20) :: name - + info=psb_success_ name = 'psb_c_free_wrk' call psb_erractionsave(err_act) - + if (psb_get_errstatus().ne.0) goto 9999 - if (.not.allocated(prec%prec)) then + if (.not.allocated(prec%prec)) then info = -1 write(psb_err_unit,*) 'Trying to free a non-built preconditioner' return end if - + call prec%prec%free_wrk(info) - + call psb_erractionrestore(err_act) return - + 9999 call psb_error_handler(err_act) return - + end subroutine psb_c_free_wrk function psb_c_is_allocated_wrk(prec) result(res) implicit none - + ! Arguments class(psb_cprec_type), intent(in) :: prec logical :: res if (.not.allocated(prec%prec)) then res = .false. - else + else res = prec%prec%is_allocated_wrk() end if - + end function psb_c_is_allocated_wrk subroutine psb_c_precfree(p,info) use psb_base_mod - implicit none + implicit none type(psb_cprec_type), intent(inout) :: p integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: me, err_act,i @@ -303,14 +340,14 @@ contains return 9999 call psb_error_handler(err_act) - + return end subroutine psb_c_precfree subroutine psb_c_prec_free(prec,info) use psb_base_mod - implicit none + implicit none class(psb_cprec_type), intent(inout) :: prec integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: me, err_act,i @@ -324,7 +361,7 @@ contains me=-1 - if (allocated(prec%prec)) then + if (allocated(prec%prec)) then call prec%prec%free(info) if (info /= psb_success_) goto 9999 deallocate(prec%prec,stat=info) @@ -343,26 +380,26 @@ contains class(psb_cprec_type), intent(in) :: prec integer(psb_epk_) :: val integer(psb_ipk_) :: i - + val = 0 - if (allocated(prec%prec)) then + if (allocated(prec%prec)) then val = val + prec%prec%sizeof() end if - + end function psb_cprec_sizeof subroutine psb_c_prec_clone(prec,precout,info) - implicit none + implicit none class(psb_cprec_type), intent(inout) :: prec class(psb_cprec_type), intent(inout) :: precout integer(psb_ipk_), intent(out) :: info info = psb_success_ call prec%free(info) - if (allocated(prec%prec)) then + if (allocated(prec%prec)) then call prec%prec%clone(precout%prec,info) end if - + end subroutine psb_c_prec_clone end module psb_c_prec_type diff --git a/prec/psb_d_ainv_fact_mod.f90 b/prec/psb_d_ainv_fact_mod.f90 new file mode 100644 index 00000000..8eb6fbc8 --- /dev/null +++ b/prec/psb_d_ainv_fact_mod.f90 @@ -0,0 +1,98 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! Moved here from MLD2P4, original copyright below. +! +! +! MLD2P4 version 2.2 +! MultiLevel Domain Decomposition Parallel Preconditioners Package +! based on PSBLAS (Parallel Sparse BLAS version 3.5) +! +! (C) Copyright 2008-2018 +! +! Salvatore Filippone +! Pasqua D'Ambra +! Daniela di Serafino +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the MLD2P4 group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MLD2P4 GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! +! +! File: psb_d_ainv_fact_mod.f90 +! +! Module: psb_d_ainv_fact_mod +! +! This module defines some interfaces used internally by the implementation of +! psb_d_ainv_solver, but not visible to the end user. +! +! +module psb_d_ainv_fact_mod + use psb_base_mod + use psb_prec_const_mod + + interface psb_ainv_fact + subroutine psb_d_ainv_bld(a,alg,fillin,thresh,wmat,d,zmat,desc,info,blck,iscale) + import psb_dspmat_type, psb_dpk_, psb_ipk_, psb_desc_type + type(psb_dspmat_type), intent(in), target :: a + integer(psb_ipk_), intent(in) :: fillin,alg + real(psb_dpk_), intent(in) :: thresh + type(psb_dspmat_type), intent(inout) :: wmat, zmat + real(psb_dpk_), allocatable :: d(:) + Type(psb_desc_type), Intent(in) :: desc + integer(psb_ipk_), intent(out) :: info + type(psb_dspmat_type), intent(in), optional :: blck + integer(psb_ipk_), intent(in), optional :: iscale + end subroutine psb_d_ainv_bld + end interface + +end module psb_d_ainv_fact_mod diff --git a/prec/psb_d_ainv_tools_mod.f90 b/prec/psb_d_ainv_tools_mod.f90 new file mode 100644 index 00000000..7329533b --- /dev/null +++ b/prec/psb_d_ainv_tools_mod.f90 @@ -0,0 +1,132 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! Moved here from AMG-AINV, original copyright below. +! +! +! AMG-AINV: Approximate Inverse plugin for +! AMG4PSBLAS version 1.0 +! +! (C) Copyright 2020 +! +! Salvatore Filippone University of Rome Tor Vergata +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! +! +! +! +! +module psb_d_ainv_tools_mod + + interface sp_drop + subroutine psb_d_sp_drop(idiag,nzrmax,sp_thresh,nz,iz,valz,info) + use psb_base_mod, only : psb_dpk_, psb_ipk_ + implicit none + real(psb_dpk_), intent(in) :: sp_thresh + integer(psb_ipk_), intent(in) :: idiag, nzrmax + integer(psb_ipk_), intent(inout) :: nz + integer(psb_ipk_), intent(inout) :: iz(:) + real(psb_dpk_), intent(inout) :: valz(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_sp_drop + end interface + + interface rwclip + subroutine psb_d_rwclip(nz,ia,ja,val,imin,imax,jmin,jmax) + use psb_base_mod, only : psb_dpk_, psb_ipk_ + + implicit none + integer(psb_ipk_), intent(inout) :: nz + integer(psb_ipk_), intent(inout) :: ia(*), ja(*) + real(psb_dpk_), intent(inout) :: val(*) + integer(psb_ipk_), intent(in) :: imin,imax,jmin,jmax + end subroutine psb_d_rwclip + end interface + + interface sparsify + subroutine psb_d_sparsify(idiag,nzrmax,sp_thresh,n,zw,nz,iz,valz,info, & + & istart,iheap,ikr) + use psb_base_mod, only : psb_dpk_, psb_ipk_, psb_i_heap + implicit none + + real(psb_dpk_), intent(in) :: sp_thresh + integer(psb_ipk_), intent(in) :: idiag, n, nzrmax + real(psb_dpk_), intent(inout) :: zw(:) + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), intent(out) :: iz(:) + real(psb_dpk_), intent(out) :: valz(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: istart + type(psb_i_heap), optional :: iheap + integer(psb_ipk_), optional :: ikr(:) + end subroutine psb_d_sparsify + subroutine psb_d_sparsify_list(idiag,nzrmax,sp_thresh,n,zw,nz,iz,valz,lhead,listv,ikr,info) + use psb_base_mod, only : psb_dpk_, psb_ipk_ + implicit none + + real(psb_dpk_), intent(in) :: sp_thresh + integer(psb_ipk_), intent(in) :: idiag, n, nzrmax + real(psb_dpk_), intent(inout) :: zw(:) + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), intent(out) :: iz(:) + real(psb_dpk_), intent(out) :: valz(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(inout) :: lhead, listv(:) + integer(psb_ipk_) :: ikr(:) + end subroutine psb_d_sparsify_list + + end interface + +end module psb_d_ainv_tools_mod diff --git a/prec/psb_d_biconjg_mod.F90 b/prec/psb_d_biconjg_mod.F90 new file mode 100644 index 00000000..09358744 --- /dev/null +++ b/prec/psb_d_biconjg_mod.F90 @@ -0,0 +1,367 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! Moved here from AMG-AINV, original copyright below. +! +! +! AMG-AINV: Approximate Inverse plugin for +! AMG4PSBLAS version 1.0 +! +! (C) Copyright 2020 +! +! Salvatore Filippone University of Rome Tor Vergata +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! +! +! +! AMG-AINV: Approximate Inverse plugin for +! AMG4PSBLAS version 1.0 +! +! (C) Copyright 2020 +! +! Salvatore Filippone University of Rome Tor Vergata +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +module psb_d_biconjg_mod + + use psb_base_mod + use psb_prec_const_mod + + interface psb_sparse_biconjg + module procedure psb_dsparse_biconjg + end interface + + abstract interface + subroutine psb_dsparse_biconjg_variant(n,a,p,z,w,nzrmax,sp_thresh,info) + use psb_base_mod, only : psb_d_csr_sparse_mat, psb_d_csc_sparse_mat, & + & psb_dpk_, psb_ipk_ + ! + implicit none + integer(psb_ipk_), intent(in) :: n + type(psb_d_csr_sparse_mat), intent(in) :: a + type(psb_d_csc_sparse_mat), intent(inout) :: z,w + integer(psb_ipk_), intent(in) :: nzrmax + real(psb_dpk_), intent(in) :: sp_thresh + real(psb_dpk_), intent(out) :: p(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_dsparse_biconjg_variant + end interface + + + procedure(psb_dsparse_biconjg_variant) :: psb_dsparse_biconjg_llk,& + & psb_dsparse_biconjg_s_llk, psb_dsparse_biconjg_s_ft_llk,& + & psb_dsparse_biconjg_llk_noth, psb_dsparse_biconjg_mlk + +#if defined(HAVE_TUMA_SAINV) + procedure(psb_dsparse_biconjg_variant) :: psb_dsparse_tuma_sainv,& + & psb_dsparse_tuma_lainv +#endif + +contains + + subroutine psb_dsparse_biconjg(alg,n,acsr,p,z,w,nzrmax,sp_thresh,info) + use psb_base_mod + use psb_prec_const_mod + integer(psb_ipk_), intent(in) :: alg,n + type(psb_d_csr_sparse_mat), intent(in) :: acsr + type(psb_dspmat_type), intent(out) :: z, w + integer(psb_ipk_), intent(in) :: nzrmax + real(psb_dpk_), intent(in) :: sp_thresh + real(psb_dpk_), intent(out) :: p(:) + integer(psb_ipk_), intent(out) :: info + + type(psb_d_csc_sparse_mat) :: zcsc,wcsc + integer(psb_ipk_) :: i,j,k,nrm + integer(psb_ipk_) :: err_act + character(len=20) :: name='psb_sparse_biconjg' + integer(psb_ipk_), parameter :: variant=1 + + + info = psb_success_ + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_; goto 9999 + end if + + if (size(p) precout) - type is (psb_d_bjac_prec_type) + type is (psb_d_bjac_prec_type) call pout%set_ctxt(prec%get_ctxt()) - if (allocated(prec%av)) then + if (allocated(prec%av)) then allocate(pout%av(size(prec%av)),stat=info) - do i=1,size(prec%av) + do i=1,size(prec%av) if (info /= psb_success_) exit call prec%av(i)%clone(pout%av(i),info) enddo if (info /= psb_success_) goto 9999 end if - if (allocated(prec%dv)) then + if (allocated(prec%dv)) then allocate(pout%dv,stat=info) if (info == 0) call prec%dv%clone(pout%dv,info) end if @@ -316,7 +330,7 @@ contains subroutine psb_d_bjac_allocate_wrk(prec,info,vmold,desc) use psb_base_mod implicit none - + ! Arguments class(psb_d_bjac_prec_type), intent(inout) :: prec integer(psb_ipk_), intent(out) :: info @@ -326,11 +340,11 @@ contains ! Local variables integer(psb_ipk_) :: err_act, i character(len=20) :: name - + info=psb_success_ name = 'psb_d_allocate_wrk' call psb_erractionsave(err_act) - + if (psb_get_errstatus().ne.0) goto 9999 if (allocated(prec%wrk)) then if (size(prec%wrk)<2) then @@ -343,11 +357,11 @@ contains if (info /= 0) then info = psb_err_internal_error_; call psb_errpush(info,name,a_err="deallocate"); goto 9999 end if - + if (.not.allocated(prec%wrk)) then - if (.not.present(desc)) then + if (.not.present(desc)) then info = psb_err_internal_error_; call psb_errpush(info,name,a_err="no desc?"); goto 9999 - end if + end if allocate(prec%wrk(2),stat=info) do i=1, 2 if (info == 0) call psb_geall(prec%wrk(i),desc,info) @@ -357,19 +371,19 @@ contains if (info /= 0) then info = psb_err_internal_error_; call psb_errpush(info,name,a_err="allocate"); goto 9999 end if - + call psb_erractionrestore(err_act) return - + 9999 call psb_error_handler(err_act) return - + end subroutine psb_d_bjac_allocate_wrk subroutine psb_d_bjac_free_wrk(prec,info) use psb_base_mod implicit none - + ! Arguments class(psb_d_bjac_prec_type), intent(inout) :: prec integer(psb_ipk_), intent(out) :: info @@ -378,14 +392,14 @@ contains integer(psb_ipk_) :: err_act integer(psb_ipk_) :: i character(len=20) :: name - + info=psb_success_ name = 'psb_d_allocate_wrk' call psb_erractionsave(err_act) - + if (psb_get_errstatus().ne.0) goto 9999 - info = psb_success_ + info = psb_success_ if (allocated(prec%wrk)) then do i=1,size(prec%wrk) if (info == 0) call prec%wrk(i)%free(info) @@ -395,29 +409,29 @@ contains if (info /= 0) then info = psb_err_internal_error_; call psb_errpush(info,name,a_err="deallocate"); goto 9999 end if - + call psb_erractionrestore(err_act) return - + 9999 call psb_error_handler(err_act) return - + end subroutine psb_d_bjac_free_wrk function psb_d_bjac_is_allocated_wrk(prec) result(res) use psb_base_mod implicit none - + ! Arguments class(psb_d_bjac_prec_type), intent(in) :: prec logical :: res - ! In the base version we can say yes, because + ! In the base version we can say yes, because ! there is nothing to allocate res = allocated(prec%wrk) - + end function psb_d_bjac_is_allocated_wrk - + end module psb_d_bjacprec diff --git a/prec/psb_d_invk_fact_mod.f90 b/prec/psb_d_invk_fact_mod.f90 new file mode 100644 index 00000000..2bd97198 --- /dev/null +++ b/prec/psb_d_invk_fact_mod.f90 @@ -0,0 +1,165 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! Moved here from AMG4PSBLAS, original copyright below. +! +! +! AMG-AINV: Approximate Inverse plugin for +! AMG4PSBLAS version 1.0 +! +! (C) Copyright 2020 +! +! Salvatore Filippone University of Rome Tor Vergata +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: psb_d_invk_fact_mod.f90 +! +! Module: psb_d_invk_fact_mod +! +! This module defines some interfaces used internally by the implementation of +! psb_d_invk_solver, but not visible to the end user. +! +! +module psb_d_invk_fact_mod + + use psb_base_mod + use psb_prec_const_mod + + interface psb_invk_fact + subroutine psb_d_invk_bld(a,fill1, fill2,lmat,d,umat,desc,info,blck) + ! import + import psb_dspmat_type, psb_ipk_, psb_dpk_, psb_desc_type + ! Arguments + type(psb_dspmat_type), intent(in), target :: a + integer(psb_ipk_), intent(in) :: fill1, fill2 + type(psb_dspmat_type), intent(inout) :: lmat, umat + real(psb_dpk_), allocatable :: d(:) + Type(psb_desc_type), Intent(inout) :: desc + integer(psb_ipk_), intent(out) :: info + type(psb_dspmat_type), intent(in), optional :: blck + end subroutine + end interface + + ! Inner workings + interface psb_sparse_invk + subroutine psb_dsparse_invk(n,a,z,fill_in,info,inlevs) + ! Import + import psb_ipk_, psb_dspmat_type + ! Arguments + integer(psb_ipk_), intent(in) :: n + type(psb_dspmat_type), intent(in) :: a + type(psb_dspmat_type), intent(inout) :: z + integer(psb_ipk_), intent(in) :: fill_in + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: inlevs(:) + end subroutine psb_dsparse_invk + end interface + + interface psb_invk_inv + subroutine psb_dinvk_inv(fill_in,i,row,rowlevs,heap,uia1,uia2,uaspk,uplevs,& + & nidx,idxs,info) + + use psb_base_mod, only : psb_dspmat_type, psb_dpk_, psb_i_heap, psb_ipk_ + implicit none + + ! Arguments + type(psb_i_heap), intent(inout) :: heap + integer(psb_ipk_), intent(in) :: i, fill_in + integer(psb_ipk_), intent(inout) :: nidx,info + integer(psb_ipk_), intent(inout) :: rowlevs(:) + integer(psb_ipk_), allocatable, intent(inout) :: idxs(:) + integer(psb_ipk_), intent(in) :: uia1(:),uia2(:),uplevs(:) + real(psb_dpk_), intent(in) :: uaspk(:) + real(psb_dpk_), intent(inout) :: row(:) + + + end subroutine psb_dinvk_inv + end interface + + interface psb_invk_copyin + subroutine psb_d_invk_copyin(i,m,a,jmin,jmax,row,rowlevs,heap,& + & ktrw,trw,info,sign,inlevs) + ! Import + use psb_base_mod, only : psb_d_csr_sparse_mat, psb_d_coo_sparse_mat,& + & psb_dpk_, psb_i_heap, psb_ipk_ + implicit none + ! Arguments + type(psb_d_csr_sparse_mat), intent(in) :: a + type(psb_d_coo_sparse_mat), intent(inout) :: trw + integer(psb_ipk_), intent(in) :: i,m,jmin,jmax + integer(psb_ipk_), intent(inout) :: ktrw,info + integer(psb_ipk_), intent(inout) :: rowlevs(:) + real(psb_dpk_), intent(inout) :: row(:) + type(psb_i_heap), intent(inout) :: heap + real(psb_dpk_), optional, intent(in) :: sign + integer(psb_ipk_), intent(in), optional :: inlevs(:) + end subroutine psb_d_invk_copyin + end interface + + interface psb_invk_copyout + subroutine psb_d_invk_copyout(fill_in,i,m,row,rowlevs,nidx,idxs,& + & l2,uia1,uia2,uaspk,info) + use psb_base_mod, only : psb_dspmat_type, psb_dpk_, psb_i_heap, psb_ipk_ + implicit none + ! Arguments + integer(psb_ipk_), intent(in) :: fill_in, i, m, nidx + integer(psb_ipk_), intent(inout) :: l2, info + integer(psb_ipk_), intent(inout) :: rowlevs(:), idxs(:) + integer(psb_ipk_), allocatable, intent(inout) :: uia1(:), uia2(:) + real(psb_dpk_), allocatable, intent(inout) :: uaspk(:) + real(psb_dpk_), intent(inout) :: row(:) + end subroutine psb_d_invk_copyout + end interface + +end module diff --git a/prec/psb_d_invt_fact_mod.f90 b/prec/psb_d_invt_fact_mod.f90 new file mode 100644 index 00000000..f38c1c2b --- /dev/null +++ b/prec/psb_d_invt_fact_mod.f90 @@ -0,0 +1,168 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! Moved here from AMG4PSBLAS, original copyright below. +! +! +! AMG-AINV: Approximate Inverse plugin for +! AMG4PSBLAS version 1.0 +! +! (C) Copyright 2020 +! +! Salvatore Filippone University of Rome Tor Vergata +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: psb_d_invt_fact_mod.f90 +! +! Module: psb_d_invt_fact_mod +! +! This module defines some interfaces used internally by the implementation of +! psb_d_invt_solver, but not visible to the end user. +! +! +module psb_d_invt_fact_mod + + use psb_base_mod + use psb_prec_const_mod + + interface psb_invt_fact + subroutine psb_d_invt_bld(a,fillin,invfill,thresh,invthresh,& + & lmat,d,umat,desc,info,blck) + ! Import + import psb_dspmat_type, psb_dpk_, psb_ipk_, psb_desc_type + ! Arguments + type(psb_dspmat_type), intent(in), target :: a + integer(psb_ipk_), intent(in) :: fillin,invfill + real(psb_dpk_), intent(in) :: thresh + real(psb_dpk_), intent(in) :: invthresh + type(psb_dspmat_type), intent(inout) :: lmat, umat + real(psb_dpk_), allocatable :: d(:) + Type(psb_desc_type), Intent(inout) :: desc + integer(psb_ipk_), intent(out) :: info + type(psb_dspmat_type), intent(in), optional :: blck + end subroutine psb_d_invt_bld + end interface + + ! Interfaces for the inner workings + + interface + subroutine psb_dsparse_invt(n,a,z,nzrmax,sp_thresh,info) + ! Import + import psb_dspmat_type, psb_dpk_, psb_ipk_ + ! Arguments + integer(psb_ipk_), intent(in) :: n + type(psb_dspmat_type), intent(in) :: a + type(psb_dspmat_type), intent(inout) :: z + integer(psb_ipk_), intent(in) :: nzrmax + real(psb_dpk_), intent(in) :: sp_thresh + integer(psb_ipk_), intent(out) :: info + end subroutine psb_dsparse_invt + end interface + + interface + subroutine psb_d_invt_copyin(i,m,a,jd,jmin,jmax,nlw,nup,jmaxup,nrmi,row,heap,& + & irwt,ktrw,trw,info,sign) + ! Import + import psb_d_csr_sparse_mat, psb_d_coo_sparse_mat, psb_dpk_, & + & psb_ipk_, psb_i_heap + ! Arguments + type(psb_d_csr_sparse_mat), intent(in) :: a + type(psb_d_coo_sparse_mat), intent(inout) :: trw + integer(psb_ipk_), intent(in) :: i, m,jmin,jmax,jd + integer(psb_ipk_), intent(inout) :: ktrw,nlw,nup,jmaxup,info + integer(psb_ipk_), intent(inout) :: irwt(:) + real(psb_dpk_), intent(inout) :: nrmi + real(psb_dpk_), intent(inout) :: row(:) + type(psb_i_heap), intent(inout) :: heap + real(psb_dpk_), intent(in), optional :: sign + end subroutine psb_d_invt_copyin + end interface + + interface + subroutine psb_d_invt_copyout(fill_in,thres,i,m,nlw,nup,jmaxup,nrmi,row, & + & nidx,idxs,l2,ja,irp,val,info) + ! Import + import psb_dpk_, psb_ipk_ + ! Arguments + integer(psb_ipk_), intent(in) :: fill_in,i,m,nidx,nlw,nup,jmaxup + integer(psb_ipk_), intent(in) :: idxs(:) + integer(psb_ipk_), intent(inout) :: l2, info + integer(psb_ipk_), allocatable, intent(inout) :: ja(:),irp(:) + real(psb_dpk_), intent(in) :: thres,nrmi + real(psb_dpk_),allocatable, intent(inout) :: val(:) + real(psb_dpk_), intent(inout) :: row(:) + end subroutine psb_d_invt_copyout + end interface + + interface psb_invt_inv + subroutine psb_d_invt_inv(thres,i,nrmi,row,heap,irwt,ja,irp,val, & + & nidx,idxs,info) + ! import + import psb_dpk_, psb_i_heap, psb_ipk_ + ! Arguments + type(psb_i_heap), intent(inout) :: heap + integer(psb_ipk_), intent(in) :: i + integer(psb_ipk_), intent(inout) :: nidx,info + integer(psb_ipk_), intent(inout) :: irwt(:) + real(psb_dpk_), intent(in) :: thres,nrmi + integer(psb_ipk_), allocatable, intent(inout) :: idxs(:) + integer(psb_ipk_), intent(in) :: ja(:),irp(:) + real(psb_dpk_), intent(in) :: val(:) + real(psb_dpk_), intent(inout) :: row(:) + end subroutine + end interface + +contains + +end module psb_d_invt_fact_mod diff --git a/prec/psb_d_prec_type.f90 b/prec/psb_d_prec_type.f90 index 6ba9d3be..391023bd 100644 --- a/prec/psb_d_prec_type.f90 +++ b/prec/psb_d_prec_type.f90 @@ -1,9 +1,9 @@ -! +! ! Parallel Sparse BLAS version 3.5 ! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! +! Salvatore Filippone +! Alfredo Buttari +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -15,7 +15,7 @@ ! 3. The name of the PSBLAS group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. -! +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -27,8 +27,8 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! -! +! +! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !! Module to define PREC_DATA, !! !! structure for preconditioning. !! @@ -54,6 +54,10 @@ module psb_d_prec_type procedure, pass(prec) :: build => psb_dprecbld procedure, pass(prec) :: init => psb_dprecinit procedure, pass(prec) :: descr => psb_dfile_prec_descr + procedure, pass(prec) :: cseti => psb_dcprecseti + procedure, pass(prec) :: csetc => psb_dcprecsetc + procedure, pass(prec) :: csetr => psb_dcprecsetr + generic, public :: set => cseti, csetc, csetr procedure, pass(prec) :: allocate_wrk => psb_d_allocate_wrk procedure, pass(prec) :: free_wrk => psb_d_free_wrk procedure, pass(prec) :: is_allocated_wrk => psb_d_is_allocated_wrk @@ -102,7 +106,7 @@ module psb_d_prec_type module procedure psb_dprec_sizeof end interface - interface + interface subroutine psb_d_apply2_vect(prec,x,y,desc_data,info,trans,work) import :: psb_ipk_, psb_desc_type, psb_dprec_type, psb_d_vect_type, psb_dpk_ type(psb_desc_type),intent(in) :: desc_data @@ -114,8 +118,8 @@ module psb_d_prec_type real(psb_dpk_),intent(inout), optional, target :: work(:) end subroutine psb_d_apply2_vect end interface - - interface + + interface subroutine psb_d_apply1_vect(prec,x,desc_data,info,trans,work) import :: psb_ipk_, psb_desc_type, psb_dprec_type, psb_d_vect_type, psb_dpk_ type(psb_desc_type),intent(in) :: desc_data @@ -126,7 +130,7 @@ module psb_d_prec_type real(psb_dpk_),intent(inout), optional, target :: work(:) end subroutine psb_d_apply1_vect end interface - + interface subroutine psb_d_apply2v(prec,x,y,desc_data,info,trans,work) import :: psb_ipk_, psb_desc_type, psb_dprec_type, psb_d_vect_type, psb_dpk_ @@ -139,8 +143,8 @@ module psb_d_prec_type real(psb_dpk_),intent(inout), optional, target :: work(:) end subroutine psb_d_apply2v end interface - - interface + + interface subroutine psb_d_apply1v(prec,x,desc_data,info,trans) import :: psb_ipk_, psb_desc_type, psb_dprec_type, psb_d_vect_type, psb_dpk_ type(psb_desc_type),intent(in) :: desc_data @@ -150,56 +154,89 @@ module psb_d_prec_type character(len=1), optional :: trans end subroutine psb_d_apply1v end interface - + + interface + subroutine psb_dcprecseti(prec,what,val,info,ilev,ilmax,pos,idx) + import :: psb_dprec_type, psb_dspmat_type, psb_desc_type, psb_dpk_, & + & psb_ipk_ + class(psb_dprec_type), intent(inout) :: prec + character(len=*), intent(in) :: what + integer(psb_ipk_), intent(in) :: val + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), optional, intent(in) :: ilev,ilmax,idx + character(len=*), optional, intent(in) :: pos + end subroutine psb_dcprecseti + subroutine psb_dcprecsetr(prec,what,val,info,ilev,ilmax,pos,idx) + import :: psb_dprec_type, psb_dspmat_type, psb_desc_type, psb_dpk_, & + & psb_ipk_ + class(psb_dprec_type), intent(inout) :: prec + character(len=*), intent(in) :: what + real(psb_dpk_), intent(in) :: val + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), optional, intent(in) :: ilev,ilmax,idx + character(len=*), optional, intent(in) :: pos + end subroutine psb_dcprecsetr + subroutine psb_dcprecsetc(prec,what,string,info,ilev,ilmax,pos,idx) + import :: psb_dprec_type, psb_dspmat_type, psb_desc_type, psb_dpk_, & + & psb_ipk_ + class(psb_dprec_type), intent(inout) :: prec + character(len=*), intent(in) :: what + character(len=*), intent(in) :: string + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), optional, intent(in) :: ilev,ilmax,idx + character(len=*), optional, intent(in) :: pos + end subroutine psb_dcprecsetc +end interface + contains subroutine psb_dfile_prec_descr(prec,iout, root) use psb_base_mod - implicit none + implicit none class(psb_dprec_type), intent(in) :: prec integer(psb_ipk_), intent(in), optional :: iout integer(psb_ipk_), intent(in), optional :: root integer(psb_ipk_) :: iout_,info - character(len=20) :: name='prec_descr' - - if (present(iout)) then + character(len=20) :: name='prec_descr' + + if (present(iout)) then iout_ = iout else - iout_ = 6 + iout_ = 6 end if - if (.not.allocated(prec%prec)) then + if (.not.allocated(prec%prec)) then info = 1124 call psb_errpush(info,name,a_err="preconditioner") end if call prec%prec%descr(iout=iout,root=root) - + end subroutine psb_dfile_prec_descr subroutine psb_d_prec_dump(prec,info,prefix,head) - implicit none + implicit none type(psb_dprec_type), intent(in) :: prec integer(psb_ipk_), intent(out) :: info character(len=*), intent(in), optional :: prefix,head - ! len of prefix_ + ! len of prefix_ info = 0 - if (.not.allocated(prec%prec)) then + if (.not.allocated(prec%prec)) then info = -1 write(psb_err_unit,*) 'Trying to dump a non-built preconditioner' return end if - + call prec%prec%dump(info,prefix,head) - - + + end subroutine psb_d_prec_dump subroutine psb_d_allocate_wrk(prec,info,vmold,desc) use psb_base_mod implicit none - + ! Arguments class(psb_dprec_type), intent(inout) :: prec integer(psb_ipk_), intent(out) :: info @@ -209,33 +246,33 @@ contains ! Local variables integer(psb_ipk_) :: err_act character(len=20) :: name - + info=psb_success_ name = 'psb_d_allocate_wrk' call psb_erractionsave(err_act) - + if (psb_get_errstatus().ne.0) goto 9999 - if (.not.allocated(prec%prec)) then + if (.not.allocated(prec%prec)) then info = -1 write(psb_err_unit,*) 'Trying to allocate wrk to a non-built preconditioner' return end if - + call prec%prec%allocate_wrk(info,vmold=vmold,desc=desc) - + call psb_erractionrestore(err_act) return - + 9999 call psb_error_handler(err_act) return - + end subroutine psb_d_allocate_wrk subroutine psb_d_free_wrk(prec,info) use psb_base_mod implicit none - + ! Arguments class(psb_dprec_type), intent(inout) :: prec integer(psb_ipk_), intent(out) :: info @@ -243,47 +280,47 @@ contains ! Local variables integer(psb_ipk_) :: err_act character(len=20) :: name - + info=psb_success_ name = 'psb_d_free_wrk' call psb_erractionsave(err_act) - + if (psb_get_errstatus().ne.0) goto 9999 - if (.not.allocated(prec%prec)) then + if (.not.allocated(prec%prec)) then info = -1 write(psb_err_unit,*) 'Trying to free a non-built preconditioner' return end if - + call prec%prec%free_wrk(info) - + call psb_erractionrestore(err_act) return - + 9999 call psb_error_handler(err_act) return - + end subroutine psb_d_free_wrk function psb_d_is_allocated_wrk(prec) result(res) implicit none - + ! Arguments class(psb_dprec_type), intent(in) :: prec logical :: res if (.not.allocated(prec%prec)) then res = .false. - else + else res = prec%prec%is_allocated_wrk() end if - + end function psb_d_is_allocated_wrk subroutine psb_d_precfree(p,info) use psb_base_mod - implicit none + implicit none type(psb_dprec_type), intent(inout) :: p integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: me, err_act,i @@ -303,14 +340,14 @@ contains return 9999 call psb_error_handler(err_act) - + return end subroutine psb_d_precfree subroutine psb_d_prec_free(prec,info) use psb_base_mod - implicit none + implicit none class(psb_dprec_type), intent(inout) :: prec integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: me, err_act,i @@ -324,7 +361,7 @@ contains me=-1 - if (allocated(prec%prec)) then + if (allocated(prec%prec)) then call prec%prec%free(info) if (info /= psb_success_) goto 9999 deallocate(prec%prec,stat=info) @@ -343,26 +380,26 @@ contains class(psb_dprec_type), intent(in) :: prec integer(psb_epk_) :: val integer(psb_ipk_) :: i - + val = 0 - if (allocated(prec%prec)) then + if (allocated(prec%prec)) then val = val + prec%prec%sizeof() end if - + end function psb_dprec_sizeof subroutine psb_d_prec_clone(prec,precout,info) - implicit none + implicit none class(psb_dprec_type), intent(inout) :: prec class(psb_dprec_type), intent(inout) :: precout integer(psb_ipk_), intent(out) :: info info = psb_success_ call prec%free(info) - if (allocated(prec%prec)) then + if (allocated(prec%prec)) then call prec%prec%clone(precout%prec,info) end if - + end subroutine psb_d_prec_clone end module psb_d_prec_type diff --git a/prec/psb_prec_const_mod.f90 b/prec/psb_prec_const_mod.f90 index f7b32d2f..73c22e58 100644 --- a/prec/psb_prec_const_mod.f90 +++ b/prec/psb_prec_const_mod.f90 @@ -1,9 +1,9 @@ -! +! ! Parallel Sparse BLAS version 3.5 ! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! +! Salvatore Filippone +! Alfredo Buttari +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -15,7 +15,7 @@ ! 3. The name of the PSBLAS group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. -! +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -27,8 +27,8 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! -! +! +! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !! Module to define PREC_DATA, !! !! structure for preconditioning. !! @@ -43,19 +43,23 @@ module psb_prec_const_mod ! Entries in iprcparm: preconditioner type, factorization type, ! prolongation type, restriction type, renumbering algorithm, - ! number of overlap layers, pointer to SuperLU factors, - ! levels of fill in for ILU(N), + ! number of overlap layers, pointer to SuperLU factors, + ! levels of fill in for ILU(N), integer(psb_ipk_), parameter :: psb_p_type_=1, psb_f_type_=2 integer(psb_ipk_), parameter :: psb_ilu_fill_in_=8 + integer(psb_ipk_), parameter :: psb_ilu_ialg_=9 !Renumbering. SEE BELOW integer(psb_ipk_), parameter :: psb_renum_none_=0, psb_renum_glb_=1, psb_renum_gps_=2 integer(psb_ipk_), parameter :: psb_ifpsz=10 ! Entries in rprcparm: ILU(E) epsilon, smoother omega + integer(psb_ipk_), parameter :: psb_ilu_scale_=7 integer(psb_ipk_), parameter :: psb_fact_eps_=1 - integer(psb_ipk_), parameter :: psb_rfpsz=4 - ! Factorization types: none, ILU(N), ILU(E) - integer(psb_ipk_), parameter :: psb_f_none_=0,psb_f_ilu_n_=1 - ! Fields for sparse matrices ensembles: + integer(psb_ipk_), parameter :: psb_rfpsz=8 + ! Factorization types: none, ILU(0), ILU(N), ILU(N,E) + integer(psb_ipk_), parameter :: psb_f_none_=0,psb_f_ilu_n_=1,psb_f_ilu_k_=2,psb_f_ilu_t_=3 + ! Approximate Inverse factorization type: AINV, INVT, INVK + integer(psb_ipk_), parameter :: psb_f_ainv_=4, psb_f_invt_=5, psb_f_invk_=6 + ! Fields for sparse matrices ensembles: integer(psb_ipk_), parameter :: psb_l_pr_=1, psb_u_pr_=2, psb_bp_ilu_avsz=2 integer(psb_ipk_), parameter :: psb_max_avsz=psb_bp_ilu_avsz @@ -65,11 +69,21 @@ module psb_prec_const_mod integer(psb_ipk_), parameter :: psb_ilu_scale_none_ = 0 integer(psb_ipk_), parameter :: psb_ilu_scale_maxval_ = 1 integer(psb_ipk_), parameter :: psb_ilu_scale_diag_ = 2 - integer(psb_ipk_), parameter :: psb_ilu_scale_arwsum_ = 3 + integer(psb_ipk_), parameter :: psb_ilu_scale_arwsum_ = 3 integer(psb_ipk_), parameter :: psb_ilu_scale_aclsum_ = 4 integer(psb_ipk_), parameter :: psb_ilu_scale_arcsum_ = 5 - + ! Numerical parameters relative to Approximate Inverse Preconditioners + integer, parameter :: psb_inv_fillin_ = 3 + integer, parameter :: psb_ainv_alg_ = psb_inv_fillin_ + 1 + integer, parameter :: psb_inv_thresh_ = 3 + integer, parameter :: psb_ainv_llk_ = psb_inv_thresh_ + 1 + integer, parameter :: psb_ainv_s_llk_ = psb_ainv_llk_ + 1 + integer, parameter :: psb_ainv_s_ft_llk_ = psb_ainv_s_llk_ + 1 + integer, parameter :: psb_ainv_llk_noth_ = psb_ainv_s_ft_llk_ + 1 + integer, parameter :: psb_ainv_mlk_ = psb_ainv_llk_noth_ + 1 + integer, parameter :: psb_ainv_lmx_ = psb_ainv_mlk_ + interface psb_check_def module procedure psb_icheck_def, psb_scheck_def, psb_dcheck_def @@ -87,9 +101,9 @@ contains select case(iprec) case(psb_noprec_) pr_to_str='NOPREC' - case(psb_diag_) + case(psb_diag_) pr_to_str='DIAG' - case(psb_bjac_) + case(psb_bjac_) pr_to_str='BJAC' case default pr_to_str='???' @@ -125,7 +139,7 @@ contains integer(psb_ipk_), intent(inout) :: ip integer(psb_ipk_), intent(in) :: id character(len=*), intent(in) :: name - interface + interface function is_legal(i) import :: psb_ipk_ integer(psb_ipk_), intent(in) :: i @@ -133,7 +147,7 @@ contains end function is_legal end interface - if (.not.is_legal(ip)) then + if (.not.is_legal(ip)) then write(psb_err_unit,*) 'Illegal value for ',name,' :',ip, '. defaulting to ',id ip = id end if @@ -143,7 +157,7 @@ contains real(psb_spk_), intent(inout) :: ip real(psb_spk_), intent(in) :: id character(len=*), intent(in) :: name - interface + interface function is_legal(i) import :: psb_ipk_, psb_spk_ real(psb_spk_), intent(in) :: i @@ -151,7 +165,7 @@ contains end function is_legal end interface - if (.not.is_legal(ip)) then + if (.not.is_legal(ip)) then write(psb_err_unit,*) 'Illegal value for ',name,' :',ip, '. defaulting to ',id ip = id end if @@ -161,7 +175,7 @@ contains real(psb_dpk_), intent(inout) :: ip real(psb_dpk_), intent(in) :: id character(len=*), intent(in) :: name - interface + interface function is_legal(i) import :: psb_ipk_, psb_spk_, psb_dpk_ real(psb_dpk_), intent(in) :: i @@ -169,7 +183,7 @@ contains end function is_legal end interface - if (.not.is_legal(ip)) then + if (.not.is_legal(ip)) then write(psb_err_unit,*) 'Illegal value for ',name,' :',ip, '. defaulting to ',id ip = id end if diff --git a/prec/psb_s_ainv_fact_mod.f90 b/prec/psb_s_ainv_fact_mod.f90 new file mode 100644 index 00000000..bc7f1d12 --- /dev/null +++ b/prec/psb_s_ainv_fact_mod.f90 @@ -0,0 +1,98 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! Moved here from MLD2P4, original copyright below. +! +! +! MLD2P4 version 2.2 +! MultiLevel Domain Decomposition Parallel Preconditioners Package +! based on PSBLAS (Parallel Sparse BLAS version 3.5) +! +! (C) Copyright 2008-2018 +! +! Salvatore Filippone +! Pasqua D'Ambra +! Daniela di Serafino +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the MLD2P4 group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MLD2P4 GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! +! +! File: psb_s_ainv_fact_mod.f90 +! +! Module: psb_s_ainv_fact_mod +! +! This module defines some interfaces used internally by the implementation of +! psb_s_ainv_solver, but not visible to the end user. +! +! +module psb_s_ainv_fact_mod + use psb_base_mod + use psb_prec_const_mod + + interface psb_ainv_fact + subroutine psb_s_ainv_bld(a,alg,fillin,thresh,wmat,d,zmat,desc,info,blck,iscale) + import psb_sspmat_type, psb_spk_, psb_ipk_, psb_desc_type + type(psb_sspmat_type), intent(in), target :: a + integer(psb_ipk_), intent(in) :: fillin,alg + real(psb_spk_), intent(in) :: thresh + type(psb_sspmat_type), intent(inout) :: wmat, zmat + real(psb_spk_), allocatable :: d(:) + Type(psb_desc_type), Intent(in) :: desc + integer(psb_ipk_), intent(out) :: info + type(psb_sspmat_type), intent(in), optional :: blck + integer(psb_ipk_), intent(in), optional :: iscale + end subroutine psb_s_ainv_bld + end interface + +end module psb_s_ainv_fact_mod diff --git a/prec/psb_s_ainv_tools_mod.f90 b/prec/psb_s_ainv_tools_mod.f90 new file mode 100644 index 00000000..caa50164 --- /dev/null +++ b/prec/psb_s_ainv_tools_mod.f90 @@ -0,0 +1,132 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! Moved here from AMG-AINV, original copyright below. +! +! +! AMG-AINV: Approximate Inverse plugin for +! AMG4PSBLAS version 1.0 +! +! (C) Copyright 2020 +! +! Salvatore Filippone University of Rome Tor Vergata +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! +! +! +! +! +module psb_s_ainv_tools_mod + + interface sp_drop + subroutine psb_s_sp_drop(idiag,nzrmax,sp_thresh,nz,iz,valz,info) + use psb_base_mod, only : psb_spk_, psb_ipk_ + implicit none + real(psb_spk_), intent(in) :: sp_thresh + integer(psb_ipk_), intent(in) :: idiag, nzrmax + integer(psb_ipk_), intent(inout) :: nz + integer(psb_ipk_), intent(inout) :: iz(:) + real(psb_spk_), intent(inout) :: valz(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_sp_drop + end interface + + interface rwclip + subroutine psb_s_rwclip(nz,ia,ja,val,imin,imax,jmin,jmax) + use psb_base_mod, only : psb_spk_, psb_ipk_ + + implicit none + integer(psb_ipk_), intent(inout) :: nz + integer(psb_ipk_), intent(inout) :: ia(*), ja(*) + real(psb_spk_), intent(inout) :: val(*) + integer(psb_ipk_), intent(in) :: imin,imax,jmin,jmax + end subroutine psb_s_rwclip + end interface + + interface sparsify + subroutine psb_s_sparsify(idiag,nzrmax,sp_thresh,n,zw,nz,iz,valz,info, & + & istart,iheap,ikr) + use psb_base_mod, only : psb_spk_, psb_ipk_, psb_i_heap + implicit none + + real(psb_spk_), intent(in) :: sp_thresh + integer(psb_ipk_), intent(in) :: idiag, n, nzrmax + real(psb_spk_), intent(inout) :: zw(:) + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), intent(out) :: iz(:) + real(psb_spk_), intent(out) :: valz(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: istart + type(psb_i_heap), optional :: iheap + integer(psb_ipk_), optional :: ikr(:) + end subroutine psb_s_sparsify + subroutine psb_s_sparsify_list(idiag,nzrmax,sp_thresh,n,zw,nz,iz,valz,lhead,listv,ikr,info) + use psb_base_mod, only : psb_spk_, psb_ipk_ + implicit none + + real(psb_spk_), intent(in) :: sp_thresh + integer(psb_ipk_), intent(in) :: idiag, n, nzrmax + real(psb_spk_), intent(inout) :: zw(:) + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), intent(out) :: iz(:) + real(psb_spk_), intent(out) :: valz(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(inout) :: lhead, listv(:) + integer(psb_ipk_) :: ikr(:) + end subroutine psb_s_sparsify_list + + end interface + +end module psb_s_ainv_tools_mod diff --git a/prec/psb_s_biconjg_mod.F90 b/prec/psb_s_biconjg_mod.F90 new file mode 100644 index 00000000..bc2aaefc --- /dev/null +++ b/prec/psb_s_biconjg_mod.F90 @@ -0,0 +1,367 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! Moved here from AMG-AINV, original copyright below. +! +! +! AMG-AINV: Approximate Inverse plugin for +! AMG4PSBLAS version 1.0 +! +! (C) Copyright 2020 +! +! Salvatore Filippone University of Rome Tor Vergata +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! +! +! +! AMG-AINV: Approximate Inverse plugin for +! AMG4PSBLAS version 1.0 +! +! (C) Copyright 2020 +! +! Salvatore Filippone University of Rome Tor Vergata +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +module psb_s_biconjg_mod + + use psb_base_mod + use psb_prec_const_mod + + interface psb_sparse_biconjg + module procedure psb_ssparse_biconjg + end interface + + abstract interface + subroutine psb_ssparse_biconjg_variant(n,a,p,z,w,nzrmax,sp_thresh,info) + use psb_base_mod, only : psb_s_csr_sparse_mat, psb_s_csc_sparse_mat, & + & psb_spk_, psb_ipk_ + ! + implicit none + integer(psb_ipk_), intent(in) :: n + type(psb_s_csr_sparse_mat), intent(in) :: a + type(psb_s_csc_sparse_mat), intent(inout) :: z,w + integer(psb_ipk_), intent(in) :: nzrmax + real(psb_spk_), intent(in) :: sp_thresh + real(psb_spk_), intent(out) :: p(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_ssparse_biconjg_variant + end interface + + + procedure(psb_ssparse_biconjg_variant) :: psb_ssparse_biconjg_llk,& + & psb_ssparse_biconjg_s_llk, psb_ssparse_biconjg_s_ft_llk,& + & psb_ssparse_biconjg_llk_noth, psb_ssparse_biconjg_mlk + +#if defined(HAVE_TUMA_SAINV) + procedure(psb_ssparse_biconjg_variant) :: psb_ssparse_tuma_sainv,& + & psb_ssparse_tuma_lainv +#endif + +contains + + subroutine psb_ssparse_biconjg(alg,n,acsr,p,z,w,nzrmax,sp_thresh,info) + use psb_base_mod + use psb_prec_const_mod + integer(psb_ipk_), intent(in) :: alg,n + type(psb_s_csr_sparse_mat), intent(in) :: acsr + type(psb_sspmat_type), intent(out) :: z, w + integer(psb_ipk_), intent(in) :: nzrmax + real(psb_spk_), intent(in) :: sp_thresh + real(psb_spk_), intent(out) :: p(:) + integer(psb_ipk_), intent(out) :: info + + type(psb_s_csc_sparse_mat) :: zcsc,wcsc + integer(psb_ipk_) :: i,j,k,nrm + integer(psb_ipk_) :: err_act + character(len=20) :: name='psb_sparse_biconjg' + integer(psb_ipk_), parameter :: variant=1 + + + info = psb_success_ + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_; goto 9999 + end if + + if (size(p) precout) - type is (psb_s_bjac_prec_type) + type is (psb_s_bjac_prec_type) call pout%set_ctxt(prec%get_ctxt()) - if (allocated(prec%av)) then + if (allocated(prec%av)) then allocate(pout%av(size(prec%av)),stat=info) - do i=1,size(prec%av) + do i=1,size(prec%av) if (info /= psb_success_) exit call prec%av(i)%clone(pout%av(i),info) enddo if (info /= psb_success_) goto 9999 end if - if (allocated(prec%dv)) then + if (allocated(prec%dv)) then allocate(pout%dv,stat=info) if (info == 0) call prec%dv%clone(pout%dv,info) end if @@ -316,7 +330,7 @@ contains subroutine psb_s_bjac_allocate_wrk(prec,info,vmold,desc) use psb_base_mod implicit none - + ! Arguments class(psb_s_bjac_prec_type), intent(inout) :: prec integer(psb_ipk_), intent(out) :: info @@ -326,11 +340,11 @@ contains ! Local variables integer(psb_ipk_) :: err_act, i character(len=20) :: name - + info=psb_success_ name = 'psb_s_allocate_wrk' call psb_erractionsave(err_act) - + if (psb_get_errstatus().ne.0) goto 9999 if (allocated(prec%wrk)) then if (size(prec%wrk)<2) then @@ -343,11 +357,11 @@ contains if (info /= 0) then info = psb_err_internal_error_; call psb_errpush(info,name,a_err="deallocate"); goto 9999 end if - + if (.not.allocated(prec%wrk)) then - if (.not.present(desc)) then + if (.not.present(desc)) then info = psb_err_internal_error_; call psb_errpush(info,name,a_err="no desc?"); goto 9999 - end if + end if allocate(prec%wrk(2),stat=info) do i=1, 2 if (info == 0) call psb_geall(prec%wrk(i),desc,info) @@ -357,19 +371,19 @@ contains if (info /= 0) then info = psb_err_internal_error_; call psb_errpush(info,name,a_err="allocate"); goto 9999 end if - + call psb_erractionrestore(err_act) return - + 9999 call psb_error_handler(err_act) return - + end subroutine psb_s_bjac_allocate_wrk subroutine psb_s_bjac_free_wrk(prec,info) use psb_base_mod implicit none - + ! Arguments class(psb_s_bjac_prec_type), intent(inout) :: prec integer(psb_ipk_), intent(out) :: info @@ -378,14 +392,14 @@ contains integer(psb_ipk_) :: err_act integer(psb_ipk_) :: i character(len=20) :: name - + info=psb_success_ name = 'psb_s_allocate_wrk' call psb_erractionsave(err_act) - + if (psb_get_errstatus().ne.0) goto 9999 - info = psb_success_ + info = psb_success_ if (allocated(prec%wrk)) then do i=1,size(prec%wrk) if (info == 0) call prec%wrk(i)%free(info) @@ -395,29 +409,29 @@ contains if (info /= 0) then info = psb_err_internal_error_; call psb_errpush(info,name,a_err="deallocate"); goto 9999 end if - + call psb_erractionrestore(err_act) return - + 9999 call psb_error_handler(err_act) return - + end subroutine psb_s_bjac_free_wrk function psb_s_bjac_is_allocated_wrk(prec) result(res) use psb_base_mod implicit none - + ! Arguments class(psb_s_bjac_prec_type), intent(in) :: prec logical :: res - ! In the base version we can say yes, because + ! In the base version we can say yes, because ! there is nothing to allocate res = allocated(prec%wrk) - + end function psb_s_bjac_is_allocated_wrk - + end module psb_s_bjacprec diff --git a/prec/psb_s_invk_fact_mod.f90 b/prec/psb_s_invk_fact_mod.f90 new file mode 100644 index 00000000..6b0d3553 --- /dev/null +++ b/prec/psb_s_invk_fact_mod.f90 @@ -0,0 +1,165 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! Moved here from AMG4PSBLAS, original copyright below. +! +! +! AMG-AINV: Approximate Inverse plugin for +! AMG4PSBLAS version 1.0 +! +! (C) Copyright 2020 +! +! Salvatore Filippone University of Rome Tor Vergata +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: psb_s_invk_fact_mod.f90 +! +! Module: psb_s_invk_fact_mod +! +! This module defines some interfaces used internally by the implementation of +! psb_s_invk_solver, but not visible to the end user. +! +! +module psb_s_invk_fact_mod + + use psb_base_mod + use psb_prec_const_mod + + interface psb_invk_fact + subroutine psb_s_invk_bld(a,fill1, fill2,lmat,d,umat,desc,info,blck) + ! import + import psb_sspmat_type, psb_ipk_, psb_spk_, psb_desc_type + ! Arguments + type(psb_sspmat_type), intent(in), target :: a + integer(psb_ipk_), intent(in) :: fill1, fill2 + type(psb_sspmat_type), intent(inout) :: lmat, umat + real(psb_spk_), allocatable :: d(:) + Type(psb_desc_type), Intent(inout) :: desc + integer(psb_ipk_), intent(out) :: info + type(psb_sspmat_type), intent(in), optional :: blck + end subroutine + end interface + + ! Inner workings + interface psb_sparse_invk + subroutine psb_ssparse_invk(n,a,z,fill_in,info,inlevs) + ! Import + import psb_ipk_, psb_sspmat_type + ! Arguments + integer(psb_ipk_), intent(in) :: n + type(psb_sspmat_type), intent(in) :: a + type(psb_sspmat_type), intent(inout) :: z + integer(psb_ipk_), intent(in) :: fill_in + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: inlevs(:) + end subroutine psb_ssparse_invk + end interface + + interface psb_invk_inv + subroutine psb_sinvk_inv(fill_in,i,row,rowlevs,heap,uia1,uia2,uaspk,uplevs,& + & nidx,idxs,info) + + use psb_base_mod, only : psb_sspmat_type, psb_spk_, psb_i_heap, psb_ipk_ + implicit none + + ! Arguments + type(psb_i_heap), intent(inout) :: heap + integer(psb_ipk_), intent(in) :: i, fill_in + integer(psb_ipk_), intent(inout) :: nidx,info + integer(psb_ipk_), intent(inout) :: rowlevs(:) + integer(psb_ipk_), allocatable, intent(inout) :: idxs(:) + integer(psb_ipk_), intent(in) :: uia1(:),uia2(:),uplevs(:) + real(psb_spk_), intent(in) :: uaspk(:) + real(psb_spk_), intent(inout) :: row(:) + + + end subroutine psb_sinvk_inv + end interface + + interface psb_invk_copyin + subroutine psb_s_invk_copyin(i,m,a,jmin,jmax,row,rowlevs,heap,& + & ktrw,trw,info,sign,inlevs) + ! Import + use psb_base_mod, only : psb_s_csr_sparse_mat, psb_s_coo_sparse_mat,& + & psb_spk_, psb_i_heap, psb_ipk_ + implicit none + ! Arguments + type(psb_s_csr_sparse_mat), intent(in) :: a + type(psb_s_coo_sparse_mat), intent(inout) :: trw + integer(psb_ipk_), intent(in) :: i,m,jmin,jmax + integer(psb_ipk_), intent(inout) :: ktrw,info + integer(psb_ipk_), intent(inout) :: rowlevs(:) + real(psb_spk_), intent(inout) :: row(:) + type(psb_i_heap), intent(inout) :: heap + real(psb_spk_), optional, intent(in) :: sign + integer(psb_ipk_), intent(in), optional :: inlevs(:) + end subroutine psb_s_invk_copyin + end interface + + interface psb_invk_copyout + subroutine psb_s_invk_copyout(fill_in,i,m,row,rowlevs,nidx,idxs,& + & l2,uia1,uia2,uaspk,info) + use psb_base_mod, only : psb_sspmat_type, psb_spk_, psb_i_heap, psb_ipk_ + implicit none + ! Arguments + integer(psb_ipk_), intent(in) :: fill_in, i, m, nidx + integer(psb_ipk_), intent(inout) :: l2, info + integer(psb_ipk_), intent(inout) :: rowlevs(:), idxs(:) + integer(psb_ipk_), allocatable, intent(inout) :: uia1(:), uia2(:) + real(psb_spk_), allocatable, intent(inout) :: uaspk(:) + real(psb_spk_), intent(inout) :: row(:) + end subroutine psb_s_invk_copyout + end interface + +end module diff --git a/prec/psb_s_invt_fact_mod.f90 b/prec/psb_s_invt_fact_mod.f90 new file mode 100644 index 00000000..2c9ce38c --- /dev/null +++ b/prec/psb_s_invt_fact_mod.f90 @@ -0,0 +1,168 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! Moved here from AMG4PSBLAS, original copyright below. +! +! +! AMG-AINV: Approximate Inverse plugin for +! AMG4PSBLAS version 1.0 +! +! (C) Copyright 2020 +! +! Salvatore Filippone University of Rome Tor Vergata +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: psb_s_invt_fact_mod.f90 +! +! Module: psb_s_invt_fact_mod +! +! This module defines some interfaces used internally by the implementation of +! psb_s_invt_solver, but not visible to the end user. +! +! +module psb_s_invt_fact_mod + + use psb_base_mod + use psb_prec_const_mod + + interface psb_invt_fact + subroutine psb_s_invt_bld(a,fillin,invfill,thresh,invthresh,& + & lmat,d,umat,desc,info,blck) + ! Import + import psb_sspmat_type, psb_spk_, psb_ipk_, psb_desc_type + ! Arguments + type(psb_sspmat_type), intent(in), target :: a + integer(psb_ipk_), intent(in) :: fillin,invfill + real(psb_spk_), intent(in) :: thresh + real(psb_spk_), intent(in) :: invthresh + type(psb_sspmat_type), intent(inout) :: lmat, umat + real(psb_spk_), allocatable :: d(:) + Type(psb_desc_type), Intent(inout) :: desc + integer(psb_ipk_), intent(out) :: info + type(psb_sspmat_type), intent(in), optional :: blck + end subroutine psb_s_invt_bld + end interface + + ! Interfaces for the inner workings + + interface + subroutine psb_ssparse_invt(n,a,z,nzrmax,sp_thresh,info) + ! Import + import psb_sspmat_type, psb_spk_, psb_ipk_ + ! Arguments + integer(psb_ipk_), intent(in) :: n + type(psb_sspmat_type), intent(in) :: a + type(psb_sspmat_type), intent(inout) :: z + integer(psb_ipk_), intent(in) :: nzrmax + real(psb_spk_), intent(in) :: sp_thresh + integer(psb_ipk_), intent(out) :: info + end subroutine psb_ssparse_invt + end interface + + interface + subroutine psb_s_invt_copyin(i,m,a,jd,jmin,jmax,nlw,nup,jmaxup,nrmi,row,heap,& + & irwt,ktrw,trw,info,sign) + ! Import + import psb_s_csr_sparse_mat, psb_s_coo_sparse_mat, psb_spk_, & + & psb_ipk_, psb_i_heap + ! Arguments + type(psb_s_csr_sparse_mat), intent(in) :: a + type(psb_s_coo_sparse_mat), intent(inout) :: trw + integer(psb_ipk_), intent(in) :: i, m,jmin,jmax,jd + integer(psb_ipk_), intent(inout) :: ktrw,nlw,nup,jmaxup,info + integer(psb_ipk_), intent(inout) :: irwt(:) + real(psb_spk_), intent(inout) :: nrmi + real(psb_spk_), intent(inout) :: row(:) + type(psb_i_heap), intent(inout) :: heap + real(psb_spk_), intent(in), optional :: sign + end subroutine psb_s_invt_copyin + end interface + + interface + subroutine psb_s_invt_copyout(fill_in,thres,i,m,nlw,nup,jmaxup,nrmi,row, & + & nidx,idxs,l2,ja,irp,val,info) + ! Import + import psb_spk_, psb_ipk_ + ! Arguments + integer(psb_ipk_), intent(in) :: fill_in,i,m,nidx,nlw,nup,jmaxup + integer(psb_ipk_), intent(in) :: idxs(:) + integer(psb_ipk_), intent(inout) :: l2, info + integer(psb_ipk_), allocatable, intent(inout) :: ja(:),irp(:) + real(psb_spk_), intent(in) :: thres,nrmi + real(psb_spk_),allocatable, intent(inout) :: val(:) + real(psb_spk_), intent(inout) :: row(:) + end subroutine psb_s_invt_copyout + end interface + + interface psb_invt_inv + subroutine psb_s_invt_inv(thres,i,nrmi,row,heap,irwt,ja,irp,val, & + & nidx,idxs,info) + ! import + import psb_spk_, psb_i_heap, psb_ipk_ + ! Arguments + type(psb_i_heap), intent(inout) :: heap + integer(psb_ipk_), intent(in) :: i + integer(psb_ipk_), intent(inout) :: nidx,info + integer(psb_ipk_), intent(inout) :: irwt(:) + real(psb_spk_), intent(in) :: thres,nrmi + integer(psb_ipk_), allocatable, intent(inout) :: idxs(:) + integer(psb_ipk_), intent(in) :: ja(:),irp(:) + real(psb_spk_), intent(in) :: val(:) + real(psb_spk_), intent(inout) :: row(:) + end subroutine + end interface + +contains + +end module psb_s_invt_fact_mod diff --git a/prec/psb_s_prec_type.f90 b/prec/psb_s_prec_type.f90 index b6c43c8a..764e9109 100644 --- a/prec/psb_s_prec_type.f90 +++ b/prec/psb_s_prec_type.f90 @@ -1,9 +1,9 @@ -! +! ! Parallel Sparse BLAS version 3.5 ! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! +! Salvatore Filippone +! Alfredo Buttari +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -15,7 +15,7 @@ ! 3. The name of the PSBLAS group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. -! +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -27,8 +27,8 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! -! +! +! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !! Module to define PREC_DATA, !! !! structure for preconditioning. !! @@ -54,6 +54,10 @@ module psb_s_prec_type procedure, pass(prec) :: build => psb_sprecbld procedure, pass(prec) :: init => psb_sprecinit procedure, pass(prec) :: descr => psb_sfile_prec_descr + procedure, pass(prec) :: cseti => psb_scprecseti + procedure, pass(prec) :: csetc => psb_scprecsetc + procedure, pass(prec) :: csetr => psb_scprecsetr + generic, public :: set => cseti, csetc, csetr procedure, pass(prec) :: allocate_wrk => psb_s_allocate_wrk procedure, pass(prec) :: free_wrk => psb_s_free_wrk procedure, pass(prec) :: is_allocated_wrk => psb_s_is_allocated_wrk @@ -102,7 +106,7 @@ module psb_s_prec_type module procedure psb_sprec_sizeof end interface - interface + interface subroutine psb_s_apply2_vect(prec,x,y,desc_data,info,trans,work) import :: psb_ipk_, psb_desc_type, psb_sprec_type, psb_s_vect_type, psb_spk_ type(psb_desc_type),intent(in) :: desc_data @@ -114,8 +118,8 @@ module psb_s_prec_type real(psb_spk_),intent(inout), optional, target :: work(:) end subroutine psb_s_apply2_vect end interface - - interface + + interface subroutine psb_s_apply1_vect(prec,x,desc_data,info,trans,work) import :: psb_ipk_, psb_desc_type, psb_sprec_type, psb_s_vect_type, psb_spk_ type(psb_desc_type),intent(in) :: desc_data @@ -126,7 +130,7 @@ module psb_s_prec_type real(psb_spk_),intent(inout), optional, target :: work(:) end subroutine psb_s_apply1_vect end interface - + interface subroutine psb_s_apply2v(prec,x,y,desc_data,info,trans,work) import :: psb_ipk_, psb_desc_type, psb_sprec_type, psb_s_vect_type, psb_spk_ @@ -139,8 +143,8 @@ module psb_s_prec_type real(psb_spk_),intent(inout), optional, target :: work(:) end subroutine psb_s_apply2v end interface - - interface + + interface subroutine psb_s_apply1v(prec,x,desc_data,info,trans) import :: psb_ipk_, psb_desc_type, psb_sprec_type, psb_s_vect_type, psb_spk_ type(psb_desc_type),intent(in) :: desc_data @@ -150,56 +154,89 @@ module psb_s_prec_type character(len=1), optional :: trans end subroutine psb_s_apply1v end interface - + + interface + subroutine psb_scprecseti(prec,what,val,info,ilev,ilmax,pos,idx) + import :: psb_sprec_type, psb_sspmat_type, psb_desc_type, psb_spk_, & + & psb_ipk_ + class(psb_sprec_type), intent(inout) :: prec + character(len=*), intent(in) :: what + integer(psb_ipk_), intent(in) :: val + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), optional, intent(in) :: ilev,ilmax,idx + character(len=*), optional, intent(in) :: pos + end subroutine psb_scprecseti + subroutine psb_scprecsetr(prec,what,val,info,ilev,ilmax,pos,idx) + import :: psb_sprec_type, psb_sspmat_type, psb_desc_type, psb_spk_, & + & psb_ipk_ + class(psb_sprec_type), intent(inout) :: prec + character(len=*), intent(in) :: what + real(psb_spk_), intent(in) :: val + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), optional, intent(in) :: ilev,ilmax,idx + character(len=*), optional, intent(in) :: pos + end subroutine psb_scprecsetr + subroutine psb_scprecsetc(prec,what,string,info,ilev,ilmax,pos,idx) + import :: psb_sprec_type, psb_sspmat_type, psb_desc_type, psb_spk_, & + & psb_ipk_ + class(psb_sprec_type), intent(inout) :: prec + character(len=*), intent(in) :: what + character(len=*), intent(in) :: string + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), optional, intent(in) :: ilev,ilmax,idx + character(len=*), optional, intent(in) :: pos + end subroutine psb_scprecsetc +end interface + contains subroutine psb_sfile_prec_descr(prec,iout, root) use psb_base_mod - implicit none + implicit none class(psb_sprec_type), intent(in) :: prec integer(psb_ipk_), intent(in), optional :: iout integer(psb_ipk_), intent(in), optional :: root integer(psb_ipk_) :: iout_,info - character(len=20) :: name='prec_descr' - - if (present(iout)) then + character(len=20) :: name='prec_descr' + + if (present(iout)) then iout_ = iout else - iout_ = 6 + iout_ = 6 end if - if (.not.allocated(prec%prec)) then + if (.not.allocated(prec%prec)) then info = 1124 call psb_errpush(info,name,a_err="preconditioner") end if call prec%prec%descr(iout=iout,root=root) - + end subroutine psb_sfile_prec_descr subroutine psb_s_prec_dump(prec,info,prefix,head) - implicit none + implicit none type(psb_sprec_type), intent(in) :: prec integer(psb_ipk_), intent(out) :: info character(len=*), intent(in), optional :: prefix,head - ! len of prefix_ + ! len of prefix_ info = 0 - if (.not.allocated(prec%prec)) then + if (.not.allocated(prec%prec)) then info = -1 write(psb_err_unit,*) 'Trying to dump a non-built preconditioner' return end if - + call prec%prec%dump(info,prefix,head) - - + + end subroutine psb_s_prec_dump subroutine psb_s_allocate_wrk(prec,info,vmold,desc) use psb_base_mod implicit none - + ! Arguments class(psb_sprec_type), intent(inout) :: prec integer(psb_ipk_), intent(out) :: info @@ -209,33 +246,33 @@ contains ! Local variables integer(psb_ipk_) :: err_act character(len=20) :: name - + info=psb_success_ name = 'psb_s_allocate_wrk' call psb_erractionsave(err_act) - + if (psb_get_errstatus().ne.0) goto 9999 - if (.not.allocated(prec%prec)) then + if (.not.allocated(prec%prec)) then info = -1 write(psb_err_unit,*) 'Trying to allocate wrk to a non-built preconditioner' return end if - + call prec%prec%allocate_wrk(info,vmold=vmold,desc=desc) - + call psb_erractionrestore(err_act) return - + 9999 call psb_error_handler(err_act) return - + end subroutine psb_s_allocate_wrk subroutine psb_s_free_wrk(prec,info) use psb_base_mod implicit none - + ! Arguments class(psb_sprec_type), intent(inout) :: prec integer(psb_ipk_), intent(out) :: info @@ -243,47 +280,47 @@ contains ! Local variables integer(psb_ipk_) :: err_act character(len=20) :: name - + info=psb_success_ name = 'psb_s_free_wrk' call psb_erractionsave(err_act) - + if (psb_get_errstatus().ne.0) goto 9999 - if (.not.allocated(prec%prec)) then + if (.not.allocated(prec%prec)) then info = -1 write(psb_err_unit,*) 'Trying to free a non-built preconditioner' return end if - + call prec%prec%free_wrk(info) - + call psb_erractionrestore(err_act) return - + 9999 call psb_error_handler(err_act) return - + end subroutine psb_s_free_wrk function psb_s_is_allocated_wrk(prec) result(res) implicit none - + ! Arguments class(psb_sprec_type), intent(in) :: prec logical :: res if (.not.allocated(prec%prec)) then res = .false. - else + else res = prec%prec%is_allocated_wrk() end if - + end function psb_s_is_allocated_wrk subroutine psb_s_precfree(p,info) use psb_base_mod - implicit none + implicit none type(psb_sprec_type), intent(inout) :: p integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: me, err_act,i @@ -303,14 +340,14 @@ contains return 9999 call psb_error_handler(err_act) - + return end subroutine psb_s_precfree subroutine psb_s_prec_free(prec,info) use psb_base_mod - implicit none + implicit none class(psb_sprec_type), intent(inout) :: prec integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: me, err_act,i @@ -324,7 +361,7 @@ contains me=-1 - if (allocated(prec%prec)) then + if (allocated(prec%prec)) then call prec%prec%free(info) if (info /= psb_success_) goto 9999 deallocate(prec%prec,stat=info) @@ -343,26 +380,26 @@ contains class(psb_sprec_type), intent(in) :: prec integer(psb_epk_) :: val integer(psb_ipk_) :: i - + val = 0 - if (allocated(prec%prec)) then + if (allocated(prec%prec)) then val = val + prec%prec%sizeof() end if - + end function psb_sprec_sizeof subroutine psb_s_prec_clone(prec,precout,info) - implicit none + implicit none class(psb_sprec_type), intent(inout) :: prec class(psb_sprec_type), intent(inout) :: precout integer(psb_ipk_), intent(out) :: info info = psb_success_ call prec%free(info) - if (allocated(prec%prec)) then + if (allocated(prec%prec)) then call prec%prec%clone(precout%prec,info) end if - + end subroutine psb_s_prec_clone end module psb_s_prec_type diff --git a/prec/psb_z_ainv_fact_mod.f90 b/prec/psb_z_ainv_fact_mod.f90 new file mode 100644 index 00000000..490fe132 --- /dev/null +++ b/prec/psb_z_ainv_fact_mod.f90 @@ -0,0 +1,98 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! Moved here from MLD2P4, original copyright below. +! +! +! MLD2P4 version 2.2 +! MultiLevel Domain Decomposition Parallel Preconditioners Package +! based on PSBLAS (Parallel Sparse BLAS version 3.5) +! +! (C) Copyright 2008-2018 +! +! Salvatore Filippone +! Pasqua D'Ambra +! Daniela di Serafino +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the MLD2P4 group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MLD2P4 GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! +! +! File: psb_z_ainv_fact_mod.f90 +! +! Module: psb_z_ainv_fact_mod +! +! This module defines some interfaces used internally by the implementation of +! psb_z_ainv_solver, but not visible to the end user. +! +! +module psb_z_ainv_fact_mod + use psb_base_mod + use psb_prec_const_mod + + interface psb_ainv_fact + subroutine psb_z_ainv_bld(a,alg,fillin,thresh,wmat,d,zmat,desc,info,blck,iscale) + import psb_zspmat_type, psb_dpk_, psb_ipk_, psb_desc_type + type(psb_zspmat_type), intent(in), target :: a + integer(psb_ipk_), intent(in) :: fillin,alg + real(psb_dpk_), intent(in) :: thresh + type(psb_zspmat_type), intent(inout) :: wmat, zmat + complex(psb_dpk_), allocatable :: d(:) + Type(psb_desc_type), Intent(in) :: desc + integer(psb_ipk_), intent(out) :: info + type(psb_zspmat_type), intent(in), optional :: blck + integer(psb_ipk_), intent(in), optional :: iscale + end subroutine psb_z_ainv_bld + end interface + +end module psb_z_ainv_fact_mod diff --git a/prec/psb_z_ainv_tools_mod.f90 b/prec/psb_z_ainv_tools_mod.f90 new file mode 100644 index 00000000..f611c2a7 --- /dev/null +++ b/prec/psb_z_ainv_tools_mod.f90 @@ -0,0 +1,132 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! Moved here from AMG-AINV, original copyright below. +! +! +! AMG-AINV: Approximate Inverse plugin for +! AMG4PSBLAS version 1.0 +! +! (C) Copyright 2020 +! +! Salvatore Filippone University of Rome Tor Vergata +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! +! +! +! +! +module psb_z_ainv_tools_mod + + interface sp_drop + subroutine psb_z_sp_drop(idiag,nzrmax,sp_thresh,nz,iz,valz,info) + use psb_base_mod, only : psb_dpk_, psb_ipk_ + implicit none + real(psb_dpk_), intent(in) :: sp_thresh + integer(psb_ipk_), intent(in) :: idiag, nzrmax + integer(psb_ipk_), intent(inout) :: nz + integer(psb_ipk_), intent(inout) :: iz(:) + complex(psb_dpk_), intent(inout) :: valz(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_sp_drop + end interface + + interface rwclip + subroutine psb_z_rwclip(nz,ia,ja,val,imin,imax,jmin,jmax) + use psb_base_mod, only : psb_dpk_, psb_ipk_ + + implicit none + integer(psb_ipk_), intent(inout) :: nz + integer(psb_ipk_), intent(inout) :: ia(*), ja(*) + complex(psb_dpk_), intent(inout) :: val(*) + integer(psb_ipk_), intent(in) :: imin,imax,jmin,jmax + end subroutine psb_z_rwclip + end interface + + interface sparsify + subroutine psb_z_sparsify(idiag,nzrmax,sp_thresh,n,zw,nz,iz,valz,info, & + & istart,iheap,ikr) + use psb_base_mod, only : psb_dpk_, psb_ipk_, psb_i_heap + implicit none + + real(psb_dpk_), intent(in) :: sp_thresh + integer(psb_ipk_), intent(in) :: idiag, n, nzrmax + complex(psb_dpk_), intent(inout) :: zw(:) + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), intent(out) :: iz(:) + complex(psb_dpk_), intent(out) :: valz(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: istart + type(psb_i_heap), optional :: iheap + integer(psb_ipk_), optional :: ikr(:) + end subroutine psb_z_sparsify + subroutine psb_z_sparsify_list(idiag,nzrmax,sp_thresh,n,zw,nz,iz,valz,lhead,listv,ikr,info) + use psb_base_mod, only : psb_dpk_, psb_ipk_ + implicit none + + real(psb_dpk_), intent(in) :: sp_thresh + integer(psb_ipk_), intent(in) :: idiag, n, nzrmax + complex(psb_dpk_), intent(inout) :: zw(:) + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), intent(out) :: iz(:) + complex(psb_dpk_), intent(out) :: valz(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(inout) :: lhead, listv(:) + integer(psb_ipk_) :: ikr(:) + end subroutine psb_z_sparsify_list + + end interface + +end module psb_z_ainv_tools_mod diff --git a/prec/psb_z_biconjg_mod.F90 b/prec/psb_z_biconjg_mod.F90 new file mode 100644 index 00000000..b40485e7 --- /dev/null +++ b/prec/psb_z_biconjg_mod.F90 @@ -0,0 +1,367 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! Moved here from AMG-AINV, original copyright below. +! +! +! AMG-AINV: Approximate Inverse plugin for +! AMG4PSBLAS version 1.0 +! +! (C) Copyright 2020 +! +! Salvatore Filippone University of Rome Tor Vergata +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! +! +! +! AMG-AINV: Approximate Inverse plugin for +! AMG4PSBLAS version 1.0 +! +! (C) Copyright 2020 +! +! Salvatore Filippone University of Rome Tor Vergata +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +module psb_z_biconjg_mod + + use psb_base_mod + use psb_prec_const_mod + + interface psb_sparse_biconjg + module procedure psb_zsparse_biconjg + end interface + + abstract interface + subroutine psb_zsparse_biconjg_variant(n,a,p,z,w,nzrmax,sp_thresh,info) + use psb_base_mod, only : psb_z_csr_sparse_mat, psb_z_csc_sparse_mat, & + & psb_dpk_, psb_ipk_ + ! + implicit none + integer(psb_ipk_), intent(in) :: n + type(psb_z_csr_sparse_mat), intent(in) :: a + type(psb_z_csc_sparse_mat), intent(inout) :: z,w + integer(psb_ipk_), intent(in) :: nzrmax + real(psb_dpk_), intent(in) :: sp_thresh + complex(psb_dpk_), intent(out) :: p(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_zsparse_biconjg_variant + end interface + + + procedure(psb_zsparse_biconjg_variant) :: psb_zsparse_biconjg_llk,& + & psb_zsparse_biconjg_s_llk, psb_zsparse_biconjg_s_ft_llk,& + & psb_zsparse_biconjg_llk_noth, psb_zsparse_biconjg_mlk + +#if defined(HAVE_TUMA_SAINV) + procedure(psb_zsparse_biconjg_variant) :: psb_zsparse_tuma_sainv,& + & psb_zsparse_tuma_lainv +#endif + +contains + + subroutine psb_zsparse_biconjg(alg,n,acsr,p,z,w,nzrmax,sp_thresh,info) + use psb_base_mod + use psb_prec_const_mod + integer(psb_ipk_), intent(in) :: alg,n + type(psb_z_csr_sparse_mat), intent(in) :: acsr + type(psb_zspmat_type), intent(out) :: z, w + integer(psb_ipk_), intent(in) :: nzrmax + real(psb_dpk_), intent(in) :: sp_thresh + complex(psb_dpk_), intent(out) :: p(:) + integer(psb_ipk_), intent(out) :: info + + type(psb_z_csc_sparse_mat) :: zcsc,wcsc + integer(psb_ipk_) :: i,j,k,nrm + integer(psb_ipk_) :: err_act + character(len=20) :: name='psb_sparse_biconjg' + integer(psb_ipk_), parameter :: variant=1 + + + info = psb_success_ + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_; goto 9999 + end if + + if (size(p) precout) - type is (psb_z_bjac_prec_type) + type is (psb_z_bjac_prec_type) call pout%set_ctxt(prec%get_ctxt()) - if (allocated(prec%av)) then + if (allocated(prec%av)) then allocate(pout%av(size(prec%av)),stat=info) - do i=1,size(prec%av) + do i=1,size(prec%av) if (info /= psb_success_) exit call prec%av(i)%clone(pout%av(i),info) enddo if (info /= psb_success_) goto 9999 end if - if (allocated(prec%dv)) then + if (allocated(prec%dv)) then allocate(pout%dv,stat=info) if (info == 0) call prec%dv%clone(pout%dv,info) end if @@ -316,7 +330,7 @@ contains subroutine psb_z_bjac_allocate_wrk(prec,info,vmold,desc) use psb_base_mod implicit none - + ! Arguments class(psb_z_bjac_prec_type), intent(inout) :: prec integer(psb_ipk_), intent(out) :: info @@ -326,11 +340,11 @@ contains ! Local variables integer(psb_ipk_) :: err_act, i character(len=20) :: name - + info=psb_success_ name = 'psb_z_allocate_wrk' call psb_erractionsave(err_act) - + if (psb_get_errstatus().ne.0) goto 9999 if (allocated(prec%wrk)) then if (size(prec%wrk)<2) then @@ -343,11 +357,11 @@ contains if (info /= 0) then info = psb_err_internal_error_; call psb_errpush(info,name,a_err="deallocate"); goto 9999 end if - + if (.not.allocated(prec%wrk)) then - if (.not.present(desc)) then + if (.not.present(desc)) then info = psb_err_internal_error_; call psb_errpush(info,name,a_err="no desc?"); goto 9999 - end if + end if allocate(prec%wrk(2),stat=info) do i=1, 2 if (info == 0) call psb_geall(prec%wrk(i),desc,info) @@ -357,19 +371,19 @@ contains if (info /= 0) then info = psb_err_internal_error_; call psb_errpush(info,name,a_err="allocate"); goto 9999 end if - + call psb_erractionrestore(err_act) return - + 9999 call psb_error_handler(err_act) return - + end subroutine psb_z_bjac_allocate_wrk subroutine psb_z_bjac_free_wrk(prec,info) use psb_base_mod implicit none - + ! Arguments class(psb_z_bjac_prec_type), intent(inout) :: prec integer(psb_ipk_), intent(out) :: info @@ -378,14 +392,14 @@ contains integer(psb_ipk_) :: err_act integer(psb_ipk_) :: i character(len=20) :: name - + info=psb_success_ name = 'psb_z_allocate_wrk' call psb_erractionsave(err_act) - + if (psb_get_errstatus().ne.0) goto 9999 - info = psb_success_ + info = psb_success_ if (allocated(prec%wrk)) then do i=1,size(prec%wrk) if (info == 0) call prec%wrk(i)%free(info) @@ -395,29 +409,29 @@ contains if (info /= 0) then info = psb_err_internal_error_; call psb_errpush(info,name,a_err="deallocate"); goto 9999 end if - + call psb_erractionrestore(err_act) return - + 9999 call psb_error_handler(err_act) return - + end subroutine psb_z_bjac_free_wrk function psb_z_bjac_is_allocated_wrk(prec) result(res) use psb_base_mod implicit none - + ! Arguments class(psb_z_bjac_prec_type), intent(in) :: prec logical :: res - ! In the base version we can say yes, because + ! In the base version we can say yes, because ! there is nothing to allocate res = allocated(prec%wrk) - + end function psb_z_bjac_is_allocated_wrk - + end module psb_z_bjacprec diff --git a/prec/psb_z_invk_fact_mod.f90 b/prec/psb_z_invk_fact_mod.f90 new file mode 100644 index 00000000..0a1e5faf --- /dev/null +++ b/prec/psb_z_invk_fact_mod.f90 @@ -0,0 +1,165 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! Moved here from AMG4PSBLAS, original copyright below. +! +! +! AMG-AINV: Approximate Inverse plugin for +! AMG4PSBLAS version 1.0 +! +! (C) Copyright 2020 +! +! Salvatore Filippone University of Rome Tor Vergata +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: psb_z_invk_fact_mod.f90 +! +! Module: psb_z_invk_fact_mod +! +! This module defines some interfaces used internally by the implementation of +! psb_z_invk_solver, but not visible to the end user. +! +! +module psb_z_invk_fact_mod + + use psb_base_mod + use psb_prec_const_mod + + interface psb_invk_fact + subroutine psb_z_invk_bld(a,fill1, fill2,lmat,d,umat,desc,info,blck) + ! import + import psb_zspmat_type, psb_ipk_, psb_dpk_, psb_desc_type + ! Arguments + type(psb_zspmat_type), intent(in), target :: a + integer(psb_ipk_), intent(in) :: fill1, fill2 + type(psb_zspmat_type), intent(inout) :: lmat, umat + complex(psb_dpk_), allocatable :: d(:) + Type(psb_desc_type), Intent(inout) :: desc + integer(psb_ipk_), intent(out) :: info + type(psb_zspmat_type), intent(in), optional :: blck + end subroutine + end interface + + ! Inner workings + interface psb_sparse_invk + subroutine psb_zsparse_invk(n,a,z,fill_in,info,inlevs) + ! Import + import psb_ipk_, psb_zspmat_type + ! Arguments + integer(psb_ipk_), intent(in) :: n + type(psb_zspmat_type), intent(in) :: a + type(psb_zspmat_type), intent(inout) :: z + integer(psb_ipk_), intent(in) :: fill_in + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: inlevs(:) + end subroutine psb_zsparse_invk + end interface + + interface psb_invk_inv + subroutine psb_zinvk_inv(fill_in,i,row,rowlevs,heap,uia1,uia2,uaspk,uplevs,& + & nidx,idxs,info) + + use psb_base_mod, only : psb_zspmat_type, psb_dpk_, psb_i_heap, psb_ipk_ + implicit none + + ! Arguments + type(psb_i_heap), intent(inout) :: heap + integer(psb_ipk_), intent(in) :: i, fill_in + integer(psb_ipk_), intent(inout) :: nidx,info + integer(psb_ipk_), intent(inout) :: rowlevs(:) + integer(psb_ipk_), allocatable, intent(inout) :: idxs(:) + integer(psb_ipk_), intent(in) :: uia1(:),uia2(:),uplevs(:) + complex(psb_dpk_), intent(in) :: uaspk(:) + complex(psb_dpk_), intent(inout) :: row(:) + + + end subroutine psb_zinvk_inv + end interface + + interface psb_invk_copyin + subroutine psb_z_invk_copyin(i,m,a,jmin,jmax,row,rowlevs,heap,& + & ktrw,trw,info,sign,inlevs) + ! Import + use psb_base_mod, only : psb_z_csr_sparse_mat, psb_z_coo_sparse_mat,& + & psb_dpk_, psb_i_heap, psb_ipk_ + implicit none + ! Arguments + type(psb_z_csr_sparse_mat), intent(in) :: a + type(psb_z_coo_sparse_mat), intent(inout) :: trw + integer(psb_ipk_), intent(in) :: i,m,jmin,jmax + integer(psb_ipk_), intent(inout) :: ktrw,info + integer(psb_ipk_), intent(inout) :: rowlevs(:) + complex(psb_dpk_), intent(inout) :: row(:) + type(psb_i_heap), intent(inout) :: heap + real(psb_dpk_), optional, intent(in) :: sign + integer(psb_ipk_), intent(in), optional :: inlevs(:) + end subroutine psb_z_invk_copyin + end interface + + interface psb_invk_copyout + subroutine psb_z_invk_copyout(fill_in,i,m,row,rowlevs,nidx,idxs,& + & l2,uia1,uia2,uaspk,info) + use psb_base_mod, only : psb_zspmat_type, psb_dpk_, psb_i_heap, psb_ipk_ + implicit none + ! Arguments + integer(psb_ipk_), intent(in) :: fill_in, i, m, nidx + integer(psb_ipk_), intent(inout) :: l2, info + integer(psb_ipk_), intent(inout) :: rowlevs(:), idxs(:) + integer(psb_ipk_), allocatable, intent(inout) :: uia1(:), uia2(:) + complex(psb_dpk_), allocatable, intent(inout) :: uaspk(:) + complex(psb_dpk_), intent(inout) :: row(:) + end subroutine psb_z_invk_copyout + end interface + +end module diff --git a/prec/psb_z_invt_fact_mod.f90 b/prec/psb_z_invt_fact_mod.f90 new file mode 100644 index 00000000..1cdf32f4 --- /dev/null +++ b/prec/psb_z_invt_fact_mod.f90 @@ -0,0 +1,168 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! Moved here from AMG4PSBLAS, original copyright below. +! +! +! AMG-AINV: Approximate Inverse plugin for +! AMG4PSBLAS version 1.0 +! +! (C) Copyright 2020 +! +! Salvatore Filippone University of Rome Tor Vergata +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: psb_z_invt_fact_mod.f90 +! +! Module: psb_z_invt_fact_mod +! +! This module defines some interfaces used internally by the implementation of +! psb_z_invt_solver, but not visible to the end user. +! +! +module psb_z_invt_fact_mod + + use psb_base_mod + use psb_prec_const_mod + + interface psb_invt_fact + subroutine psb_z_invt_bld(a,fillin,invfill,thresh,invthresh,& + & lmat,d,umat,desc,info,blck) + ! Import + import psb_zspmat_type, psb_dpk_, psb_ipk_, psb_desc_type + ! Arguments + type(psb_zspmat_type), intent(in), target :: a + integer(psb_ipk_), intent(in) :: fillin,invfill + real(psb_dpk_), intent(in) :: thresh + real(psb_dpk_), intent(in) :: invthresh + type(psb_zspmat_type), intent(inout) :: lmat, umat + complex(psb_dpk_), allocatable :: d(:) + Type(psb_desc_type), Intent(inout) :: desc + integer(psb_ipk_), intent(out) :: info + type(psb_zspmat_type), intent(in), optional :: blck + end subroutine psb_z_invt_bld + end interface + + ! Interfaces for the inner workings + + interface + subroutine psb_zsparse_invt(n,a,z,nzrmax,sp_thresh,info) + ! Import + import psb_zspmat_type, psb_dpk_, psb_ipk_ + ! Arguments + integer(psb_ipk_), intent(in) :: n + type(psb_zspmat_type), intent(in) :: a + type(psb_zspmat_type), intent(inout) :: z + integer(psb_ipk_), intent(in) :: nzrmax + real(psb_dpk_), intent(in) :: sp_thresh + integer(psb_ipk_), intent(out) :: info + end subroutine psb_zsparse_invt + end interface + + interface + subroutine psb_z_invt_copyin(i,m,a,jd,jmin,jmax,nlw,nup,jmaxup,nrmi,row,heap,& + & irwt,ktrw,trw,info,sign) + ! Import + import psb_z_csr_sparse_mat, psb_z_coo_sparse_mat, psb_dpk_, & + & psb_ipk_, psb_i_heap + ! Arguments + type(psb_z_csr_sparse_mat), intent(in) :: a + type(psb_z_coo_sparse_mat), intent(inout) :: trw + integer(psb_ipk_), intent(in) :: i, m,jmin,jmax,jd + integer(psb_ipk_), intent(inout) :: ktrw,nlw,nup,jmaxup,info + integer(psb_ipk_), intent(inout) :: irwt(:) + real(psb_dpk_), intent(inout) :: nrmi + complex(psb_dpk_), intent(inout) :: row(:) + type(psb_i_heap), intent(inout) :: heap + real(psb_dpk_), intent(in), optional :: sign + end subroutine psb_z_invt_copyin + end interface + + interface + subroutine psb_z_invt_copyout(fill_in,thres,i,m,nlw,nup,jmaxup,nrmi,row, & + & nidx,idxs,l2,ja,irp,val,info) + ! Import + import psb_dpk_, psb_ipk_ + ! Arguments + integer(psb_ipk_), intent(in) :: fill_in,i,m,nidx,nlw,nup,jmaxup + integer(psb_ipk_), intent(in) :: idxs(:) + integer(psb_ipk_), intent(inout) :: l2, info + integer(psb_ipk_), allocatable, intent(inout) :: ja(:),irp(:) + real(psb_dpk_), intent(in) :: thres,nrmi + complex(psb_dpk_),allocatable, intent(inout) :: val(:) + complex(psb_dpk_), intent(inout) :: row(:) + end subroutine psb_z_invt_copyout + end interface + + interface psb_invt_inv + subroutine psb_z_invt_inv(thres,i,nrmi,row,heap,irwt,ja,irp,val, & + & nidx,idxs,info) + ! import + import psb_dpk_, psb_i_heap, psb_ipk_ + ! Arguments + type(psb_i_heap), intent(inout) :: heap + integer(psb_ipk_), intent(in) :: i + integer(psb_ipk_), intent(inout) :: nidx,info + integer(psb_ipk_), intent(inout) :: irwt(:) + real(psb_dpk_), intent(in) :: thres,nrmi + integer(psb_ipk_), allocatable, intent(inout) :: idxs(:) + integer(psb_ipk_), intent(in) :: ja(:),irp(:) + complex(psb_dpk_), intent(in) :: val(:) + complex(psb_dpk_), intent(inout) :: row(:) + end subroutine + end interface + +contains + +end module psb_z_invt_fact_mod diff --git a/prec/psb_z_prec_type.f90 b/prec/psb_z_prec_type.f90 index 7b15c8f2..f40e8004 100644 --- a/prec/psb_z_prec_type.f90 +++ b/prec/psb_z_prec_type.f90 @@ -1,9 +1,9 @@ -! +! ! Parallel Sparse BLAS version 3.5 ! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! +! Salvatore Filippone +! Alfredo Buttari +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -15,7 +15,7 @@ ! 3. The name of the PSBLAS group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. -! +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -27,8 +27,8 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! -! +! +! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !! Module to define PREC_DATA, !! !! structure for preconditioning. !! @@ -54,6 +54,10 @@ module psb_z_prec_type procedure, pass(prec) :: build => psb_zprecbld procedure, pass(prec) :: init => psb_zprecinit procedure, pass(prec) :: descr => psb_zfile_prec_descr + procedure, pass(prec) :: cseti => psb_zcprecseti + procedure, pass(prec) :: csetc => psb_zcprecsetc + procedure, pass(prec) :: csetr => psb_zcprecsetr + generic, public :: set => cseti, csetc, csetr procedure, pass(prec) :: allocate_wrk => psb_z_allocate_wrk procedure, pass(prec) :: free_wrk => psb_z_free_wrk procedure, pass(prec) :: is_allocated_wrk => psb_z_is_allocated_wrk @@ -102,7 +106,7 @@ module psb_z_prec_type module procedure psb_zprec_sizeof end interface - interface + interface subroutine psb_z_apply2_vect(prec,x,y,desc_data,info,trans,work) import :: psb_ipk_, psb_desc_type, psb_zprec_type, psb_z_vect_type, psb_dpk_ type(psb_desc_type),intent(in) :: desc_data @@ -114,8 +118,8 @@ module psb_z_prec_type complex(psb_dpk_),intent(inout), optional, target :: work(:) end subroutine psb_z_apply2_vect end interface - - interface + + interface subroutine psb_z_apply1_vect(prec,x,desc_data,info,trans,work) import :: psb_ipk_, psb_desc_type, psb_zprec_type, psb_z_vect_type, psb_dpk_ type(psb_desc_type),intent(in) :: desc_data @@ -126,7 +130,7 @@ module psb_z_prec_type complex(psb_dpk_),intent(inout), optional, target :: work(:) end subroutine psb_z_apply1_vect end interface - + interface subroutine psb_z_apply2v(prec,x,y,desc_data,info,trans,work) import :: psb_ipk_, psb_desc_type, psb_zprec_type, psb_z_vect_type, psb_dpk_ @@ -139,8 +143,8 @@ module psb_z_prec_type complex(psb_dpk_),intent(inout), optional, target :: work(:) end subroutine psb_z_apply2v end interface - - interface + + interface subroutine psb_z_apply1v(prec,x,desc_data,info,trans) import :: psb_ipk_, psb_desc_type, psb_zprec_type, psb_z_vect_type, psb_dpk_ type(psb_desc_type),intent(in) :: desc_data @@ -150,56 +154,89 @@ module psb_z_prec_type character(len=1), optional :: trans end subroutine psb_z_apply1v end interface - + + interface + subroutine psb_zcprecseti(prec,what,val,info,ilev,ilmax,pos,idx) + import :: psb_zprec_type, psb_zspmat_type, psb_desc_type, psb_dpk_, & + & psb_ipk_ + class(psb_zprec_type), intent(inout) :: prec + character(len=*), intent(in) :: what + integer(psb_ipk_), intent(in) :: val + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), optional, intent(in) :: ilev,ilmax,idx + character(len=*), optional, intent(in) :: pos + end subroutine psb_zcprecseti + subroutine psb_zcprecsetr(prec,what,val,info,ilev,ilmax,pos,idx) + import :: psb_zprec_type, psb_zspmat_type, psb_desc_type, psb_dpk_, & + & psb_ipk_ + class(psb_zprec_type), intent(inout) :: prec + character(len=*), intent(in) :: what + real(psb_dpk_), intent(in) :: val + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), optional, intent(in) :: ilev,ilmax,idx + character(len=*), optional, intent(in) :: pos + end subroutine psb_zcprecsetr + subroutine psb_zcprecsetc(prec,what,string,info,ilev,ilmax,pos,idx) + import :: psb_zprec_type, psb_zspmat_type, psb_desc_type, psb_dpk_, & + & psb_ipk_ + class(psb_zprec_type), intent(inout) :: prec + character(len=*), intent(in) :: what + character(len=*), intent(in) :: string + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), optional, intent(in) :: ilev,ilmax,idx + character(len=*), optional, intent(in) :: pos + end subroutine psb_zcprecsetc +end interface + contains subroutine psb_zfile_prec_descr(prec,iout, root) use psb_base_mod - implicit none + implicit none class(psb_zprec_type), intent(in) :: prec integer(psb_ipk_), intent(in), optional :: iout integer(psb_ipk_), intent(in), optional :: root integer(psb_ipk_) :: iout_,info - character(len=20) :: name='prec_descr' - - if (present(iout)) then + character(len=20) :: name='prec_descr' + + if (present(iout)) then iout_ = iout else - iout_ = 6 + iout_ = 6 end if - if (.not.allocated(prec%prec)) then + if (.not.allocated(prec%prec)) then info = 1124 call psb_errpush(info,name,a_err="preconditioner") end if call prec%prec%descr(iout=iout,root=root) - + end subroutine psb_zfile_prec_descr subroutine psb_z_prec_dump(prec,info,prefix,head) - implicit none + implicit none type(psb_zprec_type), intent(in) :: prec integer(psb_ipk_), intent(out) :: info character(len=*), intent(in), optional :: prefix,head - ! len of prefix_ + ! len of prefix_ info = 0 - if (.not.allocated(prec%prec)) then + if (.not.allocated(prec%prec)) then info = -1 write(psb_err_unit,*) 'Trying to dump a non-built preconditioner' return end if - + call prec%prec%dump(info,prefix,head) - - + + end subroutine psb_z_prec_dump subroutine psb_z_allocate_wrk(prec,info,vmold,desc) use psb_base_mod implicit none - + ! Arguments class(psb_zprec_type), intent(inout) :: prec integer(psb_ipk_), intent(out) :: info @@ -209,33 +246,33 @@ contains ! Local variables integer(psb_ipk_) :: err_act character(len=20) :: name - + info=psb_success_ name = 'psb_z_allocate_wrk' call psb_erractionsave(err_act) - + if (psb_get_errstatus().ne.0) goto 9999 - if (.not.allocated(prec%prec)) then + if (.not.allocated(prec%prec)) then info = -1 write(psb_err_unit,*) 'Trying to allocate wrk to a non-built preconditioner' return end if - + call prec%prec%allocate_wrk(info,vmold=vmold,desc=desc) - + call psb_erractionrestore(err_act) return - + 9999 call psb_error_handler(err_act) return - + end subroutine psb_z_allocate_wrk subroutine psb_z_free_wrk(prec,info) use psb_base_mod implicit none - + ! Arguments class(psb_zprec_type), intent(inout) :: prec integer(psb_ipk_), intent(out) :: info @@ -243,47 +280,47 @@ contains ! Local variables integer(psb_ipk_) :: err_act character(len=20) :: name - + info=psb_success_ name = 'psb_z_free_wrk' call psb_erractionsave(err_act) - + if (psb_get_errstatus().ne.0) goto 9999 - if (.not.allocated(prec%prec)) then + if (.not.allocated(prec%prec)) then info = -1 write(psb_err_unit,*) 'Trying to free a non-built preconditioner' return end if - + call prec%prec%free_wrk(info) - + call psb_erractionrestore(err_act) return - + 9999 call psb_error_handler(err_act) return - + end subroutine psb_z_free_wrk function psb_z_is_allocated_wrk(prec) result(res) implicit none - + ! Arguments class(psb_zprec_type), intent(in) :: prec logical :: res if (.not.allocated(prec%prec)) then res = .false. - else + else res = prec%prec%is_allocated_wrk() end if - + end function psb_z_is_allocated_wrk subroutine psb_z_precfree(p,info) use psb_base_mod - implicit none + implicit none type(psb_zprec_type), intent(inout) :: p integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: me, err_act,i @@ -303,14 +340,14 @@ contains return 9999 call psb_error_handler(err_act) - + return end subroutine psb_z_precfree subroutine psb_z_prec_free(prec,info) use psb_base_mod - implicit none + implicit none class(psb_zprec_type), intent(inout) :: prec integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: me, err_act,i @@ -324,7 +361,7 @@ contains me=-1 - if (allocated(prec%prec)) then + if (allocated(prec%prec)) then call prec%prec%free(info) if (info /= psb_success_) goto 9999 deallocate(prec%prec,stat=info) @@ -343,26 +380,26 @@ contains class(psb_zprec_type), intent(in) :: prec integer(psb_epk_) :: val integer(psb_ipk_) :: i - + val = 0 - if (allocated(prec%prec)) then + if (allocated(prec%prec)) then val = val + prec%prec%sizeof() end if - + end function psb_zprec_sizeof subroutine psb_z_prec_clone(prec,precout,info) - implicit none + implicit none class(psb_zprec_type), intent(inout) :: prec class(psb_zprec_type), intent(inout) :: precout integer(psb_ipk_), intent(out) :: info info = psb_success_ call prec%free(info) - if (allocated(prec%prec)) then + if (allocated(prec%prec)) then call prec%prec%clone(precout%prec,info) end if - + end subroutine psb_z_prec_clone end module psb_z_prec_type diff --git a/test/pargen/psb_d_pde2d.f90 b/test/pargen/psb_d_pde2d.f90 index edd5034f..1ccd7f32 100644 --- a/test/pargen/psb_d_pde2d.f90 +++ b/test/pargen/psb_d_pde2d.f90 @@ -1,9 +1,9 @@ -! +! ! Parallel Sparse BLAS version 3.5 ! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! +! Salvatore Filippone +! Alfredo Buttari +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -15,7 +15,7 @@ ! 3. The name of the PSBLAS group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. -! +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -27,23 +27,23 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! -! +! +! ! File: psb_d_pde2d.f90 ! ! Program: psb_d_pde2d ! This sample program solves a linear system obtained by discretizing a -! PDE with Dirichlet BCs. -! +! PDE with Dirichlet BCs. +! ! ! The PDE is a general second order equation in 2d ! -! a1 dd(u) a2 dd(u) b1 d(u) b2 d(u) +! a1 dd(u) a2 dd(u) b1 d(u) b2 d(u) ! - ------ - ------ ----- + ------ + c u = f -! dxdx dydy dx dy +! dxdx dydy dx dy ! ! with Dirichlet boundary conditions -! u = g +! u = g ! ! on the unit square 0<=x,y<=1. ! @@ -63,31 +63,31 @@ module psb_d_pde2d_mod & psb_dspmat_type, psb_d_vect_type, dzero,& & psb_d_base_sparse_mat, psb_d_base_vect_type, psb_i_base_vect_type - interface + interface function d_func_2d(x,y) result(val) import :: psb_dpk_ real(psb_dpk_), intent(in) :: x,y real(psb_dpk_) :: val end function d_func_2d - end interface + end interface interface psb_gen_pde2d module procedure psb_d_gen_pde2d end interface psb_gen_pde2d - + contains - + function d_null_func_2d(x,y) result(val) real(psb_dpk_), intent(in) :: x,y real(psb_dpk_) :: val - + val = dzero end function d_null_func_2d ! - ! functions parametrizing the differential equation + ! functions parametrizing the differential equation ! ! @@ -101,48 +101,48 @@ contains ! function b1(x,y) use psb_base_mod, only : psb_dpk_, done, dzero - implicit none + implicit none real(psb_dpk_) :: b1 real(psb_dpk_), intent(in) :: x,y b1=dzero end function b1 function b2(x,y) use psb_base_mod, only : psb_dpk_, done, dzero - implicit none + implicit none real(psb_dpk_) :: b2 real(psb_dpk_), intent(in) :: x,y b2=dzero end function b2 function c(x,y) use psb_base_mod, only : psb_dpk_, done, dzero - implicit none + implicit none real(psb_dpk_) :: c real(psb_dpk_), intent(in) :: x,y c=0.d0 end function c function a1(x,y) use psb_base_mod, only : psb_dpk_, done, dzero - implicit none - real(psb_dpk_) :: a1 + implicit none + real(psb_dpk_) :: a1 real(psb_dpk_), intent(in) :: x,y a1=done/80 end function a1 function a2(x,y) use psb_base_mod, only : psb_dpk_, done, dzero - implicit none + implicit none real(psb_dpk_) :: a2 real(psb_dpk_), intent(in) :: x,y a2=done/80 end function a2 function g(x,y) use psb_base_mod, only : psb_dpk_, done, dzero - implicit none + implicit none real(psb_dpk_) :: g real(psb_dpk_), intent(in) :: x,y g = dzero if (x == done) then g = done - else if (x == dzero) then + else if (x == dzero) then g = exp(-y**2) end if end function g @@ -150,7 +150,7 @@ contains ! ! subroutine to allocate and fill in the coefficient matrix and - ! the rhs. + ! the rhs. ! subroutine psb_d_gen_pde2d(ctxt,idim,a,bv,xv,desc_a,afmt,info,& & f,amold,vmold,imold,partition,nrl,iv) @@ -158,13 +158,13 @@ contains use psb_util_mod ! ! Discretizes the partial differential equation - ! - ! a1 dd(u) a2 dd(u) b1 d(u) b2 d(u) + ! + ! a1 dd(u) a2 dd(u) b1 d(u) b2 d(u) ! - ------ - ------ + ----- + ------ + c u = f - ! dxdx dydy dx dy + ! dxdx dydy dx dy ! ! with Dirichlet boundary conditions - ! u = g + ! u = g ! ! on the unit square 0<=x,y<=1. ! @@ -222,7 +222,7 @@ contains call psb_info(ctxt, iam, np) - if (present(f)) then + if (present(f)) then f_ => f else f_ => d_null_func_2d @@ -242,9 +242,9 @@ contains else partition_ = 3 end if - + ! initialize array descriptor and sparse matrix storage. provide an - ! estimate of the number of non zeroes + ! estimate of the number of non zeroes m = (1_psb_lpk_)*idim*idim n = m @@ -253,8 +253,8 @@ contains t0 = psb_wtime() select case(partition_) case(1) - ! A BLOCK partition - if (present(nrl)) then + ! A BLOCK partition + if (present(nrl)) then nr = nrl else ! @@ -265,46 +265,46 @@ contains end if nt = nr - call psb_sum(ctxt,nt) - if (nt /= m) then + call psb_sum(ctxt,nt) + if (nt /= m) then write(psb_err_unit,*) iam, 'Initialization error ',nr,nt,m info = -1 call psb_barrier(ctxt) call psb_abort(ctxt) - return + return end if ! ! First example of use of CDALL: specify for each process a number of ! contiguous rows - ! + ! call psb_cdall(ctxt,desc_a,info,nl=nr) myidx = desc_a%get_global_indices() nlr = size(myidx) case(2) ! A partition defined by the user through IV - - if (present(iv)) then + + if (present(iv)) then if (size(iv) /= m) then write(psb_err_unit,*) iam, 'Initialization error: wrong IV size',size(iv),m info = -1 call psb_barrier(ctxt) call psb_abort(ctxt) - return + return end if else write(psb_err_unit,*) iam, 'Initialization error: IV not present' info = -1 call psb_barrier(ctxt) call psb_abort(ctxt) - return + return end if ! ! Second example of use of CDALL: specify for each row the - ! process that owns it - ! + ! process that owns it + ! call psb_cdall(ctxt,desc_a,info,vg=iv) myidx = desc_a%get_global_indices() nlr = size(myidx) @@ -319,7 +319,7 @@ contains npy = npdims(2) allocate(bndx(0:npx),bndy(0:npy)) - ! We can reuse idx2ijk for process indices as well. + ! We can reuse idx2ijk for process indices as well. call idx2ijk(iamx,iamy,iam,npx,npy,base=0) ! Now let's split the 2D square in rectangles call dist1Didx(bndx,idim,npx) @@ -327,7 +327,7 @@ contains call dist1Didx(bndy,idim,npy) myny = bndy(iamy+1)-bndy(iamy) - ! How many indices do I own? + ! How many indices do I own? nlr = mynx*myny allocate(myidx(nlr)) ! Now, let's generate the list of indices I own @@ -349,9 +349,8 @@ contains ! ! Third example of use of CDALL: specify for each process ! the set of global indices it owns. - ! + ! call psb_cdall(ctxt,desc_a,info,vl=myidx) - case default write(psb_err_unit,*) iam, 'Initialization error: should not get here' info = -1 @@ -360,9 +359,9 @@ contains return end select - + if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz) - ! define rhs from boundary conditions; also build initial guess + ! define rhs from boundary conditions; also build initial guess if (info == psb_success_) call psb_geall(xv,desc_a,info) if (info == psb_success_) call psb_geall(bv,desc_a,info) @@ -377,12 +376,12 @@ contains end if ! we build an auxiliary matrix consisting of one row at a - ! time; just a small matrix. might be extended to generate - ! a bunch of rows per call. - ! + ! time; just a small matrix. might be extended to generate + ! a bunch of rows per call. + ! allocate(val(20*nb),irow(20*nb),& &icol(20*nb),stat=info) - if (info /= psb_success_ ) then + if (info /= psb_success_ ) then info=psb_err_alloc_dealloc_ call psb_errpush(info,name) goto 9999 @@ -395,11 +394,11 @@ contains call psb_barrier(ctxt) t1 = psb_wtime() do ii=1, nlr,nb - ib = min(nb,nlr-ii+1) + ib = min(nb,nlr-ii+1) icoeff = 1 do k=1,ib i=ii+k-1 - ! local matrix pointer + ! local matrix pointer glob_row=myidx(i) ! compute gridpoint coordinates call idx2ijk(ix,iy,glob_row,idim,idim) @@ -409,11 +408,11 @@ contains zt(k) = f_(x,y) ! internal point: build discretization - ! + ! ! term depending on (x-1,y) ! val(icoeff) = -a1(x,y)/sqdeltah-b1(x,y)/deltah2 - if (ix == 1) then + if (ix == 1) then zt(k) = g(dzero,y)*(-val(icoeff)) + zt(k) else call ijk2idx(icol(icoeff),ix-1,iy,idim,idim) @@ -422,7 +421,7 @@ contains endif ! term depending on (x,y-1) val(icoeff) = -a2(x,y)/sqdeltah-b2(x,y)/deltah2 - if (iy == 1) then + if (iy == 1) then zt(k) = g(x,dzero)*(-val(icoeff)) + zt(k) else call ijk2idx(icol(icoeff),ix,iy-1,idim,idim) @@ -434,10 +433,10 @@ contains val(icoeff)=(2*done)*(a1(x,y) + a2(x,y))/sqdeltah + c(x,y) call ijk2idx(icol(icoeff),ix,iy,idim,idim) irow(icoeff) = glob_row - icoeff = icoeff+1 + icoeff = icoeff+1 ! term depending on (x,y+1) val(icoeff)=-a2(x,y)/sqdeltah+b2(x,y)/deltah2 - if (iy == idim) then + if (iy == idim) then zt(k) = g(x,done)*(-val(icoeff)) + zt(k) else call ijk2idx(icol(icoeff),ix,iy+1,idim,idim) @@ -446,7 +445,7 @@ contains endif ! term depending on (x+1,y) val(icoeff)=-a1(x,y)/sqdeltah+b1(x,y)/deltah2 - if (ix==idim) then + if (ix==idim) then zt(k) = g(done,y)*(-val(icoeff)) + zt(k) else call ijk2idx(icol(icoeff),ix+1,iy,idim,idim) @@ -480,8 +479,8 @@ contains tcdasb = psb_wtime()-t1 call psb_barrier(ctxt) t1 = psb_wtime() - if (info == psb_success_) then - if (present(amold)) then + if (info == psb_success_) then + if (present(amold)) then call psb_spasb(a,desc_a,info,dupl=psb_dupl_err_,mold=amold) else call psb_spasb(a,desc_a,info,dupl=psb_dupl_err_,afmt=afmt) @@ -504,7 +503,7 @@ contains end if tasb = psb_wtime()-t1 call psb_barrier(ctxt) - ttot = psb_wtime() - t0 + ttot = psb_wtime() - t0 call psb_amx(ctxt,talc) call psb_amx(ctxt,tgen) @@ -545,9 +544,9 @@ program psb_d_pde2d integer(psb_ipk_) :: idim integer(psb_epk_) :: system_size - ! miscellaneous + ! miscellaneous real(psb_dpk_), parameter :: one = done - real(psb_dpk_) :: t1, t2, tprec + real(psb_dpk_) :: t1, t2, tprec ! sparse matrix and preconditioner type(psb_dspmat_type) :: a @@ -565,6 +564,14 @@ program psb_d_pde2d integer(psb_epk_) :: amatsize, precsize, descsize, d2size real(psb_dpk_) :: err, eps + ! Parameters for solvers in Block-Jacobi preconditioner + type ainvparms + character(len=12) :: alg, orth_alg, ilu_alg, ilut_scale + integer(psb_ipk_) :: fill, inv_fill + real(psb_dpk_) :: thresh, inv_thresh + end type ainvparms + type(ainvparms) :: parms + ! other variables integer(psb_ipk_) :: info, i character(len=20) :: name,ch_err @@ -576,7 +583,7 @@ program psb_d_pde2d call psb_init(ctxt) call psb_info(ctxt,iam,np) - if (iam < 0) then + if (iam < 0) then ! This should not happen, but just in case call psb_exit(ctxt) stop @@ -587,21 +594,21 @@ program psb_d_pde2d ! ! Hello world ! - if (iam == psb_root_) then + if (iam == psb_root_) then write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_ write(*,*) 'This is the ',trim(name),' sample program' end if ! ! get parameters ! - call get_parms(ctxt,kmethd,ptype,afmt,idim,istopc,itmax,itrace,irst,ipart) + call get_parms(ctxt,kmethd,ptype,afmt,idim,istopc,itmax,itrace,irst,ipart,parms) ! - ! allocate and fill in the coefficient matrix, rhs and initial guess + ! allocate and fill in the coefficient matrix, rhs and initial guess ! call psb_barrier(ctxt) t1 = psb_wtime() - call psb_gen_pde2d(ctxt,idim,a,bv,xxv,desc_a,afmt,info,partition=ipart) + call psb_gen_pde2d(ctxt,idim,a,bv,xxv,desc_a,afmt,info,partition=ipart) call psb_barrier(ctxt) t2 = psb_wtime() - t1 if(info /= psb_success_) then @@ -614,9 +621,43 @@ program psb_d_pde2d if (iam == psb_root_) write(psb_out_unit,'(" ")') ! ! prepare the preconditioner. - ! + ! if(iam == psb_root_) write(psb_out_unit,'("Setting preconditioner to : ",a)')ptype call prec%init(ctxt,ptype,info) + ! + ! Set the options for the BJAC preconditioner + ! + if (psb_toupper(ptype) == "BJAC") then + call prec%set('sub_solve', parms%alg, info) + select case (psb_toupper(parms%alg)) + case ("ILU") + call prec%set('sub_fillin', parms%fill, info) + call prec%set('ilu_alg', parms%ilu_alg, info) + case ("ILUT") + call prec%set('sub_fillin', parms%fill, info) + call prec%set('sub_iluthrs', parms%thresh, info) + call prec%set('ilut_scale', parms%ilut_scale, info) + case ("AINV") + call prec%set('inv_thresh', parms%inv_thresh, info) + call prec%set('inv_fillin', parms%inv_fill, info) + call prec%set('ilut_scale', parms%ilut_scale, info) + call prec%set('ainv_alg', parms%orth_alg, info) + case ("INVK") + call prec%set('sub_fillin', parms%fill, info) + call prec%set('inv_fillin', parms%inv_fill, info) + call prec%set('ilut_scale', parms%ilut_scale, info) + case ("INVT") + call prec%set('sub_fillin', parms%fill, info) + call prec%set('inv_fillin', parms%inv_fill, info) + call prec%set('sub_iluthrs', parms%thresh, info) + call prec%set('inv_thresh', parms%inv_thresh, info) + call prec%set('ilut_scale', parms%ilut_scale, info) + case default + ! Do nothing, use default setting in the init routine + end select + else + ! nothing to set for NONE or DIAG preconditioner + end if call psb_barrier(ctxt) t1 = psb_wtime() @@ -636,14 +677,14 @@ program psb_d_pde2d if (iam == psb_root_) write(psb_out_unit,'(" ")') call prec%descr() ! - ! iterative method parameters + ! iterative method parameters ! if(iam == psb_root_) write(psb_out_unit,'("Calling iterative method ",a)')kmethd call psb_barrier(ctxt) - t1 = psb_wtime() + t1 = psb_wtime() eps = 1.d-6 - call psb_krylov(kmethd,a,prec,bv,xxv,eps,desc_a,info,& - & itmax=itmax,iter=iter,err=err,itrace=itrace,istop=istopc,irst=irst) + call psb_krylov(kmethd,a,prec,bv,xxv,eps,desc_a,info,& + & itmax=itmax,iter=iter,err=err,itrace=itrace,istop=istopc,irst=irst) if(info /= psb_success_) then info=psb_err_from_subroutine_ @@ -673,14 +714,14 @@ program psb_d_pde2d write(psb_out_unit,'("Convergence indicator on exit : ",es12.5)')err write(psb_out_unit,'("Info on exit : ",i12)')info write(psb_out_unit,'("Total memory occupation for A: ",i12)')amatsize - write(psb_out_unit,'("Total memory occupation for PREC: ",i12)')precsize + write(psb_out_unit,'("Total memory occupation for PREC: ",i12)')precsize write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize write(psb_out_unit,'("Storage format for A: ",a)') a%get_fmt() write(psb_out_unit,'("Storage format for DESC_A: ",a)') desc_a%get_fmt() end if - ! + ! ! cleanup storage and exit ! call psb_gefree(bv,desc_a,info) @@ -706,13 +747,14 @@ contains ! ! get iteration parameters from standard input ! - subroutine get_parms(ctxt,kmethd,ptype,afmt,idim,istopc,itmax,itrace,irst,ipart) + subroutine get_parms(ctxt,kmethd,ptype,afmt,idim,istopc,itmax,itrace,irst,ipart,parms) type(psb_ctxt_type) :: ctxt character(len=*) :: kmethd, ptype, afmt integer(psb_ipk_) :: idim, istopc,itmax,itrace,irst,ipart integer(psb_ipk_) :: np, iam integer(psb_ipk_) :: ip, inp_unit character(len=1024) :: filename + type(ainvparms) :: parms call psb_info(ctxt, iam, np) @@ -741,12 +783,12 @@ contains if (ip >= 4) then read(inp_unit,*) ipart else - ipart = 3 + ipart = 3 endif if (ip >= 5) then read(inp_unit,*) istopc else - istopc=1 + istopc=1 endif if (ip >= 6) then read(inp_unit,*) itmax @@ -763,8 +805,27 @@ contains else irst=1 endif + if (ip >= 9) then + read(inp_unit,*) parms%alg + read(inp_unit,*) parms%ilu_alg + read(inp_unit,*) parms%ilut_scale + read(inp_unit,*) parms%fill + read(inp_unit,*) parms%inv_fill + read(inp_unit,*) parms%thresh + read(inp_unit,*) parms%inv_thresh + read(inp_unit,*) parms%orth_alg + else + parms%alg = 'ILU' ! Block Solver ILU,ILUT,INVK,AINVT,AORTH + parms%ilu_alg = 'NONE' ! If ILU : MILU or NONE othewise ignored + parms%ilut_scale = 'NONE' ! If ILUT: NONE, MAXVAL, DIAG, ARWSUM, ACLSUM, ARCSUM + parms%fill = 0 ! Level of fill for forward factorization + parms%inv_fill = 1 ! Level of fill for inverse factorization (only INVK) + parms%thresh = 1E-1_psb_dpk_ ! Threshold for forward factorization + parms%inv_thresh = 1E-1_psb_dpk_ ! Threshold for inverse factorization + parms%orth_alg = 'LLK' ! What orthogonalization algorithm? + endif - write(psb_out_unit,'("Solving matrix : ell1")') + write(psb_out_unit,'("Solving matrix : ell1")') write(psb_out_unit,'("Grid dimensions : ",i5," x ",i5)')idim,idim write(psb_out_unit,'("Number of processors : ",i0)') np select case(ipart) @@ -777,11 +838,40 @@ contains write(psb_out_unit,'("Unknown data distrbution, defaulting to 2D")') end select write(psb_out_unit,'("Preconditioner : ",a)') ptype + if( psb_toupper(ptype) == "BJAC" ) then + write(psb_out_unit,'("Block subsolver : ",a)') parms%alg + select case (psb_toupper(parms%alg)) + case ('ILU') + write(psb_out_unit,'("Fill in : ",i0)') parms%fill + write(psb_out_unit,'("MILU : ",a)') parms%ilu_alg + case ('ILUT') + write(psb_out_unit,'("Fill in : ",i0)') parms%fill + write(psb_out_unit,'("Threshold : ",es12.5)') parms%thresh + write(psb_out_unit,'("Scaling : ",a)') parms%ilut_scale + case ('INVK') + write(psb_out_unit,'("Fill in : ",i0)') parms%fill + write(psb_out_unit,'("Invese Fill in : ",i0)') parms%inv_fill + write(psb_out_unit,'("Scaling : ",a)') parms%ilut_scale + case ('INVT') + write(psb_out_unit,'("Fill in : ",i0)') parms%fill + write(psb_out_unit,'("Threshold : ",es12.5)') parms%thresh + write(psb_out_unit,'("Invese Fill in : ",i0)') parms%inv_fill + write(psb_out_unit,'("Inverse Threshold : ",es12.5)') parms%inv_thresh + write(psb_out_unit,'("Scaling : ",a)') parms%ilut_scale + case ('AINV','AORTH') + write(psb_out_unit,'("Inverse Threshold : ",es12.5)') parms%inv_thresh + write(psb_out_unit,'("Invese Fill in : ",i0)') parms%inv_fill + write(psb_out_unit,'("Orthogonalization : ",a)') parms%orth_alg + write(psb_out_unit,'("Scaling : ",a)') parms%ilut_scale + case default + write(psb_out_unit,'("Unknown diagonal solver")') + end select + end if write(psb_out_unit,'("Iterative method : ",a)') kmethd write(psb_out_unit,'(" ")') else ! wrong number of parameter, print an error message and exit - call pr_usage(izero) + call pr_usage(izero) call psb_abort(ctxt) stop 1 endif @@ -800,20 +890,27 @@ contains call psb_bcast(ctxt,itmax) call psb_bcast(ctxt,itrace) call psb_bcast(ctxt,irst) - + call psb_bcast(ctxt,parms%alg) + call psb_bcast(ctxt,parms%fill) + call psb_bcast(ctxt,parms%inv_fill) + call psb_bcast(ctxt,parms%thresh) + call psb_bcast(ctxt,parms%inv_thresh) + call psb_bcast(ctxt,parms%orth_alg) + call psb_bcast(ctxt,parms%ilut_scale) + return end subroutine get_parms ! - ! print an error message - ! + ! print an error message + ! subroutine pr_usage(iout) integer(psb_ipk_) :: iout write(iout,*)'incorrect parameter(s) found' write(iout,*)' usage: pde2d90 methd prec dim & - &[ipart istop itmax itrace]' + &[ipart istop itmax itrace]' write(iout,*)' where:' - write(iout,*)' methd: cgstab cgs rgmres bicgstabl' + write(iout,*)' methd: cgstab cgs rgmres bicgstabl' write(iout,*)' prec : bjac diag none' write(iout,*)' dim number of points along each axis' write(iout,*)' the size of the resulting linear ' @@ -821,11 +918,9 @@ contains write(iout,*)' ipart data partition 1 3 ' write(iout,*)' istop stopping criterion 1, 2 ' write(iout,*)' itmax maximum number of iterations [500] ' - write(iout,*)' itrace <=0 (no tracing, default) or ' + write(iout,*)' itrace <=0 (no tracing, default) or ' write(iout,*)' >= 1 do tracing every itrace' - write(iout,*)' iterations ' + write(iout,*)' iterations ' end subroutine pr_usage end program psb_d_pde2d - - diff --git a/test/pargen/psb_d_pde3d.f90 b/test/pargen/psb_d_pde3d.f90 index 8cc14086..4630d946 100644 --- a/test/pargen/psb_d_pde3d.f90 +++ b/test/pargen/psb_d_pde3d.f90 @@ -1,9 +1,9 @@ -! +! ! Parallel Sparse BLAS version 3.5 ! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! +! Salvatore Filippone +! Alfredo Buttari +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -15,7 +15,7 @@ ! 3. The name of the PSBLAS group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. -! +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -27,23 +27,23 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! -! +! +! ! File: psb_d_pde3d.f90 ! ! Program: psb_d_pde3d ! This sample program solves a linear system obtained by discretizing a -! PDE with Dirichlet BCs. -! +! PDE with Dirichlet BCs. +! ! ! The PDE is a general second order equation in 3d ! -! a1 dd(u) a2 dd(u) a3 dd(u) b1 d(u) b2 d(u) b3 d(u) +! a1 dd(u) a2 dd(u) a3 dd(u) b1 d(u) b2 d(u) b3 d(u) ! - ------ - ------ - ------ + ----- + ------ + ------ + c u = f -! dxdx dydy dzdz dx dy dz +! dxdx dydy dzdz dx dy dz ! ! with Dirichlet boundary conditions -! u = g +! u = g ! ! on the unit cube 0<=x,y,z<=1. ! @@ -60,37 +60,37 @@ ! module psb_d_pde3d_mod - + use psb_base_mod, only : psb_dpk_, psb_ipk_, psb_lpk_, psb_desc_type,& & psb_dspmat_type, psb_d_vect_type, dzero,& & psb_d_base_sparse_mat, psb_d_base_vect_type, & & psb_i_base_vect_type, psb_l_base_vect_type - interface + interface function d_func_3d(x,y,z) result(val) import :: psb_dpk_ real(psb_dpk_), intent(in) :: x,y,z real(psb_dpk_) :: val end function d_func_3d - end interface + end interface interface psb_gen_pde3d module procedure psb_d_gen_pde3d end interface psb_gen_pde3d - + contains function d_null_func_3d(x,y,z) result(val) real(psb_dpk_), intent(in) :: x,y,z real(psb_dpk_) :: val - + val = dzero end function d_null_func_3d ! - ! functions parametrizing the differential equation - ! + ! functions parametrizing the differential equation + ! ! ! Note: b1, b2 and b3 are the coefficients of the first @@ -103,70 +103,70 @@ contains ! function b1(x,y,z) use psb_base_mod, only : psb_dpk_, done, dzero - implicit none + implicit none real(psb_dpk_) :: b1 real(psb_dpk_), intent(in) :: x,y,z b1=dzero end function b1 function b2(x,y,z) use psb_base_mod, only : psb_dpk_, done, dzero - implicit none + implicit none real(psb_dpk_) :: b2 real(psb_dpk_), intent(in) :: x,y,z b2=dzero end function b2 function b3(x,y,z) use psb_base_mod, only : psb_dpk_, done, dzero - implicit none + implicit none real(psb_dpk_) :: b3 - real(psb_dpk_), intent(in) :: x,y,z + real(psb_dpk_), intent(in) :: x,y,z b3=dzero end function b3 function c(x,y,z) use psb_base_mod, only : psb_dpk_, done, dzero - implicit none + implicit none real(psb_dpk_) :: c - real(psb_dpk_), intent(in) :: x,y,z + real(psb_dpk_), intent(in) :: x,y,z c=dzero end function c function a1(x,y,z) use psb_base_mod, only : psb_dpk_, done, dzero - implicit none - real(psb_dpk_) :: a1 + implicit none + real(psb_dpk_) :: a1 real(psb_dpk_), intent(in) :: x,y,z a1=done/80 end function a1 function a2(x,y,z) use psb_base_mod, only : psb_dpk_, done, dzero - implicit none + implicit none real(psb_dpk_) :: a2 real(psb_dpk_), intent(in) :: x,y,z a2=done/80 end function a2 function a3(x,y,z) use psb_base_mod, only : psb_dpk_, done, dzero - implicit none + implicit none real(psb_dpk_) :: a3 real(psb_dpk_), intent(in) :: x,y,z a3=done/80 end function a3 function g(x,y,z) use psb_base_mod, only : psb_dpk_, done, dzero - implicit none + implicit none real(psb_dpk_) :: g real(psb_dpk_), intent(in) :: x,y,z g = dzero if (x == done) then g = done - else if (x == dzero) then + else if (x == dzero) then g = exp(y**2-z**2) end if end function g - + ! ! subroutine to allocate and fill in the coefficient matrix and - ! the rhs. + ! the rhs. ! subroutine psb_d_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,afmt,info,& & f,amold,vmold,imold,partition,nrl,iv) @@ -174,13 +174,13 @@ contains use psb_util_mod ! ! Discretizes the partial differential equation - ! - ! a1 dd(u) a2 dd(u) a3 dd(u) b1 d(u) b2 d(u) b3 d(u) + ! + ! a1 dd(u) a2 dd(u) a3 dd(u) b1 d(u) b2 d(u) b3 d(u) ! - ------ - ------ - ------ + ----- + ------ + ------ + c u = f - ! dxdx dydy dzdz dx dy dz + ! dxdx dydy dzdz dx dy dz ! ! with Dirichlet boundary conditions - ! u = g + ! u = g ! ! on the unit cube 0<=x,y,z<=1. ! @@ -197,7 +197,7 @@ contains character(len=*) :: afmt procedure(d_func_3d), optional :: f class(psb_d_base_sparse_mat), optional :: amold - class(psb_d_base_vect_type), optional :: vmold + class(psb_d_base_vect_type), optional :: vmold class(psb_i_base_vect_type), optional :: imold integer(psb_ipk_), optional :: partition, nrl,iv(:) @@ -238,7 +238,7 @@ contains call psb_info(ctxt, iam, np) - if (present(f)) then + if (present(f)) then f_ => f else f_ => d_null_func_3d @@ -258,10 +258,10 @@ contains else partition_ = 3 end if - + ! initialize array descriptor and sparse matrix storage. provide an - ! estimate of the number of non zeroes - + ! estimate of the number of non zeroes + m = (1_psb_lpk_*idim)*idim*idim n = m nnz = ((n*7)/(np)) @@ -269,8 +269,8 @@ contains t0 = psb_wtime() select case(partition_) case(1) - ! A BLOCK partition - if (present(nrl)) then + ! A BLOCK partition + if (present(nrl)) then nr = nrl else ! @@ -281,46 +281,46 @@ contains end if nt = nr - call psb_sum(ctxt,nt) - if (nt /= m) then + call psb_sum(ctxt,nt) + if (nt /= m) then write(psb_err_unit,*) iam, 'Initialization error ',nr,nt,m info = -1 call psb_barrier(ctxt) call psb_abort(ctxt) - return + return end if ! ! First example of use of CDALL: specify for each process a number of ! contiguous rows - ! + ! call psb_cdall(ctxt,desc_a,info,nl=nr) myidx = desc_a%get_global_indices() nlr = size(myidx) case(2) ! A partition defined by the user through IV - - if (present(iv)) then + + if (present(iv)) then if (size(iv) /= m) then write(psb_err_unit,*) iam, 'Initialization error: wrong IV size',size(iv),m info = -1 call psb_barrier(ctxt) call psb_abort(ctxt) - return + return end if else write(psb_err_unit,*) iam, 'Initialization error: IV not present' info = -1 call psb_barrier(ctxt) call psb_abort(ctxt) - return + return end if ! ! Second example of use of CDALL: specify for each row the - ! process that owns it - ! + ! process that owns it + ! call psb_cdall(ctxt,desc_a,info,vg=iv) myidx = desc_a%get_global_indices() nlr = size(myidx) @@ -336,7 +336,7 @@ contains npz = npdims(3) allocate(bndx(0:npx),bndy(0:npy),bndz(0:npz)) - ! We can reuse idx2ijk for process indices as well. + ! We can reuse idx2ijk for process indices as well. call idx2ijk(iamx,iamy,iamz,iam,npx,npy,npz,base=0) ! Now let's split the 3D cube in hexahedra call dist1Didx(bndx,idim,npx) @@ -346,7 +346,7 @@ contains call dist1Didx(bndz,idim,npz) mynz = bndz(iamz+1)-bndz(iamz) - ! How many indices do I own? + ! How many indices do I own? nlr = mynx*myny*mynz allocate(myidx(nlr)) ! Now, let's generate the list of indices I own @@ -370,9 +370,8 @@ contains ! ! Third example of use of CDALL: specify for each process ! the set of global indices it owns. - ! + ! call psb_cdall(ctxt,desc_a,info,vl=myidx) - case default write(psb_err_unit,*) iam, 'Initialization error: should not get here' info = -1 @@ -381,9 +380,9 @@ contains return end select - + if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz) - ! define rhs from boundary conditions; also build initial guess + ! define rhs from boundary conditions; also build initial guess if (info == psb_success_) call psb_geall(xv,desc_a,info) if (info == psb_success_) call psb_geall(bv,desc_a,info) @@ -398,12 +397,12 @@ contains end if ! we build an auxiliary matrix consisting of one row at a - ! time; just a small matrix. might be extended to generate - ! a bunch of rows per call. - ! + ! time; just a small matrix. might be extended to generate + ! a bunch of rows per call. + ! allocate(val(20*nb),irow(20*nb),& &icol(20*nb),stat=info) - if (info /= psb_success_ ) then + if (info /= psb_success_ ) then info=psb_err_alloc_dealloc_ call psb_errpush(info,name) goto 9999 @@ -416,11 +415,11 @@ contains call psb_barrier(ctxt) t1 = psb_wtime() do ii=1, nlr,nb - ib = min(nb,nlr-ii+1) + ib = min(nb,nlr-ii+1) icoeff = 1 do k=1,ib i=ii+k-1 - ! local matrix pointer + ! local matrix pointer glob_row=myidx(i) ! compute gridpoint coordinates call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim) @@ -430,11 +429,11 @@ contains z = (iz-1)*deltah zt(k) = f_(x,y,z) ! internal point: build discretization - ! + ! ! term depending on (x-1,y,z) ! val(icoeff) = -a1(x,y,z)/sqdeltah-b1(x,y,z)/deltah2 - if (ix == 1) then + if (ix == 1) then zt(k) = g(dzero,y,z)*(-val(icoeff)) + zt(k) else call ijk2idx(icol(icoeff),ix-1,iy,iz,idim,idim,idim) @@ -443,19 +442,19 @@ contains endif ! term depending on (x,y-1,z) val(icoeff) = -a2(x,y,z)/sqdeltah-b2(x,y,z)/deltah2 - if (iy == 1) then + if (iy == 1) then zt(k) = g(x,dzero,z)*(-val(icoeff)) + zt(k) else - call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim) + call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim) irow(icoeff) = glob_row icoeff = icoeff+1 endif ! term depending on (x,y,z-1) val(icoeff)=-a3(x,y,z)/sqdeltah-b3(x,y,z)/deltah2 - if (iz == 1) then + if (iz == 1) then zt(k) = g(x,y,dzero)*(-val(icoeff)) + zt(k) else - call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim) + call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim) irow(icoeff) = glob_row icoeff = icoeff+1 endif @@ -463,33 +462,33 @@ contains ! term depending on (x,y,z) val(icoeff)=(2*done)*(a1(x,y,z)+a2(x,y,z)+a3(x,y,z))/sqdeltah & & + c(x,y,z) - call ijk2idx(icol(icoeff),ix,iy,iz,idim,idim,idim) + call ijk2idx(icol(icoeff),ix,iy,iz,idim,idim,idim) irow(icoeff) = glob_row - icoeff = icoeff+1 + icoeff = icoeff+1 ! term depending on (x,y,z+1) val(icoeff)=-a3(x,y,z)/sqdeltah+b3(x,y,z)/deltah2 - if (iz == idim) then + if (iz == idim) then zt(k) = g(x,y,done)*(-val(icoeff)) + zt(k) else - call ijk2idx(icol(icoeff),ix,iy,iz+1,idim,idim,idim) + call ijk2idx(icol(icoeff),ix,iy,iz+1,idim,idim,idim) irow(icoeff) = glob_row icoeff = icoeff+1 endif ! term depending on (x,y+1,z) val(icoeff)=-a2(x,y,z)/sqdeltah+b2(x,y,z)/deltah2 - if (iy == idim) then + if (iy == idim) then zt(k) = g(x,done,z)*(-val(icoeff)) + zt(k) else - call ijk2idx(icol(icoeff),ix,iy+1,iz,idim,idim,idim) + call ijk2idx(icol(icoeff),ix,iy+1,iz,idim,idim,idim) irow(icoeff) = glob_row icoeff = icoeff+1 endif ! term depending on (x+1,y,z) val(icoeff)=-a1(x,y,z)/sqdeltah+b1(x,y,z)/deltah2 - if (ix==idim) then + if (ix==idim) then zt(k) = g(done,y,z)*(-val(icoeff)) + zt(k) else - call ijk2idx(icol(icoeff),ix+1,iy,iz,idim,idim,idim) + call ijk2idx(icol(icoeff),ix+1,iy,iz,idim,idim,idim) irow(icoeff) = glob_row icoeff = icoeff+1 endif @@ -520,8 +519,8 @@ contains tcdasb = psb_wtime()-t1 call psb_barrier(ctxt) t1 = psb_wtime() - if (info == psb_success_) then - if (present(amold)) then + if (info == psb_success_) then + if (present(amold)) then call psb_spasb(a,desc_a,info,dupl=psb_dupl_err_,mold=amold) else call psb_spasb(a,desc_a,info,dupl=psb_dupl_err_,afmt=afmt) @@ -544,7 +543,7 @@ contains end if tasb = psb_wtime()-t1 call psb_barrier(ctxt) - ttot = psb_wtime() - t0 + ttot = psb_wtime() - t0 call psb_amx(ctxt,talc) call psb_amx(ctxt,tgen) @@ -586,9 +585,9 @@ program psb_d_pde3d integer(psb_ipk_) :: idim integer(psb_epk_) :: system_size - ! miscellaneous + ! miscellaneous real(psb_dpk_), parameter :: one = done - real(psb_dpk_) :: t1, t2, tprec + real(psb_dpk_) :: t1, t2, tprec ! sparse matrix and preconditioner type(psb_dspmat_type) :: a @@ -606,6 +605,14 @@ program psb_d_pde3d integer(psb_epk_) :: amatsize, precsize, descsize, d2size real(psb_dpk_) :: err, eps + ! Parameters for solvers in Block-Jacobi preconditioner + type ainvparms + character(len=12) :: alg, orth_alg, ilu_alg, ilut_scale + integer(psb_ipk_) :: fill, inv_fill + real(psb_dpk_) :: thresh, inv_thresh + end type ainvparms + type(ainvparms) :: parms + ! other variables integer(psb_ipk_) :: info, i character(len=20) :: name,ch_err @@ -617,7 +624,7 @@ program psb_d_pde3d call psb_init(ctxt) call psb_info(ctxt,iam,np) - if (iam < 0) then + if (iam < 0) then ! This should not happen, but just in case call psb_exit(ctxt) stop @@ -628,21 +635,20 @@ program psb_d_pde3d ! ! Hello world ! - if (iam == psb_root_) then + if (iam == psb_root_) then write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_ write(*,*) 'This is the ',trim(name),' sample program' end if ! ! get parameters ! - call get_parms(ctxt,kmethd,ptype,afmt,idim,istopc,itmax,itrace,irst,ipart) - + call get_parms(ctxt,kmethd,ptype,afmt,idim,istopc,itmax,itrace,irst,ipart,parms) ! - ! allocate and fill in the coefficient matrix, rhs and initial guess + ! allocate and fill in the coefficient matrix, rhs and initial guess ! call psb_barrier(ctxt) t1 = psb_wtime() - call psb_gen_pde3d(ctxt,idim,a,bv,xxv,desc_a,afmt,info,partition=ipart) + call psb_gen_pde3d(ctxt,idim,a,bv,xxv,desc_a,afmt,info,partition=ipart) call psb_barrier(ctxt) t2 = psb_wtime() - t1 if(info /= psb_success_) then @@ -655,9 +661,43 @@ program psb_d_pde3d if (iam == psb_root_) write(psb_out_unit,'(" ")') ! ! prepare the preconditioner. - ! + ! if(iam == psb_root_) write(psb_out_unit,'("Setting preconditioner to : ",a)')ptype call prec%init(ctxt,ptype,info) + ! + ! Set the options for the BJAC preconditioner + ! + if (psb_toupper(ptype) == "BJAC") then + call prec%set('sub_solve', parms%alg, info) + select case (psb_toupper(parms%alg)) + case ("ILU") + call prec%set('sub_fillin', parms%fill, info) + call prec%set('ilu_alg', parms%ilu_alg, info) + case ("ILUT") + call prec%set('sub_fillin', parms%fill, info) + call prec%set('sub_iluthrs', parms%thresh, info) + call prec%set('ilut_scale', parms%ilut_scale, info) + case ("AINV") + call prec%set('inv_thresh', parms%inv_thresh, info) + call prec%set('inv_fillin', parms%inv_fill, info) + call prec%set('ilut_scale', parms%ilut_scale, info) + call prec%set('ainv_alg', parms%orth_alg, info) + case ("INVK") + call prec%set('sub_fillin', parms%fill, info) + call prec%set('inv_fillin', parms%inv_fill, info) + call prec%set('ilut_scale', parms%ilut_scale, info) + case ("INVT") + call prec%set('sub_fillin', parms%fill, info) + call prec%set('inv_fillin', parms%inv_fill, info) + call prec%set('sub_iluthrs', parms%thresh, info) + call prec%set('inv_thresh', parms%inv_thresh, info) + call prec%set('ilut_scale', parms%ilut_scale, info) + case default + ! Do nothing, use default setting in the init routine + end select + else + ! nothing to set for NONE or DIAG preconditioner + end if call psb_barrier(ctxt) t1 = psb_wtime() @@ -677,14 +717,14 @@ program psb_d_pde3d if (iam == psb_root_) write(psb_out_unit,'(" ")') call prec%descr() ! - ! iterative method parameters + ! iterative method parameters ! if(iam == psb_root_) write(psb_out_unit,'("Calling iterative method ",a)')kmethd call psb_barrier(ctxt) - t1 = psb_wtime() + t1 = psb_wtime() eps = 1.d-6 - call psb_krylov(kmethd,a,prec,bv,xxv,eps,desc_a,info,& - & itmax=itmax,iter=iter,err=err,itrace=itrace,istop=istopc,irst=irst) + call psb_krylov(kmethd,a,prec,bv,xxv,eps,desc_a,info,& + & itmax=itmax,iter=iter,err=err,itrace=itrace,istop=istopc,irst=irst) if(info /= psb_success_) then info=psb_err_from_subroutine_ @@ -714,14 +754,14 @@ program psb_d_pde3d write(psb_out_unit,'("Convergence indicator on exit : ",es12.5)')err write(psb_out_unit,'("Info on exit : ",i12)')info write(psb_out_unit,'("Total memory occupation for A: ",i12)')amatsize - write(psb_out_unit,'("Total memory occupation for PREC: ",i12)')precsize + write(psb_out_unit,'("Total memory occupation for PREC: ",i12)')precsize write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize write(psb_out_unit,'("Storage format for A: ",a)') a%get_fmt() write(psb_out_unit,'("Storage format for DESC_A: ",a)') desc_a%get_fmt() end if - ! + ! ! cleanup storage and exit ! call psb_gefree(bv,desc_a,info) @@ -747,13 +787,15 @@ contains ! ! get iteration parameters from standard input ! - subroutine get_parms(ctxt,kmethd,ptype,afmt,idim,istopc,itmax,itrace,irst,ipart) + subroutine get_parms(ctxt,kmethd,ptype,afmt,idim,istopc,& + & itmax,itrace,irst,ipart,parms) type(psb_ctxt_type) :: ctxt character(len=*) :: kmethd, ptype, afmt integer(psb_ipk_) :: idim, istopc,itmax,itrace,irst,ipart integer(psb_ipk_) :: np, iam integer(psb_ipk_) :: ip, inp_unit character(len=1024) :: filename + type(ainvparms) :: parms call psb_info(ctxt, iam, np) @@ -782,12 +824,12 @@ contains if (ip >= 4) then read(inp_unit,*) ipart else - ipart = 3 + ipart = 3 endif if (ip >= 5) then read(inp_unit,*) istopc else - istopc=1 + istopc=1 endif if (ip >= 6) then read(inp_unit,*) itmax @@ -804,8 +846,27 @@ contains else irst=1 endif + if (ip >= 9) then + read(inp_unit,*) parms%alg + read(inp_unit,*) parms%ilu_alg + read(inp_unit,*) parms%ilut_scale + read(inp_unit,*) parms%fill + read(inp_unit,*) parms%inv_fill + read(inp_unit,*) parms%thresh + read(inp_unit,*) parms%inv_thresh + read(inp_unit,*) parms%orth_alg + else + parms%alg = 'ILU' ! Block Solver ILU,ILUT,INVK,AINVT,AORTH + parms%ilu_alg = 'NONE' ! If ILU : MILU or NONE othewise ignored + parms%ilut_scale = 'NONE' ! If ILUT: NONE, MAXVAL, DIAG, ARWSUM, ACLSUM, ARCSUM + parms%fill = 0 ! Level of fill for forward factorization + parms%inv_fill = 1 ! Level of fill for inverse factorization (only INVK) + parms%thresh = 1E-1_psb_dpk_ ! Threshold for forward factorization + parms%inv_thresh = 1E-1_psb_dpk_ ! Threshold for inverse factorization + parms%orth_alg = 'LLK' ! What orthogonalization algorithm? + endif - write(psb_out_unit,'("Solving matrix : ell1")') + write(psb_out_unit,'("Solving matrix : ell1")') write(psb_out_unit,& & '("Grid dimensions : ",i4," x ",i4," x ",i4)') & & idim,idim,idim @@ -820,11 +881,40 @@ contains write(psb_out_unit,'("Unknown data distrbution, defaulting to 3D")') end select write(psb_out_unit,'("Preconditioner : ",a)') ptype + if( psb_toupper(ptype) == "BJAC" ) then + write(psb_out_unit,'("Block subsolver : ",a)') parms%alg + select case (psb_toupper(parms%alg)) + case ('ILU') + write(psb_out_unit,'("Fill in : ",i0)') parms%fill + write(psb_out_unit,'("MILU : ",a)') parms%ilu_alg + case ('ILUT') + write(psb_out_unit,'("Fill in : ",i0)') parms%fill + write(psb_out_unit,'("Threshold : ",es12.5)') parms%thresh + write(psb_out_unit,'("Scaling : ",a)') parms%ilut_scale + case ('INVK') + write(psb_out_unit,'("Fill in : ",i0)') parms%fill + write(psb_out_unit,'("Invese Fill in : ",i0)') parms%inv_fill + write(psb_out_unit,'("Scaling : ",a)') parms%ilut_scale + case ('INVT') + write(psb_out_unit,'("Fill in : ",i0)') parms%fill + write(psb_out_unit,'("Threshold : ",es12.5)') parms%thresh + write(psb_out_unit,'("Invese Fill in : ",i0)') parms%inv_fill + write(psb_out_unit,'("Inverse Threshold : ",es12.5)') parms%inv_thresh + write(psb_out_unit,'("Scaling : ",a)') parms%ilut_scale + case ('AINV','AORTH') + write(psb_out_unit,'("Inverse Threshold : ",es12.5)') parms%inv_thresh + write(psb_out_unit,'("Invese Fill in : ",i0)') parms%inv_fill + write(psb_out_unit,'("Orthogonalization : ",a)') parms%orth_alg + write(psb_out_unit,'("Scaling : ",a)') parms%ilut_scale + case default + write(psb_out_unit,'("Unknown diagonal solver")') + end select + end if write(psb_out_unit,'("Iterative method : ",a)') kmethd write(psb_out_unit,'(" ")') else ! wrong number of parameter, print an error message and exit - call pr_usage(izero) + call pr_usage(izero) call psb_abort(ctxt) stop 1 endif @@ -843,20 +933,27 @@ contains call psb_bcast(ctxt,itmax) call psb_bcast(ctxt,itrace) call psb_bcast(ctxt,irst) + call psb_bcast(ctxt,parms%alg) + call psb_bcast(ctxt,parms%fill) + call psb_bcast(ctxt,parms%inv_fill) + call psb_bcast(ctxt,parms%thresh) + call psb_bcast(ctxt,parms%inv_thresh) + call psb_bcast(ctxt,parms%orth_alg) + call psb_bcast(ctxt,parms%ilut_scale) return end subroutine get_parms ! - ! print an error message - ! + ! print an error message + ! subroutine pr_usage(iout) integer(psb_ipk_) :: iout write(iout,*)'incorrect parameter(s) found' write(iout,*)' usage: pde3d90 methd prec dim & - &[istop itmax itrace]' + &[istop itmax itrace]' write(iout,*)' where:' - write(iout,*)' methd: cgstab cgs rgmres bicgstabl' + write(iout,*)' methd: cgstab cgs rgmres bicgstabl' write(iout,*)' prec : bjac diag none' write(iout,*)' dim number of points along each axis' write(iout,*)' the size of the resulting linear ' @@ -864,11 +961,9 @@ contains write(iout,*)' ipart data partition 1 3 ' write(iout,*)' istop stopping criterion 1, 2 ' write(iout,*)' itmax maximum number of iterations [500] ' - write(iout,*)' itrace <=0 (no tracing, default) or ' + write(iout,*)' itrace <=0 (no tracing, default) or ' write(iout,*)' >= 1 do tracing every itrace' - write(iout,*)' iterations ' + write(iout,*)' iterations ' end subroutine pr_usage end program psb_d_pde3d - - diff --git a/test/pargen/psb_s_pde2d.f90 b/test/pargen/psb_s_pde2d.f90 index 63cb4860..f055e5e6 100644 --- a/test/pargen/psb_s_pde2d.f90 +++ b/test/pargen/psb_s_pde2d.f90 @@ -1,9 +1,9 @@ -! +! ! Parallel Sparse BLAS version 3.5 ! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! +! Salvatore Filippone +! Alfredo Buttari +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -15,7 +15,7 @@ ! 3. The name of the PSBLAS group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. -! +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -27,23 +27,23 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! -! +! +! ! File: psb_s_pde2d.f90 ! ! Program: psb_s_pde2d ! This sample program solves a linear system obtained by discretizing a -! PDE with Dirichlet BCs. -! +! PDE with Dirichlet BCs. +! ! ! The PDE is a general second order equation in 2d ! -! a1 dd(u) a2 dd(u) b1 d(u) b2 d(u) +! a1 dd(u) a2 dd(u) b1 d(u) b2 d(u) ! - ------ - ------ ----- + ------ + c u = f -! dxdx dydy dx dy +! dxdx dydy dx dy ! ! with Dirichlet boundary conditions -! u = g +! u = g ! ! on the unit square 0<=x,y<=1. ! @@ -63,31 +63,31 @@ module psb_s_pde2d_mod & psb_sspmat_type, psb_s_vect_type, szero,& & psb_s_base_sparse_mat, psb_s_base_vect_type, psb_i_base_vect_type - interface + interface function s_func_2d(x,y) result(val) import :: psb_spk_ real(psb_spk_), intent(in) :: x,y real(psb_spk_) :: val end function s_func_2d - end interface + end interface interface psb_gen_pde2d module procedure psb_s_gen_pde2d end interface psb_gen_pde2d - + contains - + function s_null_func_2d(x,y) result(val) real(psb_spk_), intent(in) :: x,y real(psb_spk_) :: val - + val = szero end function s_null_func_2d ! - ! functions parametrizing the differential equation + ! functions parametrizing the differential equation ! ! @@ -101,48 +101,48 @@ contains ! function b1(x,y) use psb_base_mod, only : psb_spk_, sone, szero - implicit none + implicit none real(psb_spk_) :: b1 real(psb_spk_), intent(in) :: x,y b1=szero end function b1 function b2(x,y) use psb_base_mod, only : psb_spk_, sone, szero - implicit none + implicit none real(psb_spk_) :: b2 real(psb_spk_), intent(in) :: x,y b2=szero end function b2 function c(x,y) use psb_base_mod, only : psb_spk_, sone, szero - implicit none + implicit none real(psb_spk_) :: c real(psb_spk_), intent(in) :: x,y c=0.d0 end function c function a1(x,y) use psb_base_mod, only : psb_spk_, sone, szero - implicit none - real(psb_spk_) :: a1 + implicit none + real(psb_spk_) :: a1 real(psb_spk_), intent(in) :: x,y a1=sone/80 end function a1 function a2(x,y) use psb_base_mod, only : psb_spk_, sone, szero - implicit none + implicit none real(psb_spk_) :: a2 real(psb_spk_), intent(in) :: x,y a2=sone/80 end function a2 function g(x,y) use psb_base_mod, only : psb_spk_, sone, szero - implicit none + implicit none real(psb_spk_) :: g real(psb_spk_), intent(in) :: x,y g = szero if (x == sone) then g = sone - else if (x == szero) then + else if (x == szero) then g = exp(-y**2) end if end function g @@ -150,7 +150,7 @@ contains ! ! subroutine to allocate and fill in the coefficient matrix and - ! the rhs. + ! the rhs. ! subroutine psb_s_gen_pde2d(ctxt,idim,a,bv,xv,desc_a,afmt,info,& & f,amold,vmold,imold,partition,nrl,iv) @@ -158,13 +158,13 @@ contains use psb_util_mod ! ! Discretizes the partial differential equation - ! - ! a1 dd(u) a2 dd(u) b1 d(u) b2 d(u) + ! + ! a1 dd(u) a2 dd(u) b1 d(u) b2 d(u) ! - ------ - ------ + ----- + ------ + c u = f - ! dxdx dydy dx dy + ! dxdx dydy dx dy ! ! with Dirichlet boundary conditions - ! u = g + ! u = g ! ! on the unit square 0<=x,y<=1. ! @@ -222,7 +222,7 @@ contains call psb_info(ctxt, iam, np) - if (present(f)) then + if (present(f)) then f_ => f else f_ => s_null_func_2d @@ -242,9 +242,9 @@ contains else partition_ = 3 end if - + ! initialize array descriptor and sparse matrix storage. provide an - ! estimate of the number of non zeroes + ! estimate of the number of non zeroes m = (1_psb_lpk_)*idim*idim n = m @@ -253,8 +253,8 @@ contains t0 = psb_wtime() select case(partition_) case(1) - ! A BLOCK partition - if (present(nrl)) then + ! A BLOCK partition + if (present(nrl)) then nr = nrl else ! @@ -265,46 +265,46 @@ contains end if nt = nr - call psb_sum(ctxt,nt) - if (nt /= m) then + call psb_sum(ctxt,nt) + if (nt /= m) then write(psb_err_unit,*) iam, 'Initialization error ',nr,nt,m info = -1 call psb_barrier(ctxt) call psb_abort(ctxt) - return + return end if ! ! First example of use of CDALL: specify for each process a number of ! contiguous rows - ! + ! call psb_cdall(ctxt,desc_a,info,nl=nr) myidx = desc_a%get_global_indices() nlr = size(myidx) case(2) ! A partition defined by the user through IV - - if (present(iv)) then + + if (present(iv)) then if (size(iv) /= m) then write(psb_err_unit,*) iam, 'Initialization error: wrong IV size',size(iv),m info = -1 call psb_barrier(ctxt) call psb_abort(ctxt) - return + return end if else write(psb_err_unit,*) iam, 'Initialization error: IV not present' info = -1 call psb_barrier(ctxt) call psb_abort(ctxt) - return + return end if ! ! Second example of use of CDALL: specify for each row the - ! process that owns it - ! + ! process that owns it + ! call psb_cdall(ctxt,desc_a,info,vg=iv) myidx = desc_a%get_global_indices() nlr = size(myidx) @@ -319,7 +319,7 @@ contains npy = npdims(2) allocate(bndx(0:npx),bndy(0:npy)) - ! We can reuse idx2ijk for process indices as well. + ! We can reuse idx2ijk for process indices as well. call idx2ijk(iamx,iamy,iam,npx,npy,base=0) ! Now let's split the 2D square in rectangles call dist1Didx(bndx,idim,npx) @@ -327,7 +327,7 @@ contains call dist1Didx(bndy,idim,npy) myny = bndy(iamy+1)-bndy(iamy) - ! How many indices do I own? + ! How many indices do I own? nlr = mynx*myny allocate(myidx(nlr)) ! Now, let's generate the list of indices I own @@ -349,9 +349,8 @@ contains ! ! Third example of use of CDALL: specify for each process ! the set of global indices it owns. - ! + ! call psb_cdall(ctxt,desc_a,info,vl=myidx) - case default write(psb_err_unit,*) iam, 'Initialization error: should not get here' info = -1 @@ -360,9 +359,9 @@ contains return end select - + if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz) - ! define rhs from boundary conditions; also build initial guess + ! define rhs from boundary conditions; also build initial guess if (info == psb_success_) call psb_geall(xv,desc_a,info) if (info == psb_success_) call psb_geall(bv,desc_a,info) @@ -377,12 +376,12 @@ contains end if ! we build an auxiliary matrix consisting of one row at a - ! time; just a small matrix. might be extended to generate - ! a bunch of rows per call. - ! + ! time; just a small matrix. might be extended to generate + ! a bunch of rows per call. + ! allocate(val(20*nb),irow(20*nb),& &icol(20*nb),stat=info) - if (info /= psb_success_ ) then + if (info /= psb_success_ ) then info=psb_err_alloc_dealloc_ call psb_errpush(info,name) goto 9999 @@ -395,11 +394,11 @@ contains call psb_barrier(ctxt) t1 = psb_wtime() do ii=1, nlr,nb - ib = min(nb,nlr-ii+1) + ib = min(nb,nlr-ii+1) icoeff = 1 do k=1,ib i=ii+k-1 - ! local matrix pointer + ! local matrix pointer glob_row=myidx(i) ! compute gridpoint coordinates call idx2ijk(ix,iy,glob_row,idim,idim) @@ -409,11 +408,11 @@ contains zt(k) = f_(x,y) ! internal point: build discretization - ! + ! ! term depending on (x-1,y) ! val(icoeff) = -a1(x,y)/sqdeltah-b1(x,y)/deltah2 - if (ix == 1) then + if (ix == 1) then zt(k) = g(szero,y)*(-val(icoeff)) + zt(k) else call ijk2idx(icol(icoeff),ix-1,iy,idim,idim) @@ -422,7 +421,7 @@ contains endif ! term depending on (x,y-1) val(icoeff) = -a2(x,y)/sqdeltah-b2(x,y)/deltah2 - if (iy == 1) then + if (iy == 1) then zt(k) = g(x,szero)*(-val(icoeff)) + zt(k) else call ijk2idx(icol(icoeff),ix,iy-1,idim,idim) @@ -434,10 +433,10 @@ contains val(icoeff)=(2*sone)*(a1(x,y) + a2(x,y))/sqdeltah + c(x,y) call ijk2idx(icol(icoeff),ix,iy,idim,idim) irow(icoeff) = glob_row - icoeff = icoeff+1 + icoeff = icoeff+1 ! term depending on (x,y+1) val(icoeff)=-a2(x,y)/sqdeltah+b2(x,y)/deltah2 - if (iy == idim) then + if (iy == idim) then zt(k) = g(x,sone)*(-val(icoeff)) + zt(k) else call ijk2idx(icol(icoeff),ix,iy+1,idim,idim) @@ -446,7 +445,7 @@ contains endif ! term depending on (x+1,y) val(icoeff)=-a1(x,y)/sqdeltah+b1(x,y)/deltah2 - if (ix==idim) then + if (ix==idim) then zt(k) = g(sone,y)*(-val(icoeff)) + zt(k) else call ijk2idx(icol(icoeff),ix+1,iy,idim,idim) @@ -480,8 +479,8 @@ contains tcdasb = psb_wtime()-t1 call psb_barrier(ctxt) t1 = psb_wtime() - if (info == psb_success_) then - if (present(amold)) then + if (info == psb_success_) then + if (present(amold)) then call psb_spasb(a,desc_a,info,dupl=psb_dupl_err_,mold=amold) else call psb_spasb(a,desc_a,info,dupl=psb_dupl_err_,afmt=afmt) @@ -504,7 +503,7 @@ contains end if tasb = psb_wtime()-t1 call psb_barrier(ctxt) - ttot = psb_wtime() - t0 + ttot = psb_wtime() - t0 call psb_amx(ctxt,talc) call psb_amx(ctxt,tgen) @@ -545,9 +544,9 @@ program psb_s_pde2d integer(psb_ipk_) :: idim integer(psb_epk_) :: system_size - ! miscellaneous + ! miscellaneous real(psb_spk_), parameter :: one = sone - real(psb_dpk_) :: t1, t2, tprec + real(psb_dpk_) :: t1, t2, tprec ! sparse matrix and preconditioner type(psb_sspmat_type) :: a @@ -565,6 +564,14 @@ program psb_s_pde2d integer(psb_epk_) :: amatsize, precsize, descsize, d2size real(psb_spk_) :: err, eps + ! Parameters for solvers in Block-Jacobi preconditioner + type ainvparms + character(len=12) :: alg, orth_alg, ilu_alg, ilut_scale + integer(psb_ipk_) :: fill, inv_fill + real(psb_spk_) :: thresh, inv_thresh + end type ainvparms + type(ainvparms) :: parms + ! other variables integer(psb_ipk_) :: info, i character(len=20) :: name,ch_err @@ -576,7 +583,7 @@ program psb_s_pde2d call psb_init(ctxt) call psb_info(ctxt,iam,np) - if (iam < 0) then + if (iam < 0) then ! This should not happen, but just in case call psb_exit(ctxt) stop @@ -587,21 +594,21 @@ program psb_s_pde2d ! ! Hello world ! - if (iam == psb_root_) then + if (iam == psb_root_) then write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_ write(*,*) 'This is the ',trim(name),' sample program' end if ! ! get parameters ! - call get_parms(ctxt,kmethd,ptype,afmt,idim,istopc,itmax,itrace,irst,ipart) + call get_parms(ctxt,kmethd,ptype,afmt,idim,istopc,itmax,itrace,irst,ipart,parms) ! - ! allocate and fill in the coefficient matrix, rhs and initial guess + ! allocate and fill in the coefficient matrix, rhs and initial guess ! call psb_barrier(ctxt) t1 = psb_wtime() - call psb_gen_pde2d(ctxt,idim,a,bv,xxv,desc_a,afmt,info,partition=ipart) + call psb_gen_pde2d(ctxt,idim,a,bv,xxv,desc_a,afmt,info,partition=ipart) call psb_barrier(ctxt) t2 = psb_wtime() - t1 if(info /= psb_success_) then @@ -614,9 +621,43 @@ program psb_s_pde2d if (iam == psb_root_) write(psb_out_unit,'(" ")') ! ! prepare the preconditioner. - ! + ! if(iam == psb_root_) write(psb_out_unit,'("Setting preconditioner to : ",a)')ptype call prec%init(ctxt,ptype,info) + ! + ! Set the options for the BJAC preconditioner + ! + if (psb_toupper(ptype) == "BJAC") then + call prec%set('sub_solve', parms%alg, info) + select case (psb_toupper(parms%alg)) + case ("ILU") + call prec%set('sub_fillin', parms%fill, info) + call prec%set('ilu_alg', parms%ilu_alg, info) + case ("ILUT") + call prec%set('sub_fillin', parms%fill, info) + call prec%set('sub_iluthrs', parms%thresh, info) + call prec%set('ilut_scale', parms%ilut_scale, info) + case ("AINV") + call prec%set('inv_thresh', parms%inv_thresh, info) + call prec%set('inv_fillin', parms%inv_fill, info) + call prec%set('ilut_scale', parms%ilut_scale, info) + call prec%set('ainv_alg', parms%orth_alg, info) + case ("INVK") + call prec%set('sub_fillin', parms%fill, info) + call prec%set('inv_fillin', parms%inv_fill, info) + call prec%set('ilut_scale', parms%ilut_scale, info) + case ("INVT") + call prec%set('sub_fillin', parms%fill, info) + call prec%set('inv_fillin', parms%inv_fill, info) + call prec%set('sub_iluthrs', parms%thresh, info) + call prec%set('inv_thresh', parms%inv_thresh, info) + call prec%set('ilut_scale', parms%ilut_scale, info) + case default + ! Do nothing, use default setting in the init routine + end select + else + ! nothing to set for NONE or DIAG preconditioner + end if call psb_barrier(ctxt) t1 = psb_wtime() @@ -636,14 +677,14 @@ program psb_s_pde2d if (iam == psb_root_) write(psb_out_unit,'(" ")') call prec%descr() ! - ! iterative method parameters + ! iterative method parameters ! if(iam == psb_root_) write(psb_out_unit,'("Calling iterative method ",a)')kmethd call psb_barrier(ctxt) - t1 = psb_wtime() + t1 = psb_wtime() eps = 1.d-6 - call psb_krylov(kmethd,a,prec,bv,xxv,eps,desc_a,info,& - & itmax=itmax,iter=iter,err=err,itrace=itrace,istop=istopc,irst=irst) + call psb_krylov(kmethd,a,prec,bv,xxv,eps,desc_a,info,& + & itmax=itmax,iter=iter,err=err,itrace=itrace,istop=istopc,irst=irst) if(info /= psb_success_) then info=psb_err_from_subroutine_ @@ -673,14 +714,14 @@ program psb_s_pde2d write(psb_out_unit,'("Convergence indicator on exit : ",es12.5)')err write(psb_out_unit,'("Info on exit : ",i12)')info write(psb_out_unit,'("Total memory occupation for A: ",i12)')amatsize - write(psb_out_unit,'("Total memory occupation for PREC: ",i12)')precsize + write(psb_out_unit,'("Total memory occupation for PREC: ",i12)')precsize write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize write(psb_out_unit,'("Storage format for A: ",a)') a%get_fmt() write(psb_out_unit,'("Storage format for DESC_A: ",a)') desc_a%get_fmt() end if - ! + ! ! cleanup storage and exit ! call psb_gefree(bv,desc_a,info) @@ -706,13 +747,14 @@ contains ! ! get iteration parameters from standard input ! - subroutine get_parms(ctxt,kmethd,ptype,afmt,idim,istopc,itmax,itrace,irst,ipart) + subroutine get_parms(ctxt,kmethd,ptype,afmt,idim,istopc,itmax,itrace,irst,ipart,parms) type(psb_ctxt_type) :: ctxt character(len=*) :: kmethd, ptype, afmt integer(psb_ipk_) :: idim, istopc,itmax,itrace,irst,ipart integer(psb_ipk_) :: np, iam integer(psb_ipk_) :: ip, inp_unit character(len=1024) :: filename + type(ainvparms) :: parms call psb_info(ctxt, iam, np) @@ -741,12 +783,12 @@ contains if (ip >= 4) then read(inp_unit,*) ipart else - ipart = 3 + ipart = 3 endif if (ip >= 5) then read(inp_unit,*) istopc else - istopc=1 + istopc=1 endif if (ip >= 6) then read(inp_unit,*) itmax @@ -763,8 +805,27 @@ contains else irst=1 endif + if (ip >= 9) then + read(inp_unit,*) parms%alg + read(inp_unit,*) parms%ilu_alg + read(inp_unit,*) parms%ilut_scale + read(inp_unit,*) parms%fill + read(inp_unit,*) parms%inv_fill + read(inp_unit,*) parms%thresh + read(inp_unit,*) parms%inv_thresh + read(inp_unit,*) parms%orth_alg + else + parms%alg = 'ILU' ! Block Solver ILU,ILUT,INVK,AINVT,AORTH + parms%ilu_alg = 'NONE' ! If ILU : MILU or NONE othewise ignored + parms%ilut_scale = 'NONE' ! If ILUT: NONE, MAXVAL, DIAG, ARWSUM, ACLSUM, ARCSUM + parms%fill = 0 ! Level of fill for forward factorization + parms%inv_fill = 1 ! Level of fill for inverse factorization (only INVK) + parms%thresh = 1E-1_psb_spk_ ! Threshold for forward factorization + parms%inv_thresh = 1E-1_psb_spk_ ! Threshold for inverse factorization + parms%orth_alg = 'LLK' ! What orthogonalization algorithm? + endif - write(psb_out_unit,'("Solving matrix : ell1")') + write(psb_out_unit,'("Solving matrix : ell1")') write(psb_out_unit,'("Grid dimensions : ",i5," x ",i5)')idim,idim write(psb_out_unit,'("Number of processors : ",i0)') np select case(ipart) @@ -777,11 +838,40 @@ contains write(psb_out_unit,'("Unknown data distrbution, defaulting to 2D")') end select write(psb_out_unit,'("Preconditioner : ",a)') ptype + if( psb_toupper(ptype) == "BJAC" ) then + write(psb_out_unit,'("Block subsolver : ",a)') parms%alg + select case (psb_toupper(parms%alg)) + case ('ILU') + write(psb_out_unit,'("Fill in : ",i0)') parms%fill + write(psb_out_unit,'("MILU : ",a)') parms%ilu_alg + case ('ILUT') + write(psb_out_unit,'("Fill in : ",i0)') parms%fill + write(psb_out_unit,'("Threshold : ",es12.5)') parms%thresh + write(psb_out_unit,'("Scaling : ",a)') parms%ilut_scale + case ('INVK') + write(psb_out_unit,'("Fill in : ",i0)') parms%fill + write(psb_out_unit,'("Invese Fill in : ",i0)') parms%inv_fill + write(psb_out_unit,'("Scaling : ",a)') parms%ilut_scale + case ('INVT') + write(psb_out_unit,'("Fill in : ",i0)') parms%fill + write(psb_out_unit,'("Threshold : ",es12.5)') parms%thresh + write(psb_out_unit,'("Invese Fill in : ",i0)') parms%inv_fill + write(psb_out_unit,'("Inverse Threshold : ",es12.5)') parms%inv_thresh + write(psb_out_unit,'("Scaling : ",a)') parms%ilut_scale + case ('AINV','AORTH') + write(psb_out_unit,'("Inverse Threshold : ",es12.5)') parms%inv_thresh + write(psb_out_unit,'("Invese Fill in : ",i0)') parms%inv_fill + write(psb_out_unit,'("Orthogonalization : ",a)') parms%orth_alg + write(psb_out_unit,'("Scaling : ",a)') parms%ilut_scale + case default + write(psb_out_unit,'("Unknown diagonal solver")') + end select + end if write(psb_out_unit,'("Iterative method : ",a)') kmethd write(psb_out_unit,'(" ")') else ! wrong number of parameter, print an error message and exit - call pr_usage(izero) + call pr_usage(izero) call psb_abort(ctxt) stop 1 endif @@ -800,20 +890,27 @@ contains call psb_bcast(ctxt,itmax) call psb_bcast(ctxt,itrace) call psb_bcast(ctxt,irst) - + call psb_bcast(ctxt,parms%alg) + call psb_bcast(ctxt,parms%fill) + call psb_bcast(ctxt,parms%inv_fill) + call psb_bcast(ctxt,parms%thresh) + call psb_bcast(ctxt,parms%inv_thresh) + call psb_bcast(ctxt,parms%orth_alg) + call psb_bcast(ctxt,parms%ilut_scale) + return end subroutine get_parms ! - ! print an error message - ! + ! print an error message + ! subroutine pr_usage(iout) integer(psb_ipk_) :: iout write(iout,*)'incorrect parameter(s) found' write(iout,*)' usage: pde2d90 methd prec dim & - &[ipart istop itmax itrace]' + &[ipart istop itmax itrace]' write(iout,*)' where:' - write(iout,*)' methd: cgstab cgs rgmres bicgstabl' + write(iout,*)' methd: cgstab cgs rgmres bicgstabl' write(iout,*)' prec : bjac diag none' write(iout,*)' dim number of points along each axis' write(iout,*)' the size of the resulting linear ' @@ -821,11 +918,9 @@ contains write(iout,*)' ipart data partition 1 3 ' write(iout,*)' istop stopping criterion 1, 2 ' write(iout,*)' itmax maximum number of iterations [500] ' - write(iout,*)' itrace <=0 (no tracing, default) or ' + write(iout,*)' itrace <=0 (no tracing, default) or ' write(iout,*)' >= 1 do tracing every itrace' - write(iout,*)' iterations ' + write(iout,*)' iterations ' end subroutine pr_usage end program psb_s_pde2d - - diff --git a/test/pargen/psb_s_pde3d.f90 b/test/pargen/psb_s_pde3d.f90 index 90b4d042..0bc77248 100644 --- a/test/pargen/psb_s_pde3d.f90 +++ b/test/pargen/psb_s_pde3d.f90 @@ -1,9 +1,9 @@ -! +! ! Parallel Sparse BLAS version 3.5 ! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! +! Salvatore Filippone +! Alfredo Buttari +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -15,7 +15,7 @@ ! 3. The name of the PSBLAS group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. -! +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -27,23 +27,23 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! -! +! +! ! File: psb_s_pde3d.f90 ! ! Program: psb_s_pde3d ! This sample program solves a linear system obtained by discretizing a -! PDE with Dirichlet BCs. -! +! PDE with Dirichlet BCs. +! ! ! The PDE is a general second order equation in 3d ! -! a1 dd(u) a2 dd(u) a3 dd(u) b1 d(u) b2 d(u) b3 d(u) +! a1 dd(u) a2 dd(u) a3 dd(u) b1 d(u) b2 d(u) b3 d(u) ! - ------ - ------ - ------ + ----- + ------ + ------ + c u = f -! dxdx dydy dzdz dx dy dz +! dxdx dydy dzdz dx dy dz ! ! with Dirichlet boundary conditions -! u = g +! u = g ! ! on the unit cube 0<=x,y,z<=1. ! @@ -60,37 +60,37 @@ ! module psb_s_pde3d_mod - + use psb_base_mod, only : psb_spk_, psb_ipk_, psb_lpk_, psb_desc_type,& & psb_sspmat_type, psb_s_vect_type, szero,& & psb_s_base_sparse_mat, psb_s_base_vect_type, & & psb_i_base_vect_type, psb_l_base_vect_type - interface + interface function s_func_3d(x,y,z) result(val) import :: psb_spk_ real(psb_spk_), intent(in) :: x,y,z real(psb_spk_) :: val end function s_func_3d - end interface + end interface interface psb_gen_pde3d module procedure psb_s_gen_pde3d end interface psb_gen_pde3d - + contains function s_null_func_3d(x,y,z) result(val) real(psb_spk_), intent(in) :: x,y,z real(psb_spk_) :: val - + val = szero end function s_null_func_3d ! - ! functions parametrizing the differential equation - ! + ! functions parametrizing the differential equation + ! ! ! Note: b1, b2 and b3 are the coefficients of the first @@ -103,70 +103,70 @@ contains ! function b1(x,y,z) use psb_base_mod, only : psb_spk_, sone, szero - implicit none + implicit none real(psb_spk_) :: b1 real(psb_spk_), intent(in) :: x,y,z b1=szero end function b1 function b2(x,y,z) use psb_base_mod, only : psb_spk_, sone, szero - implicit none + implicit none real(psb_spk_) :: b2 real(psb_spk_), intent(in) :: x,y,z b2=szero end function b2 function b3(x,y,z) use psb_base_mod, only : psb_spk_, sone, szero - implicit none + implicit none real(psb_spk_) :: b3 - real(psb_spk_), intent(in) :: x,y,z + real(psb_spk_), intent(in) :: x,y,z b3=szero end function b3 function c(x,y,z) use psb_base_mod, only : psb_spk_, sone, szero - implicit none + implicit none real(psb_spk_) :: c - real(psb_spk_), intent(in) :: x,y,z + real(psb_spk_), intent(in) :: x,y,z c=szero end function c function a1(x,y,z) use psb_base_mod, only : psb_spk_, sone, szero - implicit none - real(psb_spk_) :: a1 + implicit none + real(psb_spk_) :: a1 real(psb_spk_), intent(in) :: x,y,z a1=sone/80 end function a1 function a2(x,y,z) use psb_base_mod, only : psb_spk_, sone, szero - implicit none + implicit none real(psb_spk_) :: a2 real(psb_spk_), intent(in) :: x,y,z a2=sone/80 end function a2 function a3(x,y,z) use psb_base_mod, only : psb_spk_, sone, szero - implicit none + implicit none real(psb_spk_) :: a3 real(psb_spk_), intent(in) :: x,y,z a3=sone/80 end function a3 function g(x,y,z) use psb_base_mod, only : psb_spk_, sone, szero - implicit none + implicit none real(psb_spk_) :: g real(psb_spk_), intent(in) :: x,y,z g = szero if (x == sone) then g = sone - else if (x == szero) then + else if (x == szero) then g = exp(y**2-z**2) end if end function g - + ! ! subroutine to allocate and fill in the coefficient matrix and - ! the rhs. + ! the rhs. ! subroutine psb_s_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,afmt,info,& & f,amold,vmold,imold,partition,nrl,iv) @@ -174,13 +174,13 @@ contains use psb_util_mod ! ! Discretizes the partial differential equation - ! - ! a1 dd(u) a2 dd(u) a3 dd(u) b1 d(u) b2 d(u) b3 d(u) + ! + ! a1 dd(u) a2 dd(u) a3 dd(u) b1 d(u) b2 d(u) b3 d(u) ! - ------ - ------ - ------ + ----- + ------ + ------ + c u = f - ! dxdx dydy dzdz dx dy dz + ! dxdx dydy dzdz dx dy dz ! ! with Dirichlet boundary conditions - ! u = g + ! u = g ! ! on the unit cube 0<=x,y,z<=1. ! @@ -197,7 +197,7 @@ contains character(len=*) :: afmt procedure(s_func_3d), optional :: f class(psb_s_base_sparse_mat), optional :: amold - class(psb_s_base_vect_type), optional :: vmold + class(psb_s_base_vect_type), optional :: vmold class(psb_i_base_vect_type), optional :: imold integer(psb_ipk_), optional :: partition, nrl,iv(:) @@ -238,7 +238,7 @@ contains call psb_info(ctxt, iam, np) - if (present(f)) then + if (present(f)) then f_ => f else f_ => s_null_func_3d @@ -258,10 +258,10 @@ contains else partition_ = 3 end if - + ! initialize array descriptor and sparse matrix storage. provide an - ! estimate of the number of non zeroes - + ! estimate of the number of non zeroes + m = (1_psb_lpk_*idim)*idim*idim n = m nnz = ((n*7)/(np)) @@ -269,8 +269,8 @@ contains t0 = psb_wtime() select case(partition_) case(1) - ! A BLOCK partition - if (present(nrl)) then + ! A BLOCK partition + if (present(nrl)) then nr = nrl else ! @@ -281,46 +281,46 @@ contains end if nt = nr - call psb_sum(ctxt,nt) - if (nt /= m) then + call psb_sum(ctxt,nt) + if (nt /= m) then write(psb_err_unit,*) iam, 'Initialization error ',nr,nt,m info = -1 call psb_barrier(ctxt) call psb_abort(ctxt) - return + return end if ! ! First example of use of CDALL: specify for each process a number of ! contiguous rows - ! + ! call psb_cdall(ctxt,desc_a,info,nl=nr) myidx = desc_a%get_global_indices() nlr = size(myidx) case(2) ! A partition defined by the user through IV - - if (present(iv)) then + + if (present(iv)) then if (size(iv) /= m) then write(psb_err_unit,*) iam, 'Initialization error: wrong IV size',size(iv),m info = -1 call psb_barrier(ctxt) call psb_abort(ctxt) - return + return end if else write(psb_err_unit,*) iam, 'Initialization error: IV not present' info = -1 call psb_barrier(ctxt) call psb_abort(ctxt) - return + return end if ! ! Second example of use of CDALL: specify for each row the - ! process that owns it - ! + ! process that owns it + ! call psb_cdall(ctxt,desc_a,info,vg=iv) myidx = desc_a%get_global_indices() nlr = size(myidx) @@ -336,7 +336,7 @@ contains npz = npdims(3) allocate(bndx(0:npx),bndy(0:npy),bndz(0:npz)) - ! We can reuse idx2ijk for process indices as well. + ! We can reuse idx2ijk for process indices as well. call idx2ijk(iamx,iamy,iamz,iam,npx,npy,npz,base=0) ! Now let's split the 3D cube in hexahedra call dist1Didx(bndx,idim,npx) @@ -346,7 +346,7 @@ contains call dist1Didx(bndz,idim,npz) mynz = bndz(iamz+1)-bndz(iamz) - ! How many indices do I own? + ! How many indices do I own? nlr = mynx*myny*mynz allocate(myidx(nlr)) ! Now, let's generate the list of indices I own @@ -370,9 +370,8 @@ contains ! ! Third example of use of CDALL: specify for each process ! the set of global indices it owns. - ! + ! call psb_cdall(ctxt,desc_a,info,vl=myidx) - case default write(psb_err_unit,*) iam, 'Initialization error: should not get here' info = -1 @@ -381,9 +380,9 @@ contains return end select - + if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz) - ! define rhs from boundary conditions; also build initial guess + ! define rhs from boundary conditions; also build initial guess if (info == psb_success_) call psb_geall(xv,desc_a,info) if (info == psb_success_) call psb_geall(bv,desc_a,info) @@ -398,12 +397,12 @@ contains end if ! we build an auxiliary matrix consisting of one row at a - ! time; just a small matrix. might be extended to generate - ! a bunch of rows per call. - ! + ! time; just a small matrix. might be extended to generate + ! a bunch of rows per call. + ! allocate(val(20*nb),irow(20*nb),& &icol(20*nb),stat=info) - if (info /= psb_success_ ) then + if (info /= psb_success_ ) then info=psb_err_alloc_dealloc_ call psb_errpush(info,name) goto 9999 @@ -416,11 +415,11 @@ contains call psb_barrier(ctxt) t1 = psb_wtime() do ii=1, nlr,nb - ib = min(nb,nlr-ii+1) + ib = min(nb,nlr-ii+1) icoeff = 1 do k=1,ib i=ii+k-1 - ! local matrix pointer + ! local matrix pointer glob_row=myidx(i) ! compute gridpoint coordinates call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim) @@ -430,11 +429,11 @@ contains z = (iz-1)*deltah zt(k) = f_(x,y,z) ! internal point: build discretization - ! + ! ! term depending on (x-1,y,z) ! val(icoeff) = -a1(x,y,z)/sqdeltah-b1(x,y,z)/deltah2 - if (ix == 1) then + if (ix == 1) then zt(k) = g(szero,y,z)*(-val(icoeff)) + zt(k) else call ijk2idx(icol(icoeff),ix-1,iy,iz,idim,idim,idim) @@ -443,19 +442,19 @@ contains endif ! term depending on (x,y-1,z) val(icoeff) = -a2(x,y,z)/sqdeltah-b2(x,y,z)/deltah2 - if (iy == 1) then + if (iy == 1) then zt(k) = g(x,szero,z)*(-val(icoeff)) + zt(k) else - call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim) + call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim) irow(icoeff) = glob_row icoeff = icoeff+1 endif ! term depending on (x,y,z-1) val(icoeff)=-a3(x,y,z)/sqdeltah-b3(x,y,z)/deltah2 - if (iz == 1) then + if (iz == 1) then zt(k) = g(x,y,szero)*(-val(icoeff)) + zt(k) else - call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim) + call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim) irow(icoeff) = glob_row icoeff = icoeff+1 endif @@ -463,33 +462,33 @@ contains ! term depending on (x,y,z) val(icoeff)=(2*sone)*(a1(x,y,z)+a2(x,y,z)+a3(x,y,z))/sqdeltah & & + c(x,y,z) - call ijk2idx(icol(icoeff),ix,iy,iz,idim,idim,idim) + call ijk2idx(icol(icoeff),ix,iy,iz,idim,idim,idim) irow(icoeff) = glob_row - icoeff = icoeff+1 + icoeff = icoeff+1 ! term depending on (x,y,z+1) val(icoeff)=-a3(x,y,z)/sqdeltah+b3(x,y,z)/deltah2 - if (iz == idim) then + if (iz == idim) then zt(k) = g(x,y,sone)*(-val(icoeff)) + zt(k) else - call ijk2idx(icol(icoeff),ix,iy,iz+1,idim,idim,idim) + call ijk2idx(icol(icoeff),ix,iy,iz+1,idim,idim,idim) irow(icoeff) = glob_row icoeff = icoeff+1 endif ! term depending on (x,y+1,z) val(icoeff)=-a2(x,y,z)/sqdeltah+b2(x,y,z)/deltah2 - if (iy == idim) then + if (iy == idim) then zt(k) = g(x,sone,z)*(-val(icoeff)) + zt(k) else - call ijk2idx(icol(icoeff),ix,iy+1,iz,idim,idim,idim) + call ijk2idx(icol(icoeff),ix,iy+1,iz,idim,idim,idim) irow(icoeff) = glob_row icoeff = icoeff+1 endif ! term depending on (x+1,y,z) val(icoeff)=-a1(x,y,z)/sqdeltah+b1(x,y,z)/deltah2 - if (ix==idim) then + if (ix==idim) then zt(k) = g(sone,y,z)*(-val(icoeff)) + zt(k) else - call ijk2idx(icol(icoeff),ix+1,iy,iz,idim,idim,idim) + call ijk2idx(icol(icoeff),ix+1,iy,iz,idim,idim,idim) irow(icoeff) = glob_row icoeff = icoeff+1 endif @@ -520,8 +519,8 @@ contains tcdasb = psb_wtime()-t1 call psb_barrier(ctxt) t1 = psb_wtime() - if (info == psb_success_) then - if (present(amold)) then + if (info == psb_success_) then + if (present(amold)) then call psb_spasb(a,desc_a,info,dupl=psb_dupl_err_,mold=amold) else call psb_spasb(a,desc_a,info,dupl=psb_dupl_err_,afmt=afmt) @@ -544,7 +543,7 @@ contains end if tasb = psb_wtime()-t1 call psb_barrier(ctxt) - ttot = psb_wtime() - t0 + ttot = psb_wtime() - t0 call psb_amx(ctxt,talc) call psb_amx(ctxt,tgen) @@ -586,9 +585,9 @@ program psb_s_pde3d integer(psb_ipk_) :: idim integer(psb_epk_) :: system_size - ! miscellaneous + ! miscellaneous real(psb_spk_), parameter :: one = sone - real(psb_dpk_) :: t1, t2, tprec + real(psb_dpk_) :: t1, t2, tprec ! sparse matrix and preconditioner type(psb_sspmat_type) :: a @@ -606,6 +605,14 @@ program psb_s_pde3d integer(psb_epk_) :: amatsize, precsize, descsize, d2size real(psb_spk_) :: err, eps + ! Parameters for solvers in Block-Jacobi preconditioner + type ainvparms + character(len=12) :: alg, orth_alg, ilu_alg, ilut_scale + integer(psb_ipk_) :: fill, inv_fill + real(psb_spk_) :: thresh, inv_thresh + end type ainvparms + type(ainvparms) :: parms + ! other variables integer(psb_ipk_) :: info, i character(len=20) :: name,ch_err @@ -617,7 +624,7 @@ program psb_s_pde3d call psb_init(ctxt) call psb_info(ctxt,iam,np) - if (iam < 0) then + if (iam < 0) then ! This should not happen, but just in case call psb_exit(ctxt) stop @@ -628,21 +635,20 @@ program psb_s_pde3d ! ! Hello world ! - if (iam == psb_root_) then + if (iam == psb_root_) then write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_ write(*,*) 'This is the ',trim(name),' sample program' end if ! ! get parameters ! - call get_parms(ctxt,kmethd,ptype,afmt,idim,istopc,itmax,itrace,irst,ipart) - + call get_parms(ctxt,kmethd,ptype,afmt,idim,istopc,itmax,itrace,irst,ipart,parms) ! - ! allocate and fill in the coefficient matrix, rhs and initial guess + ! allocate and fill in the coefficient matrix, rhs and initial guess ! call psb_barrier(ctxt) t1 = psb_wtime() - call psb_gen_pde3d(ctxt,idim,a,bv,xxv,desc_a,afmt,info,partition=ipart) + call psb_gen_pde3d(ctxt,idim,a,bv,xxv,desc_a,afmt,info,partition=ipart) call psb_barrier(ctxt) t2 = psb_wtime() - t1 if(info /= psb_success_) then @@ -655,9 +661,43 @@ program psb_s_pde3d if (iam == psb_root_) write(psb_out_unit,'(" ")') ! ! prepare the preconditioner. - ! + ! if(iam == psb_root_) write(psb_out_unit,'("Setting preconditioner to : ",a)')ptype call prec%init(ctxt,ptype,info) + ! + ! Set the options for the BJAC preconditioner + ! + if (psb_toupper(ptype) == "BJAC") then + call prec%set('sub_solve', parms%alg, info) + select case (psb_toupper(parms%alg)) + case ("ILU") + call prec%set('sub_fillin', parms%fill, info) + call prec%set('ilu_alg', parms%ilu_alg, info) + case ("ILUT") + call prec%set('sub_fillin', parms%fill, info) + call prec%set('sub_iluthrs', parms%thresh, info) + call prec%set('ilut_scale', parms%ilut_scale, info) + case ("AINV") + call prec%set('inv_thresh', parms%inv_thresh, info) + call prec%set('inv_fillin', parms%inv_fill, info) + call prec%set('ilut_scale', parms%ilut_scale, info) + call prec%set('ainv_alg', parms%orth_alg, info) + case ("INVK") + call prec%set('sub_fillin', parms%fill, info) + call prec%set('inv_fillin', parms%inv_fill, info) + call prec%set('ilut_scale', parms%ilut_scale, info) + case ("INVT") + call prec%set('sub_fillin', parms%fill, info) + call prec%set('inv_fillin', parms%inv_fill, info) + call prec%set('sub_iluthrs', parms%thresh, info) + call prec%set('inv_thresh', parms%inv_thresh, info) + call prec%set('ilut_scale', parms%ilut_scale, info) + case default + ! Do nothing, use default setting in the init routine + end select + else + ! nothing to set for NONE or DIAG preconditioner + end if call psb_barrier(ctxt) t1 = psb_wtime() @@ -677,14 +717,14 @@ program psb_s_pde3d if (iam == psb_root_) write(psb_out_unit,'(" ")') call prec%descr() ! - ! iterative method parameters + ! iterative method parameters ! if(iam == psb_root_) write(psb_out_unit,'("Calling iterative method ",a)')kmethd call psb_barrier(ctxt) - t1 = psb_wtime() + t1 = psb_wtime() eps = 1.d-6 - call psb_krylov(kmethd,a,prec,bv,xxv,eps,desc_a,info,& - & itmax=itmax,iter=iter,err=err,itrace=itrace,istop=istopc,irst=irst) + call psb_krylov(kmethd,a,prec,bv,xxv,eps,desc_a,info,& + & itmax=itmax,iter=iter,err=err,itrace=itrace,istop=istopc,irst=irst) if(info /= psb_success_) then info=psb_err_from_subroutine_ @@ -714,14 +754,14 @@ program psb_s_pde3d write(psb_out_unit,'("Convergence indicator on exit : ",es12.5)')err write(psb_out_unit,'("Info on exit : ",i12)')info write(psb_out_unit,'("Total memory occupation for A: ",i12)')amatsize - write(psb_out_unit,'("Total memory occupation for PREC: ",i12)')precsize + write(psb_out_unit,'("Total memory occupation for PREC: ",i12)')precsize write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize write(psb_out_unit,'("Storage format for A: ",a)') a%get_fmt() write(psb_out_unit,'("Storage format for DESC_A: ",a)') desc_a%get_fmt() end if - ! + ! ! cleanup storage and exit ! call psb_gefree(bv,desc_a,info) @@ -747,13 +787,15 @@ contains ! ! get iteration parameters from standard input ! - subroutine get_parms(ctxt,kmethd,ptype,afmt,idim,istopc,itmax,itrace,irst,ipart) + subroutine get_parms(ctxt,kmethd,ptype,afmt,idim,istopc,& + & itmax,itrace,irst,ipart,parms) type(psb_ctxt_type) :: ctxt character(len=*) :: kmethd, ptype, afmt integer(psb_ipk_) :: idim, istopc,itmax,itrace,irst,ipart integer(psb_ipk_) :: np, iam integer(psb_ipk_) :: ip, inp_unit character(len=1024) :: filename + type(ainvparms) :: parms call psb_info(ctxt, iam, np) @@ -782,12 +824,12 @@ contains if (ip >= 4) then read(inp_unit,*) ipart else - ipart = 3 + ipart = 3 endif if (ip >= 5) then read(inp_unit,*) istopc else - istopc=1 + istopc=1 endif if (ip >= 6) then read(inp_unit,*) itmax @@ -804,8 +846,27 @@ contains else irst=1 endif + if (ip >= 9) then + read(inp_unit,*) parms%alg + read(inp_unit,*) parms%ilu_alg + read(inp_unit,*) parms%ilut_scale + read(inp_unit,*) parms%fill + read(inp_unit,*) parms%inv_fill + read(inp_unit,*) parms%thresh + read(inp_unit,*) parms%inv_thresh + read(inp_unit,*) parms%orth_alg + else + parms%alg = 'ILU' ! Block Solver ILU,ILUT,INVK,AINVT,AORTH + parms%ilu_alg = 'NONE' ! If ILU : MILU or NONE othewise ignored + parms%ilut_scale = 'NONE' ! If ILUT: NONE, MAXVAL, DIAG, ARWSUM, ACLSUM, ARCSUM + parms%fill = 0 ! Level of fill for forward factorization + parms%inv_fill = 1 ! Level of fill for inverse factorization (only INVK) + parms%thresh = 1E-1_psb_spk_ ! Threshold for forward factorization + parms%inv_thresh = 1E-1_psb_spk_ ! Threshold for inverse factorization + parms%orth_alg = 'LLK' ! What orthogonalization algorithm? + endif - write(psb_out_unit,'("Solving matrix : ell1")') + write(psb_out_unit,'("Solving matrix : ell1")') write(psb_out_unit,& & '("Grid dimensions : ",i4," x ",i4," x ",i4)') & & idim,idim,idim @@ -820,11 +881,40 @@ contains write(psb_out_unit,'("Unknown data distrbution, defaulting to 3D")') end select write(psb_out_unit,'("Preconditioner : ",a)') ptype + if( psb_toupper(ptype) == "BJAC" ) then + write(psb_out_unit,'("Block subsolver : ",a)') parms%alg + select case (psb_toupper(parms%alg)) + case ('ILU') + write(psb_out_unit,'("Fill in : ",i0)') parms%fill + write(psb_out_unit,'("MILU : ",a)') parms%ilu_alg + case ('ILUT') + write(psb_out_unit,'("Fill in : ",i0)') parms%fill + write(psb_out_unit,'("Threshold : ",es12.5)') parms%thresh + write(psb_out_unit,'("Scaling : ",a)') parms%ilut_scale + case ('INVK') + write(psb_out_unit,'("Fill in : ",i0)') parms%fill + write(psb_out_unit,'("Invese Fill in : ",i0)') parms%inv_fill + write(psb_out_unit,'("Scaling : ",a)') parms%ilut_scale + case ('INVT') + write(psb_out_unit,'("Fill in : ",i0)') parms%fill + write(psb_out_unit,'("Threshold : ",es12.5)') parms%thresh + write(psb_out_unit,'("Invese Fill in : ",i0)') parms%inv_fill + write(psb_out_unit,'("Inverse Threshold : ",es12.5)') parms%inv_thresh + write(psb_out_unit,'("Scaling : ",a)') parms%ilut_scale + case ('AINV','AORTH') + write(psb_out_unit,'("Inverse Threshold : ",es12.5)') parms%inv_thresh + write(psb_out_unit,'("Invese Fill in : ",i0)') parms%inv_fill + write(psb_out_unit,'("Orthogonalization : ",a)') parms%orth_alg + write(psb_out_unit,'("Scaling : ",a)') parms%ilut_scale + case default + write(psb_out_unit,'("Unknown diagonal solver")') + end select + end if write(psb_out_unit,'("Iterative method : ",a)') kmethd write(psb_out_unit,'(" ")') else ! wrong number of parameter, print an error message and exit - call pr_usage(izero) + call pr_usage(izero) call psb_abort(ctxt) stop 1 endif @@ -843,20 +933,27 @@ contains call psb_bcast(ctxt,itmax) call psb_bcast(ctxt,itrace) call psb_bcast(ctxt,irst) + call psb_bcast(ctxt,parms%alg) + call psb_bcast(ctxt,parms%fill) + call psb_bcast(ctxt,parms%inv_fill) + call psb_bcast(ctxt,parms%thresh) + call psb_bcast(ctxt,parms%inv_thresh) + call psb_bcast(ctxt,parms%orth_alg) + call psb_bcast(ctxt,parms%ilut_scale) return end subroutine get_parms ! - ! print an error message - ! + ! print an error message + ! subroutine pr_usage(iout) integer(psb_ipk_) :: iout write(iout,*)'incorrect parameter(s) found' write(iout,*)' usage: pde3d90 methd prec dim & - &[istop itmax itrace]' + &[istop itmax itrace]' write(iout,*)' where:' - write(iout,*)' methd: cgstab cgs rgmres bicgstabl' + write(iout,*)' methd: cgstab cgs rgmres bicgstabl' write(iout,*)' prec : bjac diag none' write(iout,*)' dim number of points along each axis' write(iout,*)' the size of the resulting linear ' @@ -864,11 +961,9 @@ contains write(iout,*)' ipart data partition 1 3 ' write(iout,*)' istop stopping criterion 1, 2 ' write(iout,*)' itmax maximum number of iterations [500] ' - write(iout,*)' itrace <=0 (no tracing, default) or ' + write(iout,*)' itrace <=0 (no tracing, default) or ' write(iout,*)' >= 1 do tracing every itrace' - write(iout,*)' iterations ' + write(iout,*)' iterations ' end subroutine pr_usage end program psb_s_pde3d - - diff --git a/test/pargen/runs/ppde.inp b/test/pargen/runs/ppde.inp index f6fe33eb..f4b45430 100644 --- a/test/pargen/runs/ppde.inp +++ b/test/pargen/runs/ppde.inp @@ -1,4 +1,4 @@ -8 Number of entries below this +17 Number of entries below this BICGSTAB Iterative method BICGSTAB CGS BICG BICGSTABL RGMRES FCG CGR BJAC Preconditioner NONE DIAG BJAC CSR Storage format for matrix A: CSR COO @@ -8,5 +8,11 @@ CSR Storage format for matrix A: CSR COO 0100 MAXIT 05 ITRACE 002 IRST restart for RGMRES and BiCGSTABL - - +ILU Block Solver ILU,ILUT,INVK,AINVT,AORTH +NONE If ILU : MILU or NONE othewise ignored +NONE Scaling if ILUT: NONE, MAXVAL otherwise ignored +0 Level of fill for forward factorization +1 Level of fill for inverse factorization (only INVK) +1E-1 Threshold for forward factorization +1E-1 Threshold for inverse factorization (Only INVK, AINVT) +LLK What orthogonalization algorithm? (Only AINVT) diff --git a/util/Makefile b/util/Makefile index 9809a34f..4f4a134a 100644 --- a/util/Makefile +++ b/util/Makefile @@ -10,7 +10,8 @@ HERE=. BASEOBJS= psb_blockpart_mod.o psb_metispart_mod.o psb_partidx_mod.o \ psb_hbio_mod.o psb_mmio_mod.o psb_mat_dist_mod.o \ psb_s_mat_dist_mod.o psb_d_mat_dist_mod.o psb_c_mat_dist_mod.o psb_z_mat_dist_mod.o \ - psb_renum_mod.o psb_gps_mod.o + psb_renum_mod.o psb_gps_mod.o \ + psb_s_renum_mod.o psb_d_renum_mod.o psb_c_renum_mod.o psb_z_renum_mod.o IMPLOBJS= psb_s_hbio_impl.o psb_d_hbio_impl.o \ psb_c_hbio_impl.o psb_z_hbio_impl.o \ psb_s_mmio_impl.o psb_d_mmio_impl.o \ @@ -40,6 +41,7 @@ $(OBJS): $(MODDIR)/$(BASEMODNAME)$(.mod) psb_util_mod.o: $(BASEOBJS) psb_metispart_mod.o: psb_metis_int.o psb_mat_dist_mod.o: psb_s_mat_dist_mod.o psb_d_mat_dist_mod.o psb_c_mat_dist_mod.o psb_z_mat_dist_mod.o +psb_renum_mod.o: psb_s_renum_mod.o psb_d_renum_mod.o psb_c_renum_mod.o psb_z_renum_mod.o $(IMPLOBJS): $(BASEOBJS) diff --git a/util/psb_c_renum_impl.F90 b/util/psb_c_renum_impl.F90 index 4a1cf220..f57003a1 100644 --- a/util/psb_c_renum_impl.F90 +++ b/util/psb_c_renum_impl.F90 @@ -29,37 +29,61 @@ ! POSSIBILITY OF SUCH DAMAGE. ! ! -subroutine psb_c_mat_renums(alg,mat,info,perm) +subroutine psb_c_mat_renum(alg,mat,info,perm) use psb_base_mod - use psb_renum_mod, psb_protect_name => psb_c_mat_renums + use psb_renum_mod, psb_protect_name => psb_c_mat_renum implicit none character(len=*), intent(in) :: alg type(psb_cspmat_type), intent(inout) :: mat integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), allocatable, optional, intent(out) :: perm(:) - integer(psb_ipk_) :: err_act, nr, nc, ialg + integer(psb_ipk_) :: err_act, nr, nc, i, ierr(5) character(len=20) :: name info = psb_success_ name = 'mat_renum' call psb_erractionsave(err_act) + nr = mat%get_nrows() + nc = mat%get_ncols() + if (nr /= nc) then + info = psb_err_rectangular_mat_unsupported_ + ierr(1) = nr; ierr(2) = nc; + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + info = psb_success_ select case (psb_toupper(alg)) case ('GPS') - ialg = psb_mat_renum_gps_ - case ('AMD') - ialg = psb_mat_renum_amd_ + + call psb_mat_renum_gps(mat,info,perm) + + case('AMD') + + call psb_mat_renum_amd(mat,info,perm) + case ('NONE', 'ID') - ialg = psb_mat_renum_identity_ + nr = mat%get_nrows() + allocate(perm(nr),stat=info) + if (info == 0) then + do i=1,nr + perm(i) = i + end do + else + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + endif case default write(0,*) 'Unknown algorithm "',psb_toupper(alg),'"' - ialg = -1 + info = psb_err_input_value_invalid_i_ + ierr(1) = 1; + call psb_errpush(info,name,i_err=ierr) + goto 9999 end select - - call psb_mat_renum(ialg,mat,info,perm) - + if (info /= psb_success_) then info = psb_err_from_subroutine_non_ call psb_errpush(info,name) @@ -71,26 +95,219 @@ subroutine psb_c_mat_renums(alg,mat,info,perm) 9999 call psb_error_handler(err_act) return -end subroutine psb_c_mat_renums - -subroutine psb_c_mat_renum(alg,mat,info,perm) + +contains + + subroutine psb_mat_renum_gps(a,info,operm) + use psb_base_mod + use psb_gps_mod + implicit none + type(psb_cspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), allocatable, optional, intent(out) :: operm(:) + + ! + class(psb_c_base_sparse_mat), allocatable :: aa + type(psb_c_csr_sparse_mat) :: acsr + type(psb_c_coo_sparse_mat) :: acoo + + integer(psb_ipk_) :: err_act + character(len=20) :: name + integer(psb_ipk_), allocatable :: ndstk(:,:), iold(:), ndeg(:), perm(:) + integer(psb_ipk_) :: i, j, k, ideg, nr, ibw, ipf, idpth + + info = psb_success_ + name = 'mat_renum_gps' + call psb_erractionsave(err_act) + + info = psb_success_ + + call a%mold(aa) + call a%mv_to(aa) + call aa%mv_to_fmt(acsr,info) + ! Insert call to gps_reduce + nr = acsr%get_nrows() + ideg = 0 + do i=1, nr + ideg = max(ideg,acsr%irp(i+1)-acsr%irp(i)) + end do + allocate(ndstk(nr,ideg), iold(nr), perm(nr+1), ndeg(nr),stat=info) + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + do i=1, nr + iold(i) = i + ndstk(i,:) = 0 + k = 0 + do j=acsr%irp(i),acsr%irp(i+1)-1 + k = k + 1 + ndstk(i,k) = acsr%ja(j) + end do + end do + perm = 0 + + call psb_gps_reduce(ndstk,nr,ideg,iold,perm,ndeg,ibw,ipf,idpth) + + if (.not.psb_isaperm(nr,perm)) then + write(0,*) 'Something wrong: bad perm from gps_reduce' + info = psb_err_from_subroutine_ + call psb_errpush(info,name) + goto 9999 + end if + ! Move to coordinate to apply renumbering + call acsr%mv_to_coo(acoo,info) + do i=1, acoo%get_nzeros() + acoo%ia(i) = perm(acoo%ia(i)) + acoo%ja(i) = perm(acoo%ja(i)) + end do + call acoo%fix(info) + + ! Get back to where we started from + call aa%mv_from_coo(acoo,info) + call a%mv_from(aa) + if (present(operm)) then + call psb_realloc(nr,operm,info) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + operm(1:nr) = perm(1:nr) + end if + + deallocate(aa) + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_mat_renum_gps + + + subroutine psb_mat_renum_amd(a,info,operm) +#if defined(HAVE_AMD) + use iso_c_binding +#endif + use psb_base_mod + implicit none + type(psb_cspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), allocatable, optional, intent(out) :: operm(:) + + ! +#if defined(HAVE_AMD) + interface + function psb_amd_order(n,ap,ai,p)& + & result(res) bind(c,name='psb_amd_order') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + integer(c_int) :: ap(*), ai(*), p(*) + end function psb_amd_order + end interface +#endif + + type(psb_c_csc_sparse_mat) :: acsc + class(psb_c_base_sparse_mat), allocatable :: aa + type(psb_c_coo_sparse_mat) :: acoo + + integer(psb_ipk_), allocatable :: perm(:) + integer(psb_ipk_) :: err_act + character(len=20) :: name + integer(psb_ipk_) :: i, j, k, ideg, nr, ibw, ipf, idpth, nz + + info = psb_success_ + name = 'mat_renum_amd' + call psb_erractionsave(err_act) + +#if defined(HAVE_AMD) && defined(IPK4) + + info = psb_success_ + nr = a%get_nrows() + nz = a%get_nzeros() + allocate(perm(nr),stat=info) + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + + allocate(aa, mold=a%a) + call a%mv_to(acsc) + + acsc%ia(:) = acsc%ia(:) - 1 + acsc%icp(:) = acsc%icp(:) - 1 + + info = psb_amd_order(nr,acsc%icp,acsc%ia,perm) + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_amd_order') + goto 9999 + end if + + perm(:) = perm(:) + 1 + acsc%ia(:) = acsc%ia(:) + 1 + acsc%icp(:) = acsc%icp(:) + 1 + + call acsc%mv_to_coo(acoo,info) + do i=1, acoo%get_nzeros() + acoo%ia(i) = perm(acoo%ia(i)) + acoo%ja(i) = perm(acoo%ja(i)) + end do + call acoo%fix(info) + + ! Get back to where we started from + call aa%mv_from_coo(acoo,info) + call a%mv_from(aa) + if (present(operm)) then + call psb_realloc(nr,operm,info) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + operm(1:nr) = perm(1:nr) + end if + + deallocate(aa,perm) + +#else + + info = psb_err_missing_aux_lib_ + call psb_errpush(info, name) + goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + end subroutine psb_mat_renum_amd + +end subroutine psb_c_mat_renum + +subroutine psb_lc_mat_renum(alg,mat,info,perm) use psb_base_mod - use psb_renum_mod, psb_protect_name => psb_c_mat_renum + use psb_renum_mod, psb_protect_name => psb_lc_mat_renum implicit none - integer(psb_ipk_), intent(in) :: alg - type(psb_cspmat_type), intent(inout) :: mat + character(len=*), intent(in) :: alg + type(psb_lcspmat_type), intent(inout) :: mat integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), allocatable, optional, intent(out) :: perm(:) + integer(psb_lpk_), allocatable, optional, intent(out) :: perm(:) - integer(psb_ipk_) :: err_act, nr, nc, i, ierr(5) + integer(psb_lpk_) :: nr, nc, i + integer(psb_ipk_) :: err_act, ierr(5) character(len=20) :: name info = psb_success_ name = 'mat_renum' call psb_erractionsave(err_act) - info = psb_success_ - nr = mat%get_nrows() nc = mat%get_ncols() if (nr /= nc) then @@ -100,16 +317,17 @@ subroutine psb_c_mat_renum(alg,mat,info,perm) goto 9999 end if - select case (alg) - case(psb_mat_renum_gps_) + info = psb_success_ + select case (psb_toupper(alg)) + case ('GPS') - call psb_mat_renum_gps(mat,info,perm) + call psb_lmat_renum_gps(mat,info,perm) - case(psb_mat_renum_amd_) + case('AMD') - call psb_mat_renum_amd(mat,info,perm) + call psb_lmat_renum_amd(mat,info,perm) - case(psb_mat_renum_identity_) + case ('NONE', 'ID') nr = mat%get_nrows() allocate(perm(nr),stat=info) if (info == 0) then @@ -122,8 +340,9 @@ subroutine psb_c_mat_renum(alg,mat,info,perm) goto 9999 endif case default + write(0,*) 'Unknown algorithm "',psb_toupper(alg),'"' info = psb_err_input_value_invalid_i_ - ierr(1) = 1; ierr(2) = alg; + ierr(1) = 1; call psb_errpush(info,name,i_err=ierr) goto 9999 end select @@ -142,26 +361,26 @@ subroutine psb_c_mat_renum(alg,mat,info,perm) contains - subroutine psb_mat_renum_gps(a,info,operm) + subroutine psb_lmat_renum_gps(a,info,operm) use psb_base_mod - use psb_gps_mod + use psb_lgps_mod implicit none - type(psb_cspmat_type), intent(inout) :: a + type(psb_lcspmat_type), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), allocatable, optional, intent(out) :: operm(:) + integer(psb_lpk_), allocatable, optional, intent(out) :: operm(:) ! - class(psb_c_base_sparse_mat), allocatable :: aa - type(psb_c_csr_sparse_mat) :: acsr - type(psb_c_coo_sparse_mat) :: acoo + class(psb_lc_base_sparse_mat), allocatable :: aa + type(psb_lc_csr_sparse_mat) :: acsr + type(psb_lc_coo_sparse_mat) :: acoo integer(psb_ipk_) :: err_act character(len=20) :: name - integer(psb_ipk_), allocatable :: ndstk(:,:), iold(:), ndeg(:), perm(:) - integer(psb_ipk_) :: i, j, k, ideg, nr, ibw, ipf, idpth + integer(psb_lpk_), allocatable :: ndstk(:,:), iold(:), ndeg(:), perm(:) + integer(psb_lpk_) :: i, j, k, ideg, nr, ibw, ipf, idpth info = psb_success_ - name = 'mat_renum' + name = 'mat_renum_gps' call psb_erractionsave(err_act) info = psb_success_ @@ -192,7 +411,7 @@ contains end do perm = 0 - call psb_gps_reduce(ndstk,nr,ideg,iold,perm,ndeg,ibw,ipf,idpth) + call psb_lgps_reduce(ndstk,nr,ideg,iold,perm,ndeg,ibw,ipf,idpth) if (.not.psb_isaperm(nr,perm)) then write(0,*) 'Something wrong: bad perm from gps_reduce' @@ -228,18 +447,18 @@ contains 9999 call psb_error_handler(err_act) return - end subroutine psb_mat_renum_gps + end subroutine psb_lmat_renum_gps - subroutine psb_mat_renum_amd(a,info,operm) + subroutine psb_lmat_renum_amd(a,info,operm) #if defined(HAVE_AMD) use iso_c_binding #endif use psb_base_mod implicit none - type(psb_cspmat_type), intent(inout) :: a + type(psb_lcspmat_type), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), allocatable, optional, intent(out) :: operm(:) + integer(psb_lpk_), allocatable, optional, intent(out) :: operm(:) ! #if defined(HAVE_AMD) @@ -254,20 +473,20 @@ contains end interface #endif - type(psb_c_csc_sparse_mat) :: acsc - class(psb_c_base_sparse_mat), allocatable :: aa - type(psb_c_coo_sparse_mat) :: acoo + type(psb_lc_csc_sparse_mat) :: acsc + class(psb_lc_base_sparse_mat), allocatable :: aa + type(psb_lc_coo_sparse_mat) :: acoo integer(psb_ipk_), allocatable :: perm(:) integer(psb_ipk_) :: err_act character(len=20) :: name - integer(psb_ipk_) :: i, j, k, ideg, nr, ibw, ipf, idpth, nz + integer(psb_lpk_) :: i, j, k, ideg, nr, ibw, ipf, idpth, nz info = psb_success_ name = 'mat_renum_amd' call psb_erractionsave(err_act) -#if defined(HAVE_AMD) && defined(IPK4) +#if defined(HAVE_AMD) && defined(LPK4) info = psb_success_ nr = a%get_nrows() @@ -331,11 +550,9 @@ contains 9999 call psb_error_handler(err_act) return + end subroutine psb_lmat_renum_amd - end subroutine psb_mat_renum_amd - -end subroutine psb_c_mat_renum - +end subroutine psb_lc_mat_renum subroutine psb_c_cmp_bwpf(mat,bwl,bwu,prf,info) use psb_base_mod @@ -386,3 +603,52 @@ subroutine psb_c_cmp_bwpf(mat,bwl,bwu,prf,info) end select end subroutine psb_c_cmp_bwpf + +subroutine psb_lc_cmp_bwpf(mat,bwl,bwu,prf,info) + use psb_base_mod + use psb_renum_mod, psb_protect_name => psb_lc_cmp_bwpf + implicit none + type(psb_lcspmat_type), intent(in) :: mat + integer(psb_lpk_), intent(out) :: bwl, bwu + integer(psb_lpk_), intent(out) :: prf + integer(psb_ipk_), intent(out) :: info + ! + integer(psb_lpk_), allocatable :: irow(:), icol(:) + complex(psb_spk_), allocatable :: val(:) + integer(psb_lpk_) :: nz, i, j, lrbu, lrbl + + info = psb_success_ + bwl = 0 + bwu = 0 + prf = 0 + select type (aa=>mat%a) + class is (psb_lc_csr_sparse_mat) + do i=1, aa%get_nrows() + lrbl = 0 + lrbu = 0 + do j = aa%irp(i), aa%irp(i+1) - 1 + lrbl = max(lrbl,i-aa%ja(j)) + lrbu = max(lrbu,aa%ja(j)-i) + end do + prf = prf + lrbl+lrbu + bwu = max(bwu,lrbu) + bwl = max(bwl,lrbu) + end do + + class default + do i=1, aa%get_nrows() + lrbl = 0 + lrbu = 0 + call aa%csget(i,i,nz,irow,icol,val,info) + if (info /= psb_success_) return + do j=1, nz + lrbl = max(lrbl,i-icol(j)) + lrbu = max(lrbu,icol(j)-i) + end do + prf = prf + lrbl+lrbu + bwu = max(bwu,lrbu) + bwl = max(bwl,lrbu) + end do + end select + +end subroutine psb_lc_cmp_bwpf diff --git a/util/psb_c_renum_mod.f90 b/util/psb_c_renum_mod.f90 new file mode 100644 index 00000000..11335484 --- /dev/null +++ b/util/psb_c_renum_mod.f90 @@ -0,0 +1,69 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +module psb_c_renum_mod + use psb_base_mod + + interface psb_mat_renum + subroutine psb_c_mat_renum(alg,mat,info,perm) + import :: psb_ipk_, psb_cspmat_type + character(len=*), intent(in) :: alg + type(psb_cspmat_type), intent(inout) :: mat + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), allocatable, optional, intent(out) :: perm(:) + end subroutine psb_c_mat_renum + subroutine psb_lc_mat_renum(alg,mat,info,perm) + import :: psb_ipk_, psb_lpk_, psb_lcspmat_type + character(len=*), intent(in) :: alg + type(psb_lcspmat_type), intent(inout) :: mat + integer(psb_ipk_), intent(out) :: info + integer(psb_lpk_), allocatable, optional, intent(out) :: perm(:) + end subroutine psb_lc_mat_renum + end interface psb_mat_renum + + interface psb_cmp_bwpf + subroutine psb_c_cmp_bwpf(mat,bwl,bwu,prf,info) + import :: psb_ipk_, psb_cspmat_type + type(psb_cspmat_type), intent(in) :: mat + integer(psb_ipk_), intent(out) :: bwl, bwu + integer(psb_ipk_), intent(out) :: prf + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cmp_bwpf + subroutine psb_lc_cmp_bwpf(mat,bwl,bwu,prf,info) + import :: psb_ipk_, psb_lpk_, psb_lcspmat_type + type(psb_lcspmat_type), intent(in) :: mat + integer(psb_lpk_), intent(out) :: bwl, bwu + integer(psb_lpk_), intent(out) :: prf + integer(psb_ipk_), intent(out) :: info + end subroutine psb_lc_cmp_bwpf + end interface psb_cmp_bwpf + +end module psb_c_renum_mod diff --git a/util/psb_d_renum_impl.F90 b/util/psb_d_renum_impl.F90 index bd4664d8..1c7928f5 100644 --- a/util/psb_d_renum_impl.F90 +++ b/util/psb_d_renum_impl.F90 @@ -29,37 +29,61 @@ ! POSSIBILITY OF SUCH DAMAGE. ! ! -subroutine psb_d_mat_renums(alg,mat,info,perm) +subroutine psb_d_mat_renum(alg,mat,info,perm) use psb_base_mod - use psb_renum_mod, psb_protect_name => psb_d_mat_renums + use psb_renum_mod, psb_protect_name => psb_d_mat_renum implicit none character(len=*), intent(in) :: alg type(psb_dspmat_type), intent(inout) :: mat integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), allocatable, optional, intent(out) :: perm(:) - integer(psb_ipk_) :: err_act, nr, nc, ialg + integer(psb_ipk_) :: err_act, nr, nc, i, ierr(5) character(len=20) :: name info = psb_success_ name = 'mat_renum' call psb_erractionsave(err_act) + nr = mat%get_nrows() + nc = mat%get_ncols() + if (nr /= nc) then + info = psb_err_rectangular_mat_unsupported_ + ierr(1) = nr; ierr(2) = nc; + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + info = psb_success_ select case (psb_toupper(alg)) case ('GPS') - ialg = psb_mat_renum_gps_ - case ('AMD') - ialg = psb_mat_renum_amd_ + + call psb_mat_renum_gps(mat,info,perm) + + case('AMD') + + call psb_mat_renum_amd(mat,info,perm) + case ('NONE', 'ID') - ialg = psb_mat_renum_identity_ + nr = mat%get_nrows() + allocate(perm(nr),stat=info) + if (info == 0) then + do i=1,nr + perm(i) = i + end do + else + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + endif case default write(0,*) 'Unknown algorithm "',psb_toupper(alg),'"' - ialg = -1 + info = psb_err_input_value_invalid_i_ + ierr(1) = 1; + call psb_errpush(info,name,i_err=ierr) + goto 9999 end select - - call psb_mat_renum(ialg,mat,info,perm) - + if (info /= psb_success_) then info = psb_err_from_subroutine_non_ call psb_errpush(info,name) @@ -71,26 +95,219 @@ subroutine psb_d_mat_renums(alg,mat,info,perm) 9999 call psb_error_handler(err_act) return -end subroutine psb_d_mat_renums - -subroutine psb_d_mat_renum(alg,mat,info,perm) + +contains + + subroutine psb_mat_renum_gps(a,info,operm) + use psb_base_mod + use psb_gps_mod + implicit none + type(psb_dspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), allocatable, optional, intent(out) :: operm(:) + + ! + class(psb_d_base_sparse_mat), allocatable :: aa + type(psb_d_csr_sparse_mat) :: acsr + type(psb_d_coo_sparse_mat) :: acoo + + integer(psb_ipk_) :: err_act + character(len=20) :: name + integer(psb_ipk_), allocatable :: ndstk(:,:), iold(:), ndeg(:), perm(:) + integer(psb_ipk_) :: i, j, k, ideg, nr, ibw, ipf, idpth + + info = psb_success_ + name = 'mat_renum_gps' + call psb_erractionsave(err_act) + + info = psb_success_ + + call a%mold(aa) + call a%mv_to(aa) + call aa%mv_to_fmt(acsr,info) + ! Insert call to gps_reduce + nr = acsr%get_nrows() + ideg = 0 + do i=1, nr + ideg = max(ideg,acsr%irp(i+1)-acsr%irp(i)) + end do + allocate(ndstk(nr,ideg), iold(nr), perm(nr+1), ndeg(nr),stat=info) + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + do i=1, nr + iold(i) = i + ndstk(i,:) = 0 + k = 0 + do j=acsr%irp(i),acsr%irp(i+1)-1 + k = k + 1 + ndstk(i,k) = acsr%ja(j) + end do + end do + perm = 0 + + call psb_gps_reduce(ndstk,nr,ideg,iold,perm,ndeg,ibw,ipf,idpth) + + if (.not.psb_isaperm(nr,perm)) then + write(0,*) 'Something wrong: bad perm from gps_reduce' + info = psb_err_from_subroutine_ + call psb_errpush(info,name) + goto 9999 + end if + ! Move to coordinate to apply renumbering + call acsr%mv_to_coo(acoo,info) + do i=1, acoo%get_nzeros() + acoo%ia(i) = perm(acoo%ia(i)) + acoo%ja(i) = perm(acoo%ja(i)) + end do + call acoo%fix(info) + + ! Get back to where we started from + call aa%mv_from_coo(acoo,info) + call a%mv_from(aa) + if (present(operm)) then + call psb_realloc(nr,operm,info) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + operm(1:nr) = perm(1:nr) + end if + + deallocate(aa) + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_mat_renum_gps + + + subroutine psb_mat_renum_amd(a,info,operm) +#if defined(HAVE_AMD) + use iso_c_binding +#endif + use psb_base_mod + implicit none + type(psb_dspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), allocatable, optional, intent(out) :: operm(:) + + ! +#if defined(HAVE_AMD) + interface + function psb_amd_order(n,ap,ai,p)& + & result(res) bind(c,name='psb_amd_order') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + integer(c_int) :: ap(*), ai(*), p(*) + end function psb_amd_order + end interface +#endif + + type(psb_d_csc_sparse_mat) :: acsc + class(psb_d_base_sparse_mat), allocatable :: aa + type(psb_d_coo_sparse_mat) :: acoo + + integer(psb_ipk_), allocatable :: perm(:) + integer(psb_ipk_) :: err_act + character(len=20) :: name + integer(psb_ipk_) :: i, j, k, ideg, nr, ibw, ipf, idpth, nz + + info = psb_success_ + name = 'mat_renum_amd' + call psb_erractionsave(err_act) + +#if defined(HAVE_AMD) && defined(IPK4) + + info = psb_success_ + nr = a%get_nrows() + nz = a%get_nzeros() + allocate(perm(nr),stat=info) + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + + allocate(aa, mold=a%a) + call a%mv_to(acsc) + + acsc%ia(:) = acsc%ia(:) - 1 + acsc%icp(:) = acsc%icp(:) - 1 + + info = psb_amd_order(nr,acsc%icp,acsc%ia,perm) + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_amd_order') + goto 9999 + end if + + perm(:) = perm(:) + 1 + acsc%ia(:) = acsc%ia(:) + 1 + acsc%icp(:) = acsc%icp(:) + 1 + + call acsc%mv_to_coo(acoo,info) + do i=1, acoo%get_nzeros() + acoo%ia(i) = perm(acoo%ia(i)) + acoo%ja(i) = perm(acoo%ja(i)) + end do + call acoo%fix(info) + + ! Get back to where we started from + call aa%mv_from_coo(acoo,info) + call a%mv_from(aa) + if (present(operm)) then + call psb_realloc(nr,operm,info) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + operm(1:nr) = perm(1:nr) + end if + + deallocate(aa,perm) + +#else + + info = psb_err_missing_aux_lib_ + call psb_errpush(info, name) + goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + end subroutine psb_mat_renum_amd + +end subroutine psb_d_mat_renum + +subroutine psb_ld_mat_renum(alg,mat,info,perm) use psb_base_mod - use psb_renum_mod, psb_protect_name => psb_d_mat_renum + use psb_renum_mod, psb_protect_name => psb_ld_mat_renum implicit none - integer(psb_ipk_), intent(in) :: alg - type(psb_dspmat_type), intent(inout) :: mat + character(len=*), intent(in) :: alg + type(psb_ldspmat_type), intent(inout) :: mat integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), allocatable, optional, intent(out) :: perm(:) + integer(psb_lpk_), allocatable, optional, intent(out) :: perm(:) - integer(psb_ipk_) :: err_act, nr, nc, i, ierr(5) + integer(psb_lpk_) :: nr, nc, i + integer(psb_ipk_) :: err_act, ierr(5) character(len=20) :: name info = psb_success_ name = 'mat_renum' call psb_erractionsave(err_act) - info = psb_success_ - nr = mat%get_nrows() nc = mat%get_ncols() if (nr /= nc) then @@ -100,16 +317,17 @@ subroutine psb_d_mat_renum(alg,mat,info,perm) goto 9999 end if - select case (alg) - case(psb_mat_renum_gps_) + info = psb_success_ + select case (psb_toupper(alg)) + case ('GPS') - call psb_mat_renum_gps(mat,info,perm) + call psb_lmat_renum_gps(mat,info,perm) - case(psb_mat_renum_amd_) + case('AMD') - call psb_mat_renum_amd(mat,info,perm) + call psb_lmat_renum_amd(mat,info,perm) - case(psb_mat_renum_identity_) + case ('NONE', 'ID') nr = mat%get_nrows() allocate(perm(nr),stat=info) if (info == 0) then @@ -122,8 +340,9 @@ subroutine psb_d_mat_renum(alg,mat,info,perm) goto 9999 endif case default + write(0,*) 'Unknown algorithm "',psb_toupper(alg),'"' info = psb_err_input_value_invalid_i_ - ierr(1) = 1; ierr(2) = alg; + ierr(1) = 1; call psb_errpush(info,name,i_err=ierr) goto 9999 end select @@ -142,23 +361,23 @@ subroutine psb_d_mat_renum(alg,mat,info,perm) contains - subroutine psb_mat_renum_gps(a,info,operm) + subroutine psb_lmat_renum_gps(a,info,operm) use psb_base_mod - use psb_gps_mod + use psb_lgps_mod implicit none - type(psb_dspmat_type), intent(inout) :: a + type(psb_ldspmat_type), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), allocatable, optional, intent(out) :: operm(:) + integer(psb_lpk_), allocatable, optional, intent(out) :: operm(:) ! - class(psb_d_base_sparse_mat), allocatable :: aa - type(psb_d_csr_sparse_mat) :: acsr - type(psb_d_coo_sparse_mat) :: acoo + class(psb_ld_base_sparse_mat), allocatable :: aa + type(psb_ld_csr_sparse_mat) :: acsr + type(psb_ld_coo_sparse_mat) :: acoo integer(psb_ipk_) :: err_act character(len=20) :: name - integer(psb_ipk_), allocatable :: ndstk(:,:), iold(:), ndeg(:), perm(:) - integer(psb_ipk_) :: i, j, k, ideg, nr, ibw, ipf, idpth + integer(psb_lpk_), allocatable :: ndstk(:,:), iold(:), ndeg(:), perm(:) + integer(psb_lpk_) :: i, j, k, ideg, nr, ibw, ipf, idpth info = psb_success_ name = 'mat_renum_gps' @@ -192,7 +411,7 @@ contains end do perm = 0 - call psb_gps_reduce(ndstk,nr,ideg,iold,perm,ndeg,ibw,ipf,idpth) + call psb_lgps_reduce(ndstk,nr,ideg,iold,perm,ndeg,ibw,ipf,idpth) if (.not.psb_isaperm(nr,perm)) then write(0,*) 'Something wrong: bad perm from gps_reduce' @@ -228,18 +447,18 @@ contains 9999 call psb_error_handler(err_act) return - end subroutine psb_mat_renum_gps + end subroutine psb_lmat_renum_gps - subroutine psb_mat_renum_amd(a,info,operm) + subroutine psb_lmat_renum_amd(a,info,operm) #if defined(HAVE_AMD) use iso_c_binding #endif use psb_base_mod implicit none - type(psb_dspmat_type), intent(inout) :: a + type(psb_ldspmat_type), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), allocatable, optional, intent(out) :: operm(:) + integer(psb_lpk_), allocatable, optional, intent(out) :: operm(:) ! #if defined(HAVE_AMD) @@ -254,20 +473,20 @@ contains end interface #endif - type(psb_d_csc_sparse_mat) :: acsc - class(psb_d_base_sparse_mat), allocatable :: aa - type(psb_d_coo_sparse_mat) :: acoo + type(psb_ld_csc_sparse_mat) :: acsc + class(psb_ld_base_sparse_mat), allocatable :: aa + type(psb_ld_coo_sparse_mat) :: acoo integer(psb_ipk_), allocatable :: perm(:) integer(psb_ipk_) :: err_act character(len=20) :: name - integer(psb_ipk_) :: i, j, k, ideg, nr, ibw, ipf, idpth, nz + integer(psb_lpk_) :: i, j, k, ideg, nr, ibw, ipf, idpth, nz info = psb_success_ name = 'mat_renum_amd' call psb_erractionsave(err_act) -#if defined(HAVE_AMD) && defined(IPK4) +#if defined(HAVE_AMD) && defined(LPK4) info = psb_success_ nr = a%get_nrows() @@ -331,10 +550,9 @@ contains 9999 call psb_error_handler(err_act) return - end subroutine psb_mat_renum_amd - -end subroutine psb_d_mat_renum + end subroutine psb_lmat_renum_amd +end subroutine psb_ld_mat_renum subroutine psb_d_cmp_bwpf(mat,bwl,bwu,prf,info) use psb_base_mod @@ -385,3 +603,52 @@ subroutine psb_d_cmp_bwpf(mat,bwl,bwu,prf,info) end select end subroutine psb_d_cmp_bwpf + +subroutine psb_ld_cmp_bwpf(mat,bwl,bwu,prf,info) + use psb_base_mod + use psb_renum_mod, psb_protect_name => psb_ld_cmp_bwpf + implicit none + type(psb_ldspmat_type), intent(in) :: mat + integer(psb_lpk_), intent(out) :: bwl, bwu + integer(psb_lpk_), intent(out) :: prf + integer(psb_ipk_), intent(out) :: info + ! + integer(psb_lpk_), allocatable :: irow(:), icol(:) + real(psb_dpk_), allocatable :: val(:) + integer(psb_lpk_) :: nz, i, j, lrbu, lrbl + + info = psb_success_ + bwl = 0 + bwu = 0 + prf = 0 + select type (aa=>mat%a) + class is (psb_ld_csr_sparse_mat) + do i=1, aa%get_nrows() + lrbl = 0 + lrbu = 0 + do j = aa%irp(i), aa%irp(i+1) - 1 + lrbl = max(lrbl,i-aa%ja(j)) + lrbu = max(lrbu,aa%ja(j)-i) + end do + prf = prf + lrbl+lrbu + bwu = max(bwu,lrbu) + bwl = max(bwl,lrbu) + end do + + class default + do i=1, aa%get_nrows() + lrbl = 0 + lrbu = 0 + call aa%csget(i,i,nz,irow,icol,val,info) + if (info /= psb_success_) return + do j=1, nz + lrbl = max(lrbl,i-icol(j)) + lrbu = max(lrbu,icol(j)-i) + end do + prf = prf + lrbl+lrbu + bwu = max(bwu,lrbu) + bwl = max(bwl,lrbu) + end do + end select + +end subroutine psb_ld_cmp_bwpf diff --git a/util/psb_d_renum_mod.f90 b/util/psb_d_renum_mod.f90 new file mode 100644 index 00000000..8e4b4e03 --- /dev/null +++ b/util/psb_d_renum_mod.f90 @@ -0,0 +1,69 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +module psb_d_renum_mod + use psb_base_mod + + interface psb_mat_renum + subroutine psb_d_mat_renum(alg,mat,info,perm) + import :: psb_ipk_, psb_dspmat_type + character(len=*), intent(in) :: alg + type(psb_dspmat_type), intent(inout) :: mat + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), allocatable, optional, intent(out) :: perm(:) + end subroutine psb_d_mat_renum + subroutine psb_ld_mat_renum(alg,mat,info,perm) + import :: psb_ipk_, psb_lpk_, psb_ldspmat_type + character(len=*), intent(in) :: alg + type(psb_ldspmat_type), intent(inout) :: mat + integer(psb_ipk_), intent(out) :: info + integer(psb_lpk_), allocatable, optional, intent(out) :: perm(:) + end subroutine psb_ld_mat_renum + end interface psb_mat_renum + + interface psb_cmp_bwpf + subroutine psb_d_cmp_bwpf(mat,bwl,bwu,prf,info) + import :: psb_ipk_, psb_dspmat_type + type(psb_dspmat_type), intent(in) :: mat + integer(psb_ipk_), intent(out) :: bwl, bwu + integer(psb_ipk_), intent(out) :: prf + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cmp_bwpf + subroutine psb_ld_cmp_bwpf(mat,bwl,bwu,prf,info) + import :: psb_ipk_, psb_lpk_, psb_ldspmat_type + type(psb_ldspmat_type), intent(in) :: mat + integer(psb_lpk_), intent(out) :: bwl, bwu + integer(psb_lpk_), intent(out) :: prf + integer(psb_ipk_), intent(out) :: info + end subroutine psb_ld_cmp_bwpf + end interface psb_cmp_bwpf + +end module psb_d_renum_mod diff --git a/util/psb_gps_mod.f90 b/util/psb_gps_mod.f90 index fc701ffc..7c42d990 100644 --- a/util/psb_gps_mod.f90 +++ b/util/psb_gps_mod.f90 @@ -774,4 +774,744 @@ CONTAINS RETURN END SUBROUTINE REALLOC ! -END MODULE psb_gps_mod +end module psb_gps_mod + +module psb_lgps_mod + ! + use psb_base_mod, only : psb_lpk_ + public psb_lgps_reduce + ! + ! COMMON /GRA/ N, IDPTH, IDEG + ! + private + ! common /CC/ XCC,SIZEG,STPT + integer(psb_lpk_), save :: xcc,n,idpth,ideg + integer(psb_lpk_),allocatable,SAVE :: SIZEG(:),STPT(:) + ! + ! COMMON /LVLW/ NHIGH,NLOW,NACUM + integer(psb_lpk_),allocatable,target,save :: NHIGH(:),NLOW(:),NACUM(:),AUX(:) + integer(psb_lpk_),PARAMETER :: INIT=500 + ! +CONTAINS + ! +!!$ SUBROUTINE psb_gps_reduce(NDSTK, NR, IOLD, RENUM, NDEG, LVL, LVLS1, LVLS2,& +!!$ & CCSTOR, IBW2, IPF2,NE,IDPTHE,IDEGE) + SUBROUTINE psb_lgps_reduce(NDSTK, NR, IDEGE, IOLD, RENUM, NDEG,ibw2,ipf2,IDPTHE) + ! SUBROUTINE REDUCE DETERMINES A ROW AND COLUMN PERMUTATION WHICH, + ! WHEN APPLIED TO A GIVEN SPARSE MATRIX, PRODUCES A PERMUTED + ! MATRIX WITH A SMALLER BANDWIDTH AND PROFILE. + ! THE INPUT ARRAY IS A CONNECTION TABLE WHICH REPRESENTS THE + ! INDICES OF THE NONZERO ELEMENTS OF THE MATRIX, A. THE ALGO- + ! RITHM IS DESCRIBED IN TERMS OF THE ADJACENCY GRAPH WHICH + ! HAS THE CHARACTERISTIC THAT THERE IS AN EDGE (CONNECTION) + ! BETWEEN NODES I AND J IF A(I,J) /= 0 AND I /= J. + ! DIMENSIONING INFORMATION--THE FOLLOWING INTEGER ARRAYS MUST BE + ! DIMENSIONED IN THE CALLING ROUTINE. + ! NDSTK(NR,D1) D1 IS >= MAXIMUM DEGREE OF ALL NODES. + ! IOLD(D2) D2 AND NR ARE >= THE TOTAL NUMBER OF + ! RENUM(D2+1) NODES IN THE GRAPH. + ! NDEG(D2) STORAGE REQUIREMENTS CAN BE SIGNIFICANTLY + ! LVL(D2) DECREASED FOR IBM 360 AND 370 COMPUTERS + ! LVLS1(D2) BY REPLACING INTEGER NDSTK BY + ! LVLS2(D2) INTEGER*2 NDSTK IN SUBROUTINES REDUCE, + ! CCSTOR(D2) DGREE, FNDIAM, TREE AND NUMBER. + ! COMMON INFORMATION--THE FOLLOWING COMMON BLOCK MUST BE IN THE + ! CALLING ROUTINE. + ! COMMON/GRA/N,IDPTH,IDEG + ! EXPLANATION OF INPUT VARIABLES-- + ! NDSTK- CONNECTION TABLE REPRESENTING GRAPH. + ! NDSTK(I,J)=NODE NUMBER OF JTH CONNECTION TO NODE + ! NUMBER I. A CONNECTION OF A NODE TO ITSELF IS NOT + ! LISTED. EXTRA POSITIONS MUST HAVE ZERO FILL. + ! NR- ROW DIMENSION ASSIGNED NDSTK IN CALLING PROGRAM. + ! IOLD(I)- NUMBERING OF ITH NODE UPON INPUT. + ! IF NO NUMBERING EXISTS THEN IOLD(I)=I. + ! N- NUMBER OF NODES IN GRAPH (EQUAL TO ORDER OF MATRIX). + ! IDEG- MAXIMUM DEGREE OF ANY NODE IN THE GRAPH. + ! EXPLANATION OF OUTPUT VARIABLES-- + ! RENUM(I)- THE NEW NUMBER FOR THE ITH NODE. + ! NDEG(I)- THE DEGREE OF THE ITH NODE. + ! IBW2- THE BANDWIDTH AFTER RENUMBERING. + ! IPF2- THE PROFILE AFTER RENUMBERING. + ! IDPTH- NUMBER OF LEVELS IN REDUCE LEVEL STRUCTURE. + ! THE FOLLOWING ONLY HAVE MEANING IF THE GRAPH WAS CONNECTED-- + ! LVL(I)- INDEX INTO LVLS1 TO THE FIRST NODE IN LEVEL I. + ! LVL(I+1)-LVL(I)= NUMBER OF NODES IN ITH LEVEL + ! LVLS1- NODE NUMBERS LISTED BY LEVEL. + ! LVLS2(I)- THE LEVEL ASSIGNED TO NODE I BY REDUCE. + ! WORKING STORAGE VARIABLE-- + ! CCSTOR + ! LOCAL STORAGE-- + ! COMMON/CC/-SUBROUTINES REDUCE, SORT2 AND PIKLVL ASSUME THAT + ! THE GRAPH HAS AT MOST 50 CONNECTED COMPONENTS. + ! SUBROUTINE FNDIAM ASSUMES THAT THERE ARE AT MOST + ! 100 NODES IN THE LAST LEVEL. + ! COMMON/LVLW/-SUBROUTINES SETUP AND PIKLVL ASSUME THAT THERE + ! ARE AT MOST 100 LEVELS. + ! USE INTEGER*2 NDSTK WITH AN IBM 360 OR 370. + use psb_base_mod + implicit none + INTEGER(psb_lpk_) :: NR, IDEGE, IBW2, IPF2, IDPTHE + ! COMMON /GRA/ N, IDPTH, IDEG + ! IT IS ASSUMED THAT THE GRAPH HAS AT MOST 50 CONNECTED COMPONENTS. + ! COMMON /CC/ XCC, SIZEG(50), STPT(50) + ! COMMON /LVLW/ NHIGH(100), NLOW(100), NACUM(100) + integer(psb_lpk_) :: stnode, rvnode, stnum, sbnum + integer(psb_lpk_) :: ndstk(nr,idege), iold(nr), renum(nr+1), ndeg(nr) + integer(psb_lpk_) :: lvl(nr), lvls1(nr), lvls2(nr), ccstor(nr) + integer(psb_lpk_) :: nflg, info, i, ibw1, ipf1, idflt, isdir, lroot, lowdg + integer(psb_lpk_) :: lvlbot, lvln, lvlwth, maxlw, num + n = nr + ideg = idege + idpth = 0 + + ALLOCATE(SIZEG(NR),STPT(NR), STAT=INFO) + IF(INFO /= psb_success_) THEN + write(psb_out_unit,*) 'ERROR! MEMORY ALLOCATION # 1 FAILED IN GPS' + STOP + END IF + ! + ALLOCATE(NHIGH(INIT), NLOW(INIT), NACUM(INIT), AUX(INIT), STAT=INFO) + IF(INFO /= psb_success_) THEN + write(psb_out_unit,*) 'ERROR! MEMORY ALLOCATION # 2 FAILED IN GPS' + STOP + END IF + ! + IBW2 = 0 + IPF2 = 0 + ! SET RENUM(I)=0 FOR ALL I TO INDICATE NODE I IS UNNUMBERED + DO I=1,N + RENUM(I) = 0 + END DO + ! COMPUTE DEGREE OF EACH NODE AND ORIGINAL BANDWIDTH AND PROFILE + CALL DGREE(NDSTK, NR, NDEG, IOLD, IBW1, IPF1) + ! SBNUM= LOW END OF AVAILABLE NUMBERS FOR RENUMBERING + ! STNUM= HIGH END OF AVAILABLE NUMBERS FOR RENUMBERING + SBNUM = 1 + STNUM = N + ! NUMBER THE NODES OF DEGREE ZERO + DO I=1,N + IF (NDEG(I) > 0) CYCLE + RENUM(I) = STNUM + STNUM = STNUM - 1 + END DO + ! FIND AN UNNUMBERED NODE OF MIN DEGREE TO START ON + do + LOWDG = IDEG + 1 + NFLG = 1 + ISDIR = 1 + DO I=1,N + IF (NDEG(I) >= LOWDG) CYCLE + IF (RENUM(I) > 0) CYCLE + LOWDG = NDEG(I) + STNODE = I + END DO + ! FIND PSEUDO-DIAMETER AND ASSOCIATED LEVEL STRUCTURES. + ! STNODE AND RVNODE ARE THE ENDS OF THE DIAM AND LVLS1 AND LVLS2 + ! ARE THE RESPECTIVE LEVEL STRUCTURES. + CALL FNDIAM(STNODE, RVNODE, NDSTK, NR, NDEG, LVL, LVLS1,LVLS2, CCSTOR, IDFLT) + IF (.not.(ndeg(stnode) <= ndeg(rvnode))) then + ! NFLG INDICATES THE END TO BEGIN NUMBERING ON + NFLG = -1 + STNODE = RVNODE + endif + CALL SETUP(LVL, LVLS1, LVLS2) + ! FIND ALL THE CONNECTED COMPONENTS (XCC COUNTS THEM) + XCC = 0 + LROOT = 1 + LVLN = 1 + DO I=1,N + IF (LVL(I) /= 0) CYCLE + XCC = XCC + 1 + STPT(XCC) = LROOT + CALL TREE(I, NDSTK, NR, LVL, CCSTOR, NDEG, LVLWTH, LVLBOT,LVLN, MAXLW, N) + SIZEG(XCC) = LVLBOT + LVLWTH - LROOT + LROOT = LVLBOT + LVLWTH + LVLN = LROOT + END DO + if (sort2() /= 0) then + CALL PIKLVL(LVLS1, LVLS2, CCSTOR, IDFLT, ISDIR) + endif + ! ON RETURN FROM PIKLVL, ISDIR INDICATES THE DIRECTION THE LARGEST + ! COMPONENT FELL. ISDIR IS MODIFIED NOW TO INDICATE THE NUMBERING + ! DIRECTION. NUM IS SET TO THE PROPER VALUE FOR THIS DIRECTION. + ISDIR = ISDIR*NFLG + NUM = SBNUM + IF (ISDIR < 0) NUM = STNUM + CALL NUMBER(STNODE, NUM, NDSTK, LVLS2, NDEG, RENUM, LVLS1,LVL,& + & NR, NFLG, IBW2, IPF2, CCSTOR, ISDIR) + ! UPDATE STNUM OR SBNUM AFTER NUMBERING + IF (ISDIR < 0) STNUM = NUM + IF (ISDIR > 0) SBNUM = NUM + IF (.not.(sbnum <= stnum)) exit + end do + IF (IBW2 > IBW1) then + ! IF ORIGINAL NUMBERING IS BETTER THAN NEW ONE, SET UP TO RETURN IT + DO I=1,N + RENUM(I) = IOLD(I) + END DO + IBW2 = IBW1 + IPF2 = IPF1 + ! + endif + DEALLOCATE(SIZEG,STPT,NHIGH,NLOW,AUX,NACUM) + idpthe = idpth + RETURN + end subroutine psb_lgps_reduce + ! + SUBROUTINE DGREE(NDSTK, NR, NDEG, IOLD, IBW1, IPF1) + ! DGREE COMPUTES THE DEGREE OF EACH NODE IN NDSTK AND STORES + ! IT IN THE ARRAY NDEG. THE BANDWIDTH AND PROFILE FOR THE ORIGINAL + ! OR INPUT RENUMBERING OF THE GRAPH IS COMPUTED ALSO. + ! USE INTEGER*2 NDSTK WITH AN IBM 360 OR 370. + implicit none + INTEGER(psb_lpk_) :: NR, IBW1, IPF1, NDSTK(NR,IDEG), NDEG(N), IOLD(N) + ! COMMON /GRA/ N, IDPTH, IDEG + integer(psb_lpk_) :: i, itst, j, idif, irw + + IBW1 = 0 + IPF1 = 0 + DO I=1,N + NDEG(I) = 0 + IRW = 0 + DO J=1,IDEG + ITST = NDSTK(I,J) + IF(ITST <= 0) EXIT + NDEG(I) = NDEG(I) + 1 + IDIF = IOLD(I) - IOLD(ITST) + IF (IRW < IDIF) IRW = IDIF + END DO + IPF1 = IPF1 + IRW + IF (IRW > IBW1) IBW1 = IRW + END DO + RETURN + END SUBROUTINE DGREE + ! + SUBROUTINE FNDIAM(SND1, SND2, NDSTK, NR, NDEG, LVL, LVLS1,LVLS2, IWK, IDFLT) + ! FNDIAM IS THE CONTROL PROCEDURE FOR FINDING THE PSEUDO-DIAMETER OF + ! NDSTK AS WELL AS THE LEVEL STRUCTURE FROM EACH END + ! SND1- ON INPUT THIS IS THE NODE NUMBER OF THE FIRST + ! ATTEMPT AT FINDING A DIAMETER. ON OUTPUT IT + ! CONTAINS THE ACTUAL NUMBER USED. + ! SND2- ON OUTPUT CONTAINS OTHER END OF DIAMETER + ! LVLS1- ARRAY CONTAINING LEVEL STRUCTURE WITH SND1 AS ROOT + ! LVLS2- ARRAY CONTAINING LEVEL STRUCTURE WITH SND2 AS ROOT + ! IDFLT- FLAG USED IN PICKING FINAL LEVEL STRUCTURE, SET + ! =1 IF WIDTH OF LVLS1 <= WIDTH OF LVLS2, OTHERWISE =2 + ! LVL,IWK- WORKING STORAGE + ! USE INTEGER*2 NDSTK WITH AN IBM 360 OR 370. + implicit none + INTEGER(psb_lpk_) :: FLAG, SND, SND1, SND2, NR, idflt + ! COMMON /GRA/ N, IDPTH, IDEG + ! IT IS ASSUMED THAT THE LAST LEVEL HAS AT MOST 100 NODES. + ! COMMON /CC/ NDLST(100) + integer(psb_lpk_),POINTER :: NDLST(:) + integer(psb_lpk_) :: NDSTK(NR,IDEG), NDEG(1), LVL(N), LVLS1(N), LVLS2(N),IWK(N) + integer(psb_lpk_) :: i, j, mtw2, ndxn, ndxl, inow, lvlbot,lvln,lvlwth + integer(psb_lpk_) :: k,mtw1, maxlw + ! + NDLST => AUX + ! + FLAG = 0 + MTW2 = N + SND = SND1 + ! ZERO LVL TO INDICATE ALL NODES ARE AVAILABLE TO TREE +10 DO 20 I=1,N + LVL(I) = 0 +20 END DO + LVLN = 1 + ! DROP A TREE FROM SND + CALL TREE(SND, NDSTK, NR, LVL, IWK, NDEG, LVLWTH, LVLBOT,LVLN, MAXLW, MTW2) + IF (FLAG >= 1) GO TO 50 + FLAG = 1 +30 IDPTH = LVLN - 1 + MTW1 = MAXLW + ! COPY LEVEL STRUCTURE INTO LVLS1 + DO 40 I=1,N + LVLS1(I) = LVL(I) +40 END DO + NDXN = 1 + NDXL = 0 + MTW2 = N + ! SORT LAST LEVEL BY DEGREE AND STORE IN NDLST + CALL SORTDG(NDLST, IWK(LVLBOT), NDXL, LVLWTH, NDEG) + SND = NDLST(1) + GO TO 10 +50 IF (IDPTH >= LVLN-1) GO TO 60 + ! START AGAIN WITH NEW STARTING NODE + SND1 = SND + GO TO 30 +60 IF (MAXLW >= MTW2) GO TO 80 + MTW2 = MAXLW + SND2 = SND + ! STORE NARROWEST REVERSE LEVEL STRUCTURE IN LVLS2 + DO 70 I=1,N + LVLS2(I) = LVL(I) +70 END DO +80 IF (NDXN == NDXL) GO TO 90 + ! TRY NEXT NODE IN NDLST + NDXN = NDXN + 1 + SND = NDLST(NDXN) + GO TO 10 +90 IDFLT = 1 + IF (MTW2 <= MTW1) IDFLT = 2 + NULLIFY(NDLST) + RETURN + END SUBROUTINE FNDIAM + ! + SUBROUTINE TREE(IROOT, NDSTK, NR, LVL, IWK, NDEG, LVLWTH, LVLBOT, LVLN, MAXLW, IBORT) + ! TREE DROPS A TREE IN NDSTK FROM IROOT + ! LVL- ARRAY INDICATING AVAILABLE NODES IN NDSTK WITH ZERO + ! ENTRIES. TREE ENTERS LEVEL NUMBERS ASSIGNED + ! DURING EXECUTION OF THIS PROCEDURE + ! IWK- ON OUTPUT CONTAINS NODE NUMBERS USED IN TREE + ! ARRANGED BY LEVELS (IWK(LVLN) CONTAINS IROOT + ! AND IWK(LVLBOT+LVLWTH-1) CONTAINS LAST NODE ENTERED) + ! LVLWTH- ON OUTPUT CONTAINS WIDTH OF LAST LEVEL + ! LVLBOT- ON OUTPUT CONTAINS INDEX INTO IWK OF FIRST + ! NODE IN LAST LEVEL + ! MAXLW- ON OUTPUT CONTAINS THE MAXIMUM LEVEL WIDTH + ! LVLN- ON INPUT THE FIRST AVAILABLE LOCATION IN IWK + ! USUALLY ONE BUT IF IWK IS USED TO STORE PREVIOUS + ! CONNECTED COMPONENTS, LVLN IS NEXT AVAILABLE LOCATION. + ! ON OUTPUT THE TOTAL NUMBER OF LEVELS + 1 + ! IBORT- INPUT PARAM WHICH TRIGGERS EARLY RETURN IF + ! MAXLW BECOMES >= IBORT + ! USE INTEGER*2 NDSTK WITH AN IBM 360 OR 370. + implicit none + integer(psb_lpk_) :: IROOT, NR, NDSTK(NR,*), LVL(*), IWK(*), NDEG(*) + integer(psb_lpk_) :: LVLWTH, LVLBOT, LVLN, MAXLW, IBORT + integer(psb_lpk_) :: itest, iwknow, itop, lvltop,j , inow, ndrow + MAXLW = 0 + ITOP = LVLN + INOW = LVLN + LVLBOT = LVLN + LVLTOP = LVLN + 1 + LVLN = 1 + LVL(IROOT) = 1 + IWK(ITOP) = IROOT +10 LVLN = LVLN + 1 +20 IWKNOW = IWK(INOW) + NDROW = NDEG(IWKNOW) + DO 30 J=1,NDROW + ITEST = NDSTK(IWKNOW,J) + IF (LVL(ITEST) /= 0) CYCLE + LVL(ITEST) = LVLN + ITOP = ITOP + 1 + IWK(ITOP) = ITEST +30 END DO + INOW = INOW + 1 + IF (INOW < LVLTOP) GO TO 20 + LVLWTH = LVLTOP - LVLBOT + IF (MAXLW < LVLWTH) MAXLW = LVLWTH + IF (MAXLW >= IBORT) RETURN + IF (ITOP < LVLTOP) RETURN + LVLBOT = INOW + LVLTOP = ITOP + 1 + GO TO 10 + END SUBROUTINE TREE + ! + SUBROUTINE SORTDG(STK1, STK2, X1, X2, NDEG) + ! SORTDG SORTS STK2 BY DEGREE OF THE NODE AND ADDS IT TO THE END + ! OF STK1 IN ORDER OF LOWEST TO HIGHEST DEGREE. X1 AND X2 ARE THE + ! NUMBER OF NODES IN STK1 AND STK2 RESPECTIVELY. + implicit none + INTEGER(psb_lpk_) :: X1, X2, STK1, STK2, TEMP,NDEG + ! COMMON /GRA/ N, IDPTH, IDEG + DIMENSION NDEG(N), STK1(X1+X2), STK2(X2) + integer(psb_lpk_) :: ind,itest,i,j,istk2,jstk2 + IND = X2 +10 ITEST = 0 + IND = IND - 1 + IF (IND < 1) GO TO 30 + DO 20 I=1,IND + J = I + 1 + ISTK2 = STK2(I) + JSTK2 = STK2(J) + IF (NDEG(ISTK2) <= NDEG(JSTK2)) CYCLE + ITEST = 1 + TEMP = STK2(I) + STK2(I) = STK2(J) + STK2(J) = TEMP +20 END DO + IF (ITEST == 1) GO TO 10 +30 DO 40 I=1,X2 + X1 = X1 + 1 + STK1(X1) = STK2(I) +40 END DO + RETURN + END SUBROUTINE SORTDG + ! + SUBROUTINE SETUP(LVL, LVLS1, LVLS2) + ! SETUP COMPUTES THE REVERSE LEVELING INFO FROM LVLS2 AND STORES + ! IT INTO LVLS2. NACUM(I) IS INITIALIZED TO NODES/ITH LEVEL FOR NODES + ! ON THE PSEUDO-DIAMETER OF THE GRAPH. LVL IS INITIALIZED TO NON- + ! ZERO FOR NODES ON THE PSEUDO-DIAM AND NODES IN A DIFFERENT + ! COMPONENT OF THE GRAPH. + ! COMMON /GRA/ N, IDPTH, IDEG + ! IT IS ASSUMED THAT THERE ARE AT MOST 100 LEVELS. + ! COMMON /LVLW/ NHIGH(100), NLOW(100), NACUM(100) + use psb_base_mod + implicit none + integer(psb_lpk_) :: LVL(N), LVLS1(N), LVLS2(N) + integer(psb_lpk_) :: SZ,i,itemp + !----------------------------------------------------- + SZ=SIZE(NACUM) + IF(SZ < IDPTH) THEN + write(psb_out_unit,*) 'GPS_SETUP: on fly reallocation of NACUM' + CALL REALLOC(NACUM,SZ,IDPTH) + END IF + !----------------------------------------------------- + DO 10 I=1,IDPTH + NACUM(I) = 0 +10 END DO + DO 30 I=1,N + LVL(I) = 1 + LVLS2(I) = IDPTH + 1 - LVLS2(I) + ITEMP = LVLS2(I) + IF (ITEMP > IDPTH) CYCLE + IF (ITEMP /= LVLS1(I)) GO TO 20 + NACUM(ITEMP) = NACUM(ITEMP) + 1 + CYCLE +20 LVL(I) = 0 +30 END DO + RETURN + END SUBROUTINE SETUP + ! + FUNCTION SORT2() result(val) + implicit none + INTEGER(psb_lpk_) :: val + ! SORT2 SORTS SIZEG AND STPT INTO DESCENDING ORDER ACCORDING TO + ! VALUES OF SIZEG. XCC=NUMBER OF ENTRIES IN EACH ARRAY + INTEGER(psb_lpk_) :: TEMP,ind,itest,i,j + ! IT IS ASSUMED THAT THE GRAPH HAS AT MOST 50 CONNECTED COMPONENTS. + !COMMON /CC/ XCC, SIZEG(50), STPT(50) + + VAL = 0 + IF (XCC == 0) RETURN + VAL = 1 + IND = XCC +10 ITEST = 0 + IND = IND - 1 + IF (IND < 1) RETURN + DO 20 I=1,IND + J = I + 1 + IF (SIZEG(I) >= SIZEG(J)) CYCLE + ITEST = 1 + TEMP = SIZEG(I) + SIZEG(I) = SIZEG(J) + SIZEG(J) = TEMP + TEMP = STPT(I) + STPT(I) = STPT(J) + STPT(J) = TEMP +20 END DO + IF (ITEST == 1) GO TO 10 + RETURN + END FUNCTION SORT2 + ! + SUBROUTINE PIKLVL(LVLS1, LVLS2, CCSTOR, IDFLT, ISDIR) + use psb_base_mod + implicit none + ! PIKLVL CHOOSES THE LEVEL STRUCTURE USED IN NUMBERING GRAPH + ! LVLS1- ON INPUT CONTAINS FORWARD LEVELING INFO + ! LVLS2- ON INPUT CONTAINS REVERSE LEVELING INFO + ! ON OUTPUT THE FINAL LEVEL STRUCTURE CHOSEN + ! CCSTOR- ON INPUT CONTAINS CONNECTED COMPONENT INFO + ! IDFLT- ON INPUT =1 IF WDTH LVLS1 <= WDTH LVLS2, =2 OTHERWISE + ! NHIGH KEEPS TRACK OF LEVEL WIDTHS FOR HIGH NUMBERING + ! NLOW- KEEPS TRACK OF LEVEL WIDTHS FOR LOW NUMBERING + ! NACUM- KEEPS TRACK OF LEVEL WIDTHS FOR CHOSEN LEVEL STRUCTURE + ! XCC- NUMBER OF CONNECTED COMPONENTS + ! SIZEG(I)- SIZE OF ITH CONNECTED COMPONENT + ! STPT(I)- INDEX INTO CCSTORE OF 1ST NODE IN ITH CON COMPT + ! ISDIR- FLAG WHICH INDICATES WHICH WAY THE LARGEST CONNECTED + ! COMPONENT FELL. =+1 IF LOW AND -1 IF HIGH + ! COMMON /GRA/ N, IDPTH, IDEG + ! IT IS ASSUMED THAT THE GRAPH HAS AT MOST 50 COMPONENTS AND + ! THAT THERE ARE AT MOST 100 LEVELS. + ! COMMON /LVLW/ NHIGH(100), NLOW(100), NACUM(100) + ! COMMON /CC/ XCC, SIZEG(50), STPT(50) + INTEGER(psb_lpk_) :: LVLS1(N), LVLS2(N), CCSTOR(N) + integer(psb_lpk_) :: SZ, ENDC,i,j,max1,max2,inode + integer(psb_lpk_) :: lvlnh, it, k, lvlnl,idflt,isdir + ! FOR EACH CONNECTED COMPONENT DO + DO 80 I=1,XCC + J = STPT(I) + ENDC= SIZEG(I) + J - 1 + ! SET NHIGH AND NLOW EQUAL TO NACUM + !----------------------------------------------------- + SZ=SIZE(NHIGH) + IF(SZ < IDPTH) THEN + write(psb_out_unit,*) 'GPS_PIKLVL: on fly reallocation of NHIGH' + CALL REALLOC(NHIGH,SZ,IDPTH) + END IF + SZ=SIZE(NLOW) + IF(SZ < IDPTH) THEN + write(psb_out_unit,*) 'GPS_PIKLVL: on fly reallocation of NLOW' + CALL REALLOC(NLOW,SZ,IDPTH) + END IF + !----------------------------------------------------- + DO 10 K=1,IDPTH + NHIGH(K) = NACUM(K) + NLOW(K) = NACUM(K) +10 END DO + ! UPDATE NHIGH AND NLOW FOR EACH NODE IN CONNECTED COMPONENT + DO 20 K=J,ENDC + INODE = CCSTOR(K) + LVLNH = LVLS1(INODE) + NHIGH(LVLNH) = NHIGH(LVLNH) + 1 + LVLNL = LVLS2(INODE) + NLOW(LVLNL) = NLOW(LVLNL) + 1 +20 END DO + MAX1 = 0 + MAX2 = 0 + ! SET MAX1=LARGEST NEW NUMBER IN NHIGH + ! SET MAX2=LARGEST NEW NUMBER IN NLOW + DO 30 K=1,IDPTH + IF (2*NACUM(K) == NLOW(K)+NHIGH(K)) CYCLE + IF (NHIGH(K) > MAX1) MAX1 = NHIGH(K) + IF (NLOW(K) > MAX2) MAX2 = NLOW(K) +30 END DO + ! SET IT= NUMBER OF LEVEL STRUCTURE TO BE USED + IT = 1 + IF (MAX1 > MAX2) IT = 2 + IF (MAX1 == MAX2) IT = IDFLT + IF (IT == 2) GO TO 60 + IF (I == 1) ISDIR = -1 + ! COPY LVLS1 INTO LVLS2 FOR EACH NODE IN CONNECTED COMPONENT + DO 40 K=J,ENDC + INODE = CCSTOR(K) + LVLS2(INODE) = LVLS1(INODE) +40 END DO + ! UPDATE NACUM TO BE THE SAME AS NHIGH + DO 50 K=1,IDPTH + NACUM(K) = NHIGH(K) +50 END DO + CYCLE + ! UPDATE NACUM TO BE THE SAME AS NLOW +60 DO 70 K=1,IDPTH + NACUM(K) = NLOW(K) +70 END DO +80 END DO + RETURN + END SUBROUTINE PIKLVL + ! + SUBROUTINE NUMBER(SND, NUM, NDSTK, LVLS2, NDEG, RENUM, LVLST,LSTPT,& + & NR, NFLG, IBW2, IPF2, IPFA, ISDIR) + use psb_base_mod + implicit none + ! NUMBER PRODUCES THE NUMBERING OF THE GRAPH FOR MIN BANDWIDTH + ! SND- ON INPUT THE NODE TO BEGIN NUMBERING ON + ! NUM- ON INPUT AND OUTPUT, THE NEXT AVAILABLE NUMBER + ! LVLS2- THE LEVEL STRUCTURE TO BE USED IN NUMBERING + ! RENUM- THE ARRAY USED TO STORE THE NEW NUMBERING + ! LVLST- ON OUTPUT CONTAINS LEVEL STRUCTURE + ! LSTPT(I)- ON OUTPUT, INDEX INTO LVLST TO FIRST NODE IN ITH LVL + ! LSTPT(I+1) - LSTPT(I) = NUMBER OF NODES IN ITH LVL + ! NFLG- =+1 IF SND IS FORWARD END OF PSEUDO-DIAM + ! =-1 IF SND IS REVERSE END OF PSEUDO-DIAM + ! IBW2- BANDWIDTH OF NEW NUMBERING COMPUTED BY NUMBER + ! IPF2- PROFILE OF NEW NUMBERING COMPUTED BY NUMBER + ! IPFA- WORKING STORAGE USED TO COMPUTE PROFILE AND BANDWIDTH + ! ISDIR- INDICATES STEP DIRECTION USED IN NUMBERING(+1 OR -1) + ! USE INTEGER*2 NDSTK WITH AN IBM 360 OR 370. + INTEGER(psb_lpk_) :: SND, NUM, XA, XB, XC, XD, CX, ENDC, TEST, NR, ISDIR + ! COMMON /GRA/ N, IDPTH, IDEG + ! THE STORAGE IN COMMON BLOCKS CC AND LVLW IS NOW FREE AND CAN + ! BE USED FOR STACKS. + !COMMON /LVLW/ STKA(100), STKB(100), STKC(100) + !COMMON /CC/ STKD(100) + INTEGER(psb_lpk_) :: IPFA(N), NDSTK(NR,IDEG), LVLS2(N),& + & NDEG(N), RENUM(N+1), LVLST(N),LSTPT(N),ipf2,ibw2,nflg, nbw + integer(psb_lpk_),POINTER :: STKA(:),STKB(:),STKC(:),STKD(:) + integer(psb_lpk_) :: SZ1,SZ2,i,j,nstpt,lvln, lst, lnd, inx, max, ipro,& + & lvlnl, k, it + ! + STKA => NHIGH + STKB => NLOW + STKC => NACUM + STKD => AUX + ! + ! SET UP LVLST AND LSTPT FROM LVLS2 + DO 10 I=1,N + IPFA(I) = 0 +10 END DO + NSTPT = 1 + DO 30 I=1,IDPTH + LSTPT(I) = NSTPT + DO 20 J=1,N + IF (LVLS2(J) /= I) CYCLE + LVLST(NSTPT) = J + NSTPT = NSTPT + 1 +20 END DO +30 END DO + LSTPT(IDPTH+1) = NSTPT + ! STKA, STKB, STKC AND STKD ARE STACKS WITH POINTERS + ! XA,XB,XC, AND XD. CX IS A SPECIAL POINTER INTO STKC WHICH + ! INDICATES THE PARTICULAR NODE BEING PROCESSED. + ! LVLN KEEPS TRACK OF THE LEVEL WE ARE WORKING AT. + ! INITIALLY STKC CONTAINS ONLY THE INITIAL NODE, SND. + LVLN = 0 + IF (NFLG < 0) LVLN = IDPTH + 1 + XC = 1 + STKC(XC) = SND +40 CX = 1 + XD = 0 + LVLN = LVLN + NFLG + LST = LSTPT(LVLN) + LND = LSTPT(LVLN+1) - 1 + ! BEGIN PROCESSING NODE STKC(CX) +50 IPRO = STKC(CX) + RENUM(IPRO) = NUM + NUM = NUM + ISDIR + ENDC = NDEG(IPRO) + XA = 0 + XB = 0 + ! CHECK ALL ADJACENT NODES + DO 80 I=1,ENDC + TEST = NDSTK(IPRO,I) + INX = RENUM(TEST) + ! ONLY NODES NOT NUMBERED OR ALREADY ON A STACK ARE ADDED + IF (INX == 0) GO TO 60 + IF (INX < 0) CYCLE + ! DO PRELIMINARY BANDWIDTH AND PROFILE CALCULATIONS + NBW = (RENUM(IPRO)-INX)*ISDIR + IF (ISDIR > 0) INX = RENUM(IPRO) + IF (IPFA(INX) < NBW) IPFA(INX) = NBW + CYCLE +60 RENUM(TEST) = -1 + ! PUT NODES ON SAME LEVEL ON STKA, ALL OTHERS ON STKB + IF (LVLS2(TEST) == LVLS2(IPRO)) GO TO 70 + XB = XB + 1 + STKB(XB) = TEST + CYCLE +70 XA = XA + 1 + STKA(XA) = TEST +80 END DO + ! SORT STKA AND STKB INTO INCREASING DEGREE AND ADD STKA TO STKC + ! AND STKB TO STKD + IF (XA == 0) GO TO 100 + IF (XA == 1) GO TO 90 + !----------------------------------------------------------------- + SZ1=SIZE(STKC) + SZ2=XC+XA + IF(SZ1 < SZ2) THEN + write(psb_out_unit,*) 'GPS_NUMBER - Check #1: on fly reallocation of STKC' + CALL REALLOC(NACUM,SZ1,SZ2) + STKC => NACUM + END IF + !----------------------------------------------------------------- + CALL SORTDG(STKC, STKA, XC, XA, NDEG) + GO TO 100 +90 XC = XC + 1 + !----------------------------------------------------------------- + SZ1=SIZE(STKC) + SZ2=XC + IF(SZ1 < SZ2) THEN + write(psb_out_unit,*) 'GPS_NUMBER - Check #2: on fly reallocation of STKC' + SZ2=SZ2+INIT + CALL REALLOC(NACUM,SZ1,SZ2) + STKC => NACUM + END IF + !----------------------------------------------------------------- + STKC(XC) = STKA(XA) +100 IF (XB == 0) GO TO 120 + IF (XB == 1) GO TO 110 + !----------------------------------------------------------------- + SZ1=SIZE(STKD) + SZ2=XD+XB + IF(SZ1 < SZ2) THEN + write(psb_out_unit,*) 'GPS_NUMBER - Check #3: on fly reallocation of STKD' + CALL REALLOC(AUX,SZ1,SZ2) + STKD => AUX + END IF + !----------------------------------------------------------------- + CALL SORTDG(STKD, STKB, XD, XB, NDEG) + GO TO 120 +110 XD = XD + 1 + !----------------------------------------------------------------- + SZ1=SIZE(STKD) + SZ2=XD + IF(SZ1 < SZ2) THEN + write(psb_out_unit,*) 'GPS_NUMBER - Check #4: on fly reallocation of STKD' + SZ2=SZ2+INIT + CALL REALLOC(AUX,SZ1,SZ2) + STKD => AUX + END IF + !----------------------------------------------------------------- + STKD(XD) = STKB(XB) + ! BE SURE TO PROCESS ALL NODES IN STKC +120 CX = CX + 1 + IF (XC >= CX) GO TO 50 + ! WHEN STKC IS EXHAUSTED LOOK FOR MIN DEGREE NODE IN SAME LEVEL + ! WHICH HAS NOT BEEN PROCESSED + MAX = IDEG + 1 + SND = N + 1 + DO 130 I=LST,LND + TEST = LVLST(I) + IF (RENUM(TEST) /= 0) CYCLE + IF (NDEG(TEST) >= MAX) CYCLE + RENUM(SND) = 0 + RENUM(TEST) = -1 + MAX = NDEG(TEST) + SND = TEST +130 END DO + IF (SND == N+1) GO TO 140 + XC = XC + 1 + !----------------------------------------------------------------- + SZ1=SIZE(STKC) + SZ2=XC + IF(SZ1 < SZ2) THEN + write(psb_out_unit,*) 'GPS_NUMBER - Check #5: on fly reallocation of STKC' + SZ2=SZ2+INIT + CALL REALLOC(NACUM,SZ1,SZ2) + STKC => NACUM + END IF + !----------------------------------------------------------------- + STKC(XC) = SND + GO TO 50 + ! IF STKD IS EMPTY WE ARE DONE, OTHERWISE COPY STKD ONTO STKC + ! AND BEGIN PROCESSING NEW STKC +140 IF (XD == 0) GO TO 160 + !----------------------------------------------------------------- + SZ1=SIZE(STKC) + SZ2=XD + IF(SZ1 < SZ2) THEN + write(psb_out_unit,*) 'GPS_NUMBER - Check #6: on fly reallocation of STKC' + SZ2=SZ2+INIT + CALL REALLOC(NACUM,SZ1,SZ2) + STKC => NACUM + END IF + !----------------------------------------------------------------- + DO 150 I=1,XD + STKC(I) = STKD(I) +150 END DO + XC = XD + GO TO 40 + ! DO FINAL BANDWIDTH AND PROFILE CALCULATIONS +160 DO 170 I=1,N + IF (IPFA(I) > IBW2) IBW2 = IPFA(I) + IPF2 = IPF2 + IPFA(I) +170 END DO + ! + RETURN + END SUBROUTINE NUMBER + ! + ! --------------------------------------------------------- + SUBROUTINE REALLOC(VET,SZ1,SZ2) + use psb_base_mod + ! PERFORM ON FLY REALLOCATION OF POINTER VET INCREASING + ! ITS SIZE FROM SZ1 TO SZ2 + IMPLICIT NONE + integer(psb_lpk_),allocatable :: VET(:) + integer(psb_lpk_) :: SZ1,SZ2 + integer(psb_ipk_) :: info + call psb_realloc(sz2,vet,info) + IF(INFO /= psb_success_) THEN + write(psb_out_unit,*) 'Error! Memory allocation failure in REALLOC' + STOP + END IF + RETURN + END SUBROUTINE REALLOC + ! +end module psb_lgps_mod diff --git a/util/psb_renum_mod.f90 b/util/psb_renum_mod.f90 index 82d51712..9a060cc6 100644 --- a/util/psb_renum_mod.f90 +++ b/util/psb_renum_mod.f90 @@ -32,101 +32,8 @@ module psb_renum_mod use psb_base_mod - integer(psb_ipk_), parameter :: psb_mat_renum_identity_ = 0 - integer(psb_ipk_), parameter :: psb_mat_renum_gps_ = 456 - integer(psb_ipk_), parameter :: psb_mat_renum_amd_ = psb_mat_renum_gps_ + 1 - - - interface psb_mat_renum - subroutine psb_d_mat_renums(alg,mat,info,perm) - import :: psb_ipk_, psb_dspmat_type - character(len=*), intent(in) :: alg - type(psb_dspmat_type), intent(inout) :: mat - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), allocatable, optional, intent(out) :: perm(:) - end subroutine psb_d_mat_renums - subroutine psb_d_mat_renum(alg,mat,info,perm) - import :: psb_ipk_, psb_dspmat_type - integer(psb_ipk_), intent(in) :: alg - type(psb_dspmat_type), intent(inout) :: mat - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), allocatable, optional, intent(out) :: perm(:) - end subroutine psb_d_mat_renum - subroutine psb_s_mat_renums(alg,mat,info,perm) - import :: psb_ipk_, psb_sspmat_type - character(len=*), intent(in) :: alg - type(psb_sspmat_type), intent(inout) :: mat - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), allocatable, optional, intent(out) :: perm(:) - end subroutine psb_s_mat_renums - subroutine psb_s_mat_renum(alg,mat,info,perm) - import :: psb_ipk_, psb_sspmat_type - integer(psb_ipk_), intent(in) :: alg - type(psb_sspmat_type), intent(inout) :: mat - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), allocatable, optional, intent(out) :: perm(:) - end subroutine psb_s_mat_renum - subroutine psb_z_mat_renums(alg,mat,info,perm) - import :: psb_ipk_, psb_zspmat_type - character(len=*), intent(in) :: alg - type(psb_zspmat_type), intent(inout) :: mat - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), allocatable, optional, intent(out) :: perm(:) - end subroutine psb_z_mat_renums - subroutine psb_z_mat_renum(alg,mat,info,perm) - import :: psb_ipk_, psb_zspmat_type - integer(psb_ipk_), intent(in) :: alg - type(psb_zspmat_type), intent(inout) :: mat - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), allocatable, optional, intent(out) :: perm(:) - end subroutine psb_z_mat_renum - subroutine psb_c_mat_renums(alg,mat,info,perm) - import :: psb_ipk_, psb_cspmat_type - character(len=*), intent(in) :: alg - type(psb_cspmat_type), intent(inout) :: mat - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), allocatable, optional, intent(out) :: perm(:) - end subroutine psb_c_mat_renums - subroutine psb_c_mat_renum(alg,mat,info,perm) - import :: psb_ipk_, psb_cspmat_type - integer(psb_ipk_), intent(in) :: alg - type(psb_cspmat_type), intent(inout) :: mat - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), allocatable, optional, intent(out) :: perm(:) - end subroutine psb_c_mat_renum - end interface psb_mat_renum - - - interface psb_cmp_bwpf - subroutine psb_s_cmp_bwpf(mat,bwl,bwu,prf,info) - import :: psb_ipk_, psb_sspmat_type - type(psb_sspmat_type), intent(in) :: mat - integer(psb_ipk_), intent(out) :: bwl, bwu - integer(psb_ipk_), intent(out) :: prf - integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cmp_bwpf - subroutine psb_d_cmp_bwpf(mat,bwl,bwu,prf,info) - import :: psb_ipk_, psb_dspmat_type - type(psb_dspmat_type), intent(in) :: mat - integer(psb_ipk_), intent(out) :: bwl, bwu - integer(psb_ipk_), intent(out) :: prf - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cmp_bwpf - subroutine psb_c_cmp_bwpf(mat,bwl,bwu,prf,info) - import :: psb_ipk_, psb_cspmat_type - type(psb_cspmat_type), intent(in) :: mat - integer(psb_ipk_), intent(out) :: bwl, bwu - integer(psb_ipk_), intent(out) :: prf - integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cmp_bwpf - subroutine psb_z_cmp_bwpf(mat,bwl,bwu,prf,info) - import :: psb_ipk_, psb_zspmat_type - type(psb_zspmat_type), intent(in) :: mat - integer(psb_ipk_), intent(out) :: bwl, bwu - integer(psb_ipk_), intent(out) :: prf - integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cmp_bwpf - end interface psb_cmp_bwpf - - + use psb_s_renum_mod + use psb_c_renum_mod + use psb_d_renum_mod + use psb_z_renum_mod end module psb_renum_mod diff --git a/util/psb_s_renum_impl.F90 b/util/psb_s_renum_impl.F90 index 008bbbb0..6bb37a3c 100644 --- a/util/psb_s_renum_impl.F90 +++ b/util/psb_s_renum_impl.F90 @@ -29,37 +29,61 @@ ! POSSIBILITY OF SUCH DAMAGE. ! ! -subroutine psb_s_mat_renums(alg,mat,info,perm) +subroutine psb_s_mat_renum(alg,mat,info,perm) use psb_base_mod - use psb_renum_mod, psb_protect_name => psb_s_mat_renums + use psb_renum_mod, psb_protect_name => psb_s_mat_renum implicit none character(len=*), intent(in) :: alg type(psb_sspmat_type), intent(inout) :: mat integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), allocatable, optional, intent(out) :: perm(:) - integer(psb_ipk_) :: err_act, nr, nc, ialg + integer(psb_ipk_) :: err_act, nr, nc, i, ierr(5) character(len=20) :: name info = psb_success_ name = 'mat_renum' call psb_erractionsave(err_act) + nr = mat%get_nrows() + nc = mat%get_ncols() + if (nr /= nc) then + info = psb_err_rectangular_mat_unsupported_ + ierr(1) = nr; ierr(2) = nc; + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + info = psb_success_ select case (psb_toupper(alg)) case ('GPS') - ialg = psb_mat_renum_gps_ - case ('AMD') - ialg = psb_mat_renum_amd_ + + call psb_mat_renum_gps(mat,info,perm) + + case('AMD') + + call psb_mat_renum_amd(mat,info,perm) + case ('NONE', 'ID') - ialg = psb_mat_renum_identity_ + nr = mat%get_nrows() + allocate(perm(nr),stat=info) + if (info == 0) then + do i=1,nr + perm(i) = i + end do + else + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + endif case default write(0,*) 'Unknown algorithm "',psb_toupper(alg),'"' - ialg = -1 + info = psb_err_input_value_invalid_i_ + ierr(1) = 1; + call psb_errpush(info,name,i_err=ierr) + goto 9999 end select - - call psb_mat_renum(ialg,mat,info,perm) - + if (info /= psb_success_) then info = psb_err_from_subroutine_non_ call psb_errpush(info,name) @@ -72,26 +96,218 @@ subroutine psb_s_mat_renums(alg,mat,info,perm) 9999 call psb_error_handler(err_act) return -end subroutine psb_s_mat_renums - -subroutine psb_s_mat_renum(alg,mat,info,perm) +contains + + subroutine psb_mat_renum_gps(a,info,operm) + use psb_base_mod + use psb_gps_mod + implicit none + type(psb_sspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), allocatable, optional, intent(out) :: operm(:) + + ! + class(psb_s_base_sparse_mat), allocatable :: aa + type(psb_s_csr_sparse_mat) :: acsr + type(psb_s_coo_sparse_mat) :: acoo + + integer(psb_ipk_) :: err_act + character(len=20) :: name + integer(psb_ipk_), allocatable :: ndstk(:,:), iold(:), ndeg(:), perm(:) + integer(psb_ipk_) :: i, j, k, ideg, nr, ibw, ipf, idpth + + info = psb_success_ + name = 'mat_renum_gps' + call psb_erractionsave(err_act) + + info = psb_success_ + + call a%mold(aa) + call a%mv_to(aa) + call aa%mv_to_fmt(acsr,info) + ! Insert call to gps_reduce + nr = acsr%get_nrows() + ideg = 0 + do i=1, nr + ideg = max(ideg,acsr%irp(i+1)-acsr%irp(i)) + end do + allocate(ndstk(nr,ideg), iold(nr), perm(nr+1), ndeg(nr),stat=info) + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + do i=1, nr + iold(i) = i + ndstk(i,:) = 0 + k = 0 + do j=acsr%irp(i),acsr%irp(i+1)-1 + k = k + 1 + ndstk(i,k) = acsr%ja(j) + end do + end do + perm = 0 + + call psb_gps_reduce(ndstk,nr,ideg,iold,perm,ndeg,ibw,ipf,idpth) + + if (.not.psb_isaperm(nr,perm)) then + write(0,*) 'Something wrong: bad perm from gps_reduce' + info = psb_err_from_subroutine_ + call psb_errpush(info,name) + goto 9999 + end if + ! Move to coordinate to apply renumbering + call acsr%mv_to_coo(acoo,info) + do i=1, acoo%get_nzeros() + acoo%ia(i) = perm(acoo%ia(i)) + acoo%ja(i) = perm(acoo%ja(i)) + end do + call acoo%fix(info) + + ! Get back to where we started from + call aa%mv_from_coo(acoo,info) + call a%mv_from(aa) + if (present(operm)) then + call psb_realloc(nr,operm,info) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + operm(1:nr) = perm(1:nr) + end if + + deallocate(aa) + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_mat_renum_gps + + + subroutine psb_mat_renum_amd(a,info,operm) +#if defined(HAVE_AMD) + use iso_c_binding +#endif + use psb_base_mod + implicit none + type(psb_sspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), allocatable, optional, intent(out) :: operm(:) + + ! +#if defined(HAVE_AMD) + interface + function psb_amd_order(n,ap,ai,p)& + & result(res) bind(c,name='psb_amd_order') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + integer(c_int) :: ap(*), ai(*), p(*) + end function psb_amd_order + end interface +#endif + + type(psb_s_csc_sparse_mat) :: acsc + class(psb_s_base_sparse_mat), allocatable :: aa + type(psb_s_coo_sparse_mat) :: acoo + + integer(psb_ipk_), allocatable :: perm(:) + integer(psb_ipk_) :: err_act + character(len=20) :: name + integer(psb_ipk_) :: i, j, k, ideg, nr, ibw, ipf, idpth, nz + + info = psb_success_ + name = 'mat_renum_amd' + call psb_erractionsave(err_act) + +#if defined(HAVE_AMD) && defined(IPK4) + + info = psb_success_ + nr = a%get_nrows() + nz = a%get_nzeros() + allocate(perm(nr),stat=info) + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + + allocate(aa, mold=a%a) + call a%mv_to(acsc) + + acsc%ia(:) = acsc%ia(:) - 1 + acsc%icp(:) = acsc%icp(:) - 1 + + info = psb_amd_order(nr,acsc%icp,acsc%ia,perm) + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_amd_order') + goto 9999 + end if + + perm(:) = perm(:) + 1 + acsc%ia(:) = acsc%ia(:) + 1 + acsc%icp(:) = acsc%icp(:) + 1 + + call acsc%mv_to_coo(acoo,info) + do i=1, acoo%get_nzeros() + acoo%ia(i) = perm(acoo%ia(i)) + acoo%ja(i) = perm(acoo%ja(i)) + end do + call acoo%fix(info) + + ! Get back to where we started from + call aa%mv_from_coo(acoo,info) + call a%mv_from(aa) + if (present(operm)) then + call psb_realloc(nr,operm,info) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + operm(1:nr) = perm(1:nr) + end if + + deallocate(aa,perm) + +#else + + info = psb_err_missing_aux_lib_ + call psb_errpush(info, name) + goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + end subroutine psb_mat_renum_amd + +end subroutine psb_s_mat_renum + +subroutine psb_ls_mat_renum(alg,mat,info,perm) use psb_base_mod - use psb_renum_mod, psb_protect_name => psb_s_mat_renum + use psb_renum_mod, psb_protect_name => psb_ls_mat_renum implicit none - integer(psb_ipk_), intent(in) :: alg - type(psb_sspmat_type), intent(inout) :: mat + character(len=*), intent(in) :: alg + type(psb_lsspmat_type), intent(inout) :: mat integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), allocatable, optional, intent(out) :: perm(:) + integer(psb_lpk_), allocatable, optional, intent(out) :: perm(:) - integer(psb_ipk_) :: err_act, nr, nc, i, ierr(5) + integer(psb_lpk_) :: nr, nc, i + integer(psb_ipk_) :: err_act, ierr(5) character(len=20) :: name info = psb_success_ name = 'mat_renum' call psb_erractionsave(err_act) - info = psb_success_ - nr = mat%get_nrows() nc = mat%get_ncols() if (nr /= nc) then @@ -101,16 +317,17 @@ subroutine psb_s_mat_renum(alg,mat,info,perm) goto 9999 end if - select case (alg) - case(psb_mat_renum_gps_) + info = psb_success_ + select case (psb_toupper(alg)) + case ('GPS') - call psb_mat_renum_gps(mat,info,perm) + call psb_lmat_renum_gps(mat,info,perm) - case(psb_mat_renum_amd_) + case('AMD') - call psb_mat_renum_amd(mat,info,perm) + call psb_lmat_renum_amd(mat,info,perm) - case(psb_mat_renum_identity_) + case ('NONE', 'ID') nr = mat%get_nrows() allocate(perm(nr),stat=info) if (info == 0) then @@ -123,8 +340,9 @@ subroutine psb_s_mat_renum(alg,mat,info,perm) goto 9999 endif case default + write(0,*) 'Unknown algorithm "',psb_toupper(alg),'"' info = psb_err_input_value_invalid_i_ - ierr(1) = 1; ierr(2) = alg; + ierr(1) = 1; call psb_errpush(info,name,i_err=ierr) goto 9999 end select @@ -143,26 +361,26 @@ subroutine psb_s_mat_renum(alg,mat,info,perm) contains - subroutine psb_mat_renum_gps(a,info,operm) + subroutine psb_lmat_renum_gps(a,info,operm) use psb_base_mod - use psb_gps_mod + use psb_lgps_mod implicit none - type(psb_sspmat_type), intent(inout) :: a + type(psb_lsspmat_type), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), allocatable, optional, intent(out) :: operm(:) + integer(psb_lpk_), allocatable, optional, intent(out) :: operm(:) ! - class(psb_s_base_sparse_mat), allocatable :: aa - type(psb_s_csr_sparse_mat) :: acsr - type(psb_s_coo_sparse_mat) :: acoo + class(psb_ls_base_sparse_mat), allocatable :: aa + type(psb_ls_csr_sparse_mat) :: acsr + type(psb_ls_coo_sparse_mat) :: acoo integer(psb_ipk_) :: err_act character(len=20) :: name - integer(psb_ipk_), allocatable :: ndstk(:,:), iold(:), ndeg(:), perm(:) - integer(psb_ipk_) :: i, j, k, ideg, nr, ibw, ipf, idpth + integer(psb_lpk_), allocatable :: ndstk(:,:), iold(:), ndeg(:), perm(:) + integer(psb_lpk_) :: i, j, k, ideg, nr, ibw, ipf, idpth info = psb_success_ - name = 'mat_renum' + name = 'mat_renum_gps' call psb_erractionsave(err_act) info = psb_success_ @@ -193,7 +411,7 @@ contains end do perm = 0 - call psb_gps_reduce(ndstk,nr,ideg,iold,perm,ndeg,ibw,ipf,idpth) + call psb_lgps_reduce(ndstk,nr,ideg,iold,perm,ndeg,ibw,ipf,idpth) if (.not.psb_isaperm(nr,perm)) then write(0,*) 'Something wrong: bad perm from gps_reduce' @@ -229,18 +447,18 @@ contains 9999 call psb_error_handler(err_act) return - end subroutine psb_mat_renum_gps + end subroutine psb_lmat_renum_gps - subroutine psb_mat_renum_amd(a,info,operm) + subroutine psb_lmat_renum_amd(a,info,operm) #if defined(HAVE_AMD) use iso_c_binding #endif use psb_base_mod implicit none - type(psb_sspmat_type), intent(inout) :: a + type(psb_lsspmat_type), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), allocatable, optional, intent(out) :: operm(:) + integer(psb_lpk_), allocatable, optional, intent(out) :: operm(:) ! #if defined(HAVE_AMD) @@ -255,20 +473,20 @@ contains end interface #endif - type(psb_s_csc_sparse_mat) :: acsc - class(psb_s_base_sparse_mat), allocatable :: aa - type(psb_s_coo_sparse_mat) :: acoo + type(psb_ls_csc_sparse_mat) :: acsc + class(psb_ls_base_sparse_mat), allocatable :: aa + type(psb_ls_coo_sparse_mat) :: acoo integer(psb_ipk_), allocatable :: perm(:) integer(psb_ipk_) :: err_act character(len=20) :: name - integer(psb_ipk_) :: i, j, k, ideg, nr, ibw, ipf, idpth, nz + integer(psb_lpk_) :: i, j, k, ideg, nr, ibw, ipf, idpth, nz info = psb_success_ name = 'mat_renum_amd' call psb_erractionsave(err_act) -#if defined(HAVE_AMD) && defined(IPK4) +#if defined(HAVE_AMD) && defined(LPK4) info = psb_success_ nr = a%get_nrows() @@ -332,11 +550,9 @@ contains 9999 call psb_error_handler(err_act) return + end subroutine psb_lmat_renum_amd - end subroutine psb_mat_renum_amd - -end subroutine psb_s_mat_renum - +end subroutine psb_ls_mat_renum subroutine psb_s_cmp_bwpf(mat,bwl,bwu,prf,info) use psb_base_mod @@ -387,3 +603,52 @@ subroutine psb_s_cmp_bwpf(mat,bwl,bwu,prf,info) end select end subroutine psb_s_cmp_bwpf + +subroutine psb_ls_cmp_bwpf(mat,bwl,bwu,prf,info) + use psb_base_mod + use psb_renum_mod, psb_protect_name => psb_ls_cmp_bwpf + implicit none + type(psb_lsspmat_type), intent(in) :: mat + integer(psb_lpk_), intent(out) :: bwl, bwu + integer(psb_lpk_), intent(out) :: prf + integer(psb_ipk_), intent(out) :: info + ! + integer(psb_lpk_), allocatable :: irow(:), icol(:) + real(psb_spk_), allocatable :: val(:) + integer(psb_lpk_) :: nz, i, j, lrbu, lrbl + + info = psb_success_ + bwl = 0 + bwu = 0 + prf = 0 + select type (aa=>mat%a) + class is (psb_ls_csr_sparse_mat) + do i=1, aa%get_nrows() + lrbl = 0 + lrbu = 0 + do j = aa%irp(i), aa%irp(i+1) - 1 + lrbl = max(lrbl,i-aa%ja(j)) + lrbu = max(lrbu,aa%ja(j)-i) + end do + prf = prf + lrbl+lrbu + bwu = max(bwu,lrbu) + bwl = max(bwl,lrbu) + end do + + class default + do i=1, aa%get_nrows() + lrbl = 0 + lrbu = 0 + call aa%csget(i,i,nz,irow,icol,val,info) + if (info /= psb_success_) return + do j=1, nz + lrbl = max(lrbl,i-icol(j)) + lrbu = max(lrbu,icol(j)-i) + end do + prf = prf + lrbl+lrbu + bwu = max(bwu,lrbu) + bwl = max(bwl,lrbu) + end do + end select + +end subroutine psb_ls_cmp_bwpf diff --git a/util/psb_s_renum_mod.f90 b/util/psb_s_renum_mod.f90 new file mode 100644 index 00000000..1e048df5 --- /dev/null +++ b/util/psb_s_renum_mod.f90 @@ -0,0 +1,69 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +module psb_s_renum_mod + use psb_base_mod + + interface psb_mat_renum + subroutine psb_s_mat_renum(alg,mat,info,perm) + import :: psb_ipk_, psb_sspmat_type + character(len=*), intent(in) :: alg + type(psb_sspmat_type), intent(inout) :: mat + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), allocatable, optional, intent(out) :: perm(:) + end subroutine psb_s_mat_renum + subroutine psb_ls_mat_renum(alg,mat,info,perm) + import :: psb_ipk_, psb_lpk_, psb_lsspmat_type + character(len=*), intent(in) :: alg + type(psb_lsspmat_type), intent(inout) :: mat + integer(psb_ipk_), intent(out) :: info + integer(psb_lpk_), allocatable, optional, intent(out) :: perm(:) + end subroutine psb_ls_mat_renum + end interface psb_mat_renum + + interface psb_cmp_bwpf + subroutine psb_s_cmp_bwpf(mat,bwl,bwu,prf,info) + import :: psb_ipk_, psb_sspmat_type + type(psb_sspmat_type), intent(in) :: mat + integer(psb_ipk_), intent(out) :: bwl, bwu + integer(psb_ipk_), intent(out) :: prf + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cmp_bwpf + subroutine psb_ls_cmp_bwpf(mat,bwl,bwu,prf,info) + import :: psb_ipk_, psb_lpk_, psb_lsspmat_type + type(psb_lsspmat_type), intent(in) :: mat + integer(psb_lpk_), intent(out) :: bwl, bwu + integer(psb_lpk_), intent(out) :: prf + integer(psb_ipk_), intent(out) :: info + end subroutine psb_ls_cmp_bwpf + end interface psb_cmp_bwpf + +end module psb_s_renum_mod diff --git a/util/psb_z_renum_impl.F90 b/util/psb_z_renum_impl.F90 index aa8f6b72..a27d4523 100644 --- a/util/psb_z_renum_impl.F90 +++ b/util/psb_z_renum_impl.F90 @@ -29,37 +29,61 @@ ! POSSIBILITY OF SUCH DAMAGE. ! ! -subroutine psb_z_mat_renums(alg,mat,info,perm) +subroutine psb_z_mat_renum(alg,mat,info,perm) use psb_base_mod - use psb_renum_mod, psb_protect_name => psb_z_mat_renums + use psb_renum_mod, psb_protect_name => psb_z_mat_renum implicit none character(len=*), intent(in) :: alg type(psb_zspmat_type), intent(inout) :: mat integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), allocatable, optional, intent(out) :: perm(:) - integer(psb_ipk_) :: err_act, nr, nc, ialg + integer(psb_ipk_) :: err_act, nr, nc, i, ierr(5) character(len=20) :: name info = psb_success_ name = 'mat_renum' call psb_erractionsave(err_act) + nr = mat%get_nrows() + nc = mat%get_ncols() + if (nr /= nc) then + info = psb_err_rectangular_mat_unsupported_ + ierr(1) = nr; ierr(2) = nc; + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + info = psb_success_ select case (psb_toupper(alg)) case ('GPS') - ialg = psb_mat_renum_gps_ - case ('AMD') - ialg = psb_mat_renum_amd_ + + call psb_mat_renum_gps(mat,info,perm) + + case('AMD') + + call psb_mat_renum_amd(mat,info,perm) + case ('NONE', 'ID') - ialg = psb_mat_renum_identity_ + nr = mat%get_nrows() + allocate(perm(nr),stat=info) + if (info == 0) then + do i=1,nr + perm(i) = i + end do + else + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + endif case default write(0,*) 'Unknown algorithm "',psb_toupper(alg),'"' - ialg = -1 + info = psb_err_input_value_invalid_i_ + ierr(1) = 1; + call psb_errpush(info,name,i_err=ierr) + goto 9999 end select - - call psb_mat_renum(ialg,mat,info,perm) - + if (info /= psb_success_) then info = psb_err_from_subroutine_non_ call psb_errpush(info,name) @@ -72,26 +96,218 @@ subroutine psb_z_mat_renums(alg,mat,info,perm) 9999 call psb_error_handler(err_act) return -end subroutine psb_z_mat_renums - -subroutine psb_z_mat_renum(alg,mat,info,perm) +contains + + subroutine psb_mat_renum_gps(a,info,operm) + use psb_base_mod + use psb_gps_mod + implicit none + type(psb_zspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), allocatable, optional, intent(out) :: operm(:) + + ! + class(psb_z_base_sparse_mat), allocatable :: aa + type(psb_z_csr_sparse_mat) :: acsr + type(psb_z_coo_sparse_mat) :: acoo + + integer(psb_ipk_) :: err_act + character(len=20) :: name + integer(psb_ipk_), allocatable :: ndstk(:,:), iold(:), ndeg(:), perm(:) + integer(psb_ipk_) :: i, j, k, ideg, nr, ibw, ipf, idpth + + info = psb_success_ + name = 'mat_renum_gps' + call psb_erractionsave(err_act) + + info = psb_success_ + + call a%mold(aa) + call a%mv_to(aa) + call aa%mv_to_fmt(acsr,info) + ! Insert call to gps_reduce + nr = acsr%get_nrows() + ideg = 0 + do i=1, nr + ideg = max(ideg,acsr%irp(i+1)-acsr%irp(i)) + end do + allocate(ndstk(nr,ideg), iold(nr), perm(nr+1), ndeg(nr),stat=info) + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + do i=1, nr + iold(i) = i + ndstk(i,:) = 0 + k = 0 + do j=acsr%irp(i),acsr%irp(i+1)-1 + k = k + 1 + ndstk(i,k) = acsr%ja(j) + end do + end do + perm = 0 + + call psb_gps_reduce(ndstk,nr,ideg,iold,perm,ndeg,ibw,ipf,idpth) + + if (.not.psb_isaperm(nr,perm)) then + write(0,*) 'Something wrong: bad perm from gps_reduce' + info = psb_err_from_subroutine_ + call psb_errpush(info,name) + goto 9999 + end if + ! Move to coordinate to apply renumbering + call acsr%mv_to_coo(acoo,info) + do i=1, acoo%get_nzeros() + acoo%ia(i) = perm(acoo%ia(i)) + acoo%ja(i) = perm(acoo%ja(i)) + end do + call acoo%fix(info) + + ! Get back to where we started from + call aa%mv_from_coo(acoo,info) + call a%mv_from(aa) + if (present(operm)) then + call psb_realloc(nr,operm,info) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + operm(1:nr) = perm(1:nr) + end if + + deallocate(aa) + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_mat_renum_gps + + + subroutine psb_mat_renum_amd(a,info,operm) +#if defined(HAVE_AMD) + use iso_c_binding +#endif + use psb_base_mod + implicit none + type(psb_zspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), allocatable, optional, intent(out) :: operm(:) + + ! +#if defined(HAVE_AMD) + interface + function psb_amd_order(n,ap,ai,p)& + & result(res) bind(c,name='psb_amd_order') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + integer(c_int) :: ap(*), ai(*), p(*) + end function psb_amd_order + end interface +#endif + + type(psb_z_csc_sparse_mat) :: acsc + class(psb_z_base_sparse_mat), allocatable :: aa + type(psb_z_coo_sparse_mat) :: acoo + + integer(psb_ipk_), allocatable :: perm(:) + integer(psb_ipk_) :: err_act + character(len=20) :: name + integer(psb_ipk_) :: i, j, k, ideg, nr, ibw, ipf, idpth, nz + + info = psb_success_ + name = 'mat_renum_amd' + call psb_erractionsave(err_act) + +#if defined(HAVE_AMD) && defined(IPK4) + + info = psb_success_ + nr = a%get_nrows() + nz = a%get_nzeros() + allocate(perm(nr),stat=info) + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + + allocate(aa, mold=a%a) + call a%mv_to(acsc) + + acsc%ia(:) = acsc%ia(:) - 1 + acsc%icp(:) = acsc%icp(:) - 1 + + info = psb_amd_order(nr,acsc%icp,acsc%ia,perm) + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_amd_order') + goto 9999 + end if + + perm(:) = perm(:) + 1 + acsc%ia(:) = acsc%ia(:) + 1 + acsc%icp(:) = acsc%icp(:) + 1 + + call acsc%mv_to_coo(acoo,info) + do i=1, acoo%get_nzeros() + acoo%ia(i) = perm(acoo%ia(i)) + acoo%ja(i) = perm(acoo%ja(i)) + end do + call acoo%fix(info) + + ! Get back to where we started from + call aa%mv_from_coo(acoo,info) + call a%mv_from(aa) + if (present(operm)) then + call psb_realloc(nr,operm,info) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + operm(1:nr) = perm(1:nr) + end if + + deallocate(aa,perm) + +#else + + info = psb_err_missing_aux_lib_ + call psb_errpush(info, name) + goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + end subroutine psb_mat_renum_amd + +end subroutine psb_z_mat_renum + +subroutine psb_lz_mat_renum(alg,mat,info,perm) use psb_base_mod - use psb_renum_mod, psb_protect_name => psb_z_mat_renum + use psb_renum_mod, psb_protect_name => psb_lz_mat_renum implicit none - integer(psb_ipk_), intent(in) :: alg - type(psb_zspmat_type), intent(inout) :: mat + character(len=*), intent(in) :: alg + type(psb_lzspmat_type), intent(inout) :: mat integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), allocatable, optional, intent(out) :: perm(:) + integer(psb_lpk_), allocatable, optional, intent(out) :: perm(:) - integer(psb_ipk_) :: err_act, nr, nc, i, ierr(5) + integer(psb_lpk_) :: nr, nc, i + integer(psb_ipk_) :: err_act, ierr(5) character(len=20) :: name info = psb_success_ name = 'mat_renum' call psb_erractionsave(err_act) - info = psb_success_ - nr = mat%get_nrows() nc = mat%get_ncols() if (nr /= nc) then @@ -101,16 +317,17 @@ subroutine psb_z_mat_renum(alg,mat,info,perm) goto 9999 end if - select case (alg) - case(psb_mat_renum_gps_) + info = psb_success_ + select case (psb_toupper(alg)) + case ('GPS') - call psb_mat_renum_gps(mat,info,perm) + call psb_lmat_renum_gps(mat,info,perm) - case(psb_mat_renum_amd_) + case('AMD') - call psb_mat_renum_amd(mat,info,perm) + call psb_lmat_renum_amd(mat,info,perm) - case(psb_mat_renum_identity_) + case ('NONE', 'ID') nr = mat%get_nrows() allocate(perm(nr),stat=info) if (info == 0) then @@ -123,8 +340,9 @@ subroutine psb_z_mat_renum(alg,mat,info,perm) goto 9999 endif case default + write(0,*) 'Unknown algorithm "',psb_toupper(alg),'"' info = psb_err_input_value_invalid_i_ - ierr(1) = 1; ierr(2) = alg; + ierr(1) = 1; call psb_errpush(info,name,i_err=ierr) goto 9999 end select @@ -143,26 +361,26 @@ subroutine psb_z_mat_renum(alg,mat,info,perm) contains - subroutine psb_mat_renum_gps(a,info,operm) + subroutine psb_lmat_renum_gps(a,info,operm) use psb_base_mod - use psb_gps_mod + use psb_lgps_mod implicit none - type(psb_zspmat_type), intent(inout) :: a + type(psb_lzspmat_type), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), allocatable, optional, intent(out) :: operm(:) + integer(psb_lpk_), allocatable, optional, intent(out) :: operm(:) ! - class(psb_z_base_sparse_mat), allocatable :: aa - type(psb_z_csr_sparse_mat) :: acsr - type(psb_z_coo_sparse_mat) :: acoo + class(psb_lz_base_sparse_mat), allocatable :: aa + type(psb_lz_csr_sparse_mat) :: acsr + type(psb_lz_coo_sparse_mat) :: acoo integer(psb_ipk_) :: err_act character(len=20) :: name - integer(psb_ipk_), allocatable :: ndstk(:,:), iold(:), ndeg(:), perm(:) - integer(psb_ipk_) :: i, j, k, ideg, nr, ibw, ipf, idpth + integer(psb_lpk_), allocatable :: ndstk(:,:), iold(:), ndeg(:), perm(:) + integer(psb_lpk_) :: i, j, k, ideg, nr, ibw, ipf, idpth info = psb_success_ - name = 'mat_renum' + name = 'mat_renum_gps' call psb_erractionsave(err_act) info = psb_success_ @@ -193,7 +411,7 @@ contains end do perm = 0 - call psb_gps_reduce(ndstk,nr,ideg,iold,perm,ndeg,ibw,ipf,idpth) + call psb_lgps_reduce(ndstk,nr,ideg,iold,perm,ndeg,ibw,ipf,idpth) if (.not.psb_isaperm(nr,perm)) then write(0,*) 'Something wrong: bad perm from gps_reduce' @@ -228,18 +446,19 @@ contains 9999 call psb_error_handler(err_act) return - end subroutine psb_mat_renum_gps + end subroutine psb_lmat_renum_gps - subroutine psb_mat_renum_amd(a,info,operm) + + subroutine psb_lmat_renum_amd(a,info,operm) #if defined(HAVE_AMD) use iso_c_binding #endif use psb_base_mod implicit none - type(psb_zspmat_type), intent(inout) :: a + type(psb_lzspmat_type), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), allocatable, optional, intent(out) :: operm(:) + integer(psb_lpk_), allocatable, optional, intent(out) :: operm(:) ! #if defined(HAVE_AMD) @@ -254,20 +473,20 @@ contains end interface #endif - type(psb_z_csc_sparse_mat) :: acsc - class(psb_z_base_sparse_mat), allocatable :: aa - type(psb_z_coo_sparse_mat) :: acoo + type(psb_lz_csc_sparse_mat) :: acsc + class(psb_lz_base_sparse_mat), allocatable :: aa + type(psb_lz_coo_sparse_mat) :: acoo integer(psb_ipk_), allocatable :: perm(:) integer(psb_ipk_) :: err_act character(len=20) :: name - integer(psb_ipk_) :: i, j, k, ideg, nr, ibw, ipf, idpth, nz + integer(psb_lpk_) :: i, j, k, ideg, nr, ibw, ipf, idpth, nz info = psb_success_ name = 'mat_renum_amd' call psb_erractionsave(err_act) -#if defined(HAVE_AMD) && defined(IPK4) +#if defined(HAVE_AMD) && defined(LPK4) info = psb_success_ nr = a%get_nrows() @@ -331,10 +550,9 @@ contains 9999 call psb_error_handler(err_act) return - end subroutine psb_mat_renum_amd - -end subroutine psb_z_mat_renum + end subroutine psb_lmat_renum_amd +end subroutine psb_lz_mat_renum subroutine psb_z_cmp_bwpf(mat,bwl,bwu,prf,info) use psb_base_mod @@ -385,3 +603,52 @@ subroutine psb_z_cmp_bwpf(mat,bwl,bwu,prf,info) end select end subroutine psb_z_cmp_bwpf + +subroutine psb_lz_cmp_bwpf(mat,bwl,bwu,prf,info) + use psb_base_mod + use psb_renum_mod, psb_protect_name => psb_lz_cmp_bwpf + implicit none + type(psb_lzspmat_type), intent(in) :: mat + integer(psb_lpk_), intent(out) :: bwl, bwu + integer(psb_lpk_), intent(out) :: prf + integer(psb_ipk_), intent(out) :: info + ! + integer(psb_lpk_), allocatable :: irow(:), icol(:) + complex(psb_dpk_), allocatable :: val(:) + integer(psb_lpk_) :: nz, i, j, lrbu, lrbl + + info = psb_success_ + bwl = 0 + bwu = 0 + prf = 0 + select type (aa=>mat%a) + class is (psb_lz_csr_sparse_mat) + do i=1, aa%get_nrows() + lrbl = 0 + lrbu = 0 + do j = aa%irp(i), aa%irp(i+1) - 1 + lrbl = max(lrbl,i-aa%ja(j)) + lrbu = max(lrbu,aa%ja(j)-i) + end do + prf = prf + lrbl+lrbu + bwu = max(bwu,lrbu) + bwl = max(bwl,lrbu) + end do + + class default + do i=1, aa%get_nrows() + lrbl = 0 + lrbu = 0 + call aa%csget(i,i,nz,irow,icol,val,info) + if (info /= psb_success_) return + do j=1, nz + lrbl = max(lrbl,i-icol(j)) + lrbu = max(lrbu,icol(j)-i) + end do + prf = prf + lrbl+lrbu + bwu = max(bwu,lrbu) + bwl = max(bwl,lrbu) + end do + end select + +end subroutine psb_lz_cmp_bwpf diff --git a/util/psb_z_renum_mod.f90 b/util/psb_z_renum_mod.f90 new file mode 100644 index 00000000..0bb56c35 --- /dev/null +++ b/util/psb_z_renum_mod.f90 @@ -0,0 +1,69 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +module psb_z_renum_mod + use psb_base_mod + + interface psb_mat_renum + subroutine psb_z_mat_renum(alg,mat,info,perm) + import :: psb_ipk_, psb_zspmat_type + character(len=*), intent(in) :: alg + type(psb_zspmat_type), intent(inout) :: mat + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), allocatable, optional, intent(out) :: perm(:) + end subroutine psb_z_mat_renum + subroutine psb_lz_mat_renum(alg,mat,info,perm) + import :: psb_ipk_, psb_lpk_, psb_lzspmat_type + character(len=*), intent(in) :: alg + type(psb_lzspmat_type), intent(inout) :: mat + integer(psb_ipk_), intent(out) :: info + integer(psb_lpk_), allocatable, optional, intent(out) :: perm(:) + end subroutine psb_lz_mat_renum + end interface psb_mat_renum + + interface psb_cmp_bwpf + subroutine psb_z_cmp_bwpf(mat,bwl,bwu,prf,info) + import :: psb_ipk_, psb_zspmat_type + type(psb_zspmat_type), intent(in) :: mat + integer(psb_ipk_), intent(out) :: bwl, bwu + integer(psb_ipk_), intent(out) :: prf + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cmp_bwpf + subroutine psb_lz_cmp_bwpf(mat,bwl,bwu,prf,info) + import :: psb_ipk_, psb_lpk_, psb_lzspmat_type + type(psb_lzspmat_type), intent(in) :: mat + integer(psb_lpk_), intent(out) :: bwl, bwu + integer(psb_lpk_), intent(out) :: prf + integer(psb_ipk_), intent(out) :: info + end subroutine psb_lz_cmp_bwpf + end interface psb_cmp_bwpf + +end module psb_z_renum_mod