diff --git a/Makefile b/Makefile index 831d6302..26cc69ba 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -PSBLASDIR=../psblas2 +PSBLASDIR=../psblas2-dev include $(PSBLASDIR)/Make.inc @@ -8,21 +8,22 @@ INCDIRS=-I. -I$(LIBDIR) MODOBJS= psb_prec_type.o psb_prec_mod.o MPFOBJS=psb_dbldaggrmat.o psb_zbldaggrmat.o +MPCOBJS=psb_slud_impl.o psb_zslud_impl.o F90OBJS=psb_dasmatbld.o psb_dslu_bld.o psb_dumf_bld.o psb_dilu_fct.o\ psb_dmlprc_bld.o psb_dsp_renum.o psb_dbjac_bld.o psb_dilu_bld.o \ psb_dprecbld.o psb_dprecfree.o psb_dprecset.o \ psb_dbaseprc_bld.o psb_ddiagsc_bld.o psb_dgenaggrmap.o \ - psb_dprc_aply.o psb_dmlprc_aply.o \ + psb_dprc_aply.o psb_dmlprc_aply.o psb_dslud_bld.o\ psb_dbaseprc_aply.o psb_dbjac_aply.o\ psb_zasmatbld.o psb_zslu_bld.o psb_zumf_bld.o psb_zilu_fct.o\ psb_zmlprc_bld.o psb_zsp_renum.o psb_zbjac_bld.o psb_zilu_bld.o \ psb_zprecbld.o psb_zprecfree.o psb_zprecset.o \ psb_zbaseprc_bld.o psb_zdiagsc_bld.o psb_zgenaggrmap.o \ - psb_zprc_aply.o psb_zmlprc_aply.o \ + psb_zprc_aply.o psb_zmlprc_aply.o psb_zslud_bld.o\ psb_zbaseprc_aply.o psb_zbjac_aply.o\ $(MPFOBJS) COBJS=psb_slu_impl.o psb_umf_impl.o psb_zslu_impl.o psb_zumf_impl.o -OBJS=$(F90OBJS) $(COBJS) $(MPFOBJS) $(MODOBJS) +OBJS=$(F90OBJS) $(COBJS) $(MPFOBJS) $(MPCOBJS) $(MODOBJS) LIBMOD=psb_prec_mod$(.mod) LOCAL_MODS=$(LIBMOD) psb_prec_type$(.mod) @@ -39,6 +40,7 @@ psb_prec_mod.o: psb_prec_type.o mpobjs: (make $(MPFOBJS) F90="$(MPF90)" F90COPT="$(F90COPT)") + (make $(MPCOBJS) CC="$(MPCC)" CCOPT="$(CCOPT)") veryclean: clean /bin/rm -f $(LIBNAME) diff --git a/psb_dbaseprc_bld.f90 b/psb_dbaseprc_bld.f90 index f1d8ad59..6ee2eff1 100644 --- a/psb_dbaseprc_bld.f90 +++ b/psb_dbaseprc_bld.f90 @@ -131,6 +131,11 @@ subroutine psb_dbaseprc_bld(a,desc_a,p,info,upd) & renum_none_,is_legal_renum) call psb_check_def(p%iprcparm(f_type_),'fact',& & f_ilu_n_,is_legal_ml_fact) + + if (p%iprcparm(f_type_)==f_slud_) then + p%iprcparm(n_ovr_) = 0 + p%iprcparm(jac_sweeps_) = 1 + end if if (debug) write(0,*)me, ': Calling PSB_BJAC_BLD' if (debug) call psb_barrier(ictxt) diff --git a/psb_dbjac_aply.f90 b/psb_dbjac_aply.f90 index 96817b03..bb19aceb 100644 --- a/psb_dbjac_aply.f90 +++ b/psb_dbjac_aply.f90 @@ -61,7 +61,7 @@ subroutine psb_dbjac_aply(alpha,prec,x,beta,y,desc_data,trans,work,info) character ::diagl, diagu integer :: ictxt,np,me,i, nrg, err_act, int_err(5) real(kind(1.d0)) :: t1, t2, t3, t4, t5, t6, t7 - logical,parameter :: debug=.false., debugprt=.false. + logical,parameter :: debug=.false., debugprt=.false. character(len=20) :: name, ch_err name='psb_dbjac_aply' @@ -83,8 +83,8 @@ subroutine psb_dbjac_aply(alpha,prec,x,beta,y,desc_data,trans,work,info) end select - n_row=desc_data%matrix_data(psb_n_row_) - n_col=desc_data%matrix_data(psb_n_col_) + n_row = psb_cd_get_local_rows(desc_data) + n_col = psb_cd_get_local_cols(desc_data) if (n_col <= size(work)) then ww => work(1:n_col) @@ -106,10 +106,12 @@ subroutine psb_dbjac_aply(alpha,prec,x,beta,y,desc_data,trans,work,info) end if endif - + if (debug) then + write(0,*) me,' BJAC_APLY: ',prec%iprcparm(f_type_),prec%iprcparm(jac_sweeps_) + end if + if (prec%iprcparm(jac_sweeps_) == 1) then - select case(prec%iprcparm(f_type_)) case(f_ilu_n_,f_ilu_e_) @@ -149,6 +151,21 @@ subroutine psb_dbjac_aply(alpha,prec,x,beta,y,desc_data,trans,work,info) if(info /=0) goto 9999 call psb_geaxpby(alpha,ww,beta,y,desc_data,info) + case(f_slud_) + +!!$ write(0,*) 'Calling SLUDist_solve ',n_row + ww(1:n_row) = x(1:n_row) + + select case(trans) + case('N','n') + call psb_dsludist_solve(0,n_row,1,ww,n_row,prec%iprcparm(slud_ptr_),info) + case('T','t','C','c') + call psb_dsludist_solve(1,n_row,1,ww,n_row,prec%iprcparm(slud_ptr_),info) + end select + + if(info /=0) goto 9999 + call psb_geaxpby(alpha,ww,beta,y,desc_data,info) + case (f_umf_) @@ -204,6 +221,10 @@ subroutine psb_dbjac_aply(alpha,prec,x,beta,y,desc_data,trans,work,info) if(info /=0) goto 9999 end do + case(f_slud_) + write(0,*) 'No sense in having SLUDist with JAC_SWEEPS >1' + info=4010 + goto 9999 case(f_slu_) do i=1, prec%iprcparm(jac_sweeps_) ! X(k+1) = M^-1*(b-N*X(k)) @@ -239,7 +260,9 @@ subroutine psb_dbjac_aply(alpha,prec,x,beta,y,desc_data,trans,work,info) else - + info = 10 + call psb_errpush(info,name,& + & i_err=(/2,prec%iprcparm(jac_sweeps_),0,0,0/)) goto 9999 endif diff --git a/psb_dbjac_bld.f90 b/psb_dbjac_bld.f90 index c560ad57..214081a7 100644 --- a/psb_dbjac_bld.f90 +++ b/psb_dbjac_bld.f90 @@ -378,6 +378,59 @@ subroutine psb_dbjac_bld(a,desc_a,p,upd,info) end if + case(f_slud_) + + atmp%fida='COO' + call psb_csdp(a,atmp,info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='psb_csdp') + goto 9999 + end if + + n_row = psb_cd_get_local_rows(p%desc_data) + n_col = psb_cd_get_local_cols(p%desc_data) + call psb_rwextd(n_row,atmp,info,b=blck,rowscale=.false.) + + if (p%iprcparm(jac_sweeps_) > 1) then + !------------------------------------------------------------------ + ! Split AC=M+N N off-diagonal part + ! Output in COO format. + call psb_sp_clip(atmp,p%av(ap_nd_),info,& + & jmin=atmp%m+1,rscale=.false.,cscale=.false.) + + call psb_ipcoo2csr(p%av(ap_nd_),info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_ipcoo2csr') + goto 9999 + end if + + k = psb_sp_get_nnzeros(p%av(ap_nd_)) + call psb_sum(ictxt,k) + + if (k == 0) then + ! If the off diagonal part is emtpy, there's no point + ! in doing multiple Jacobi sweeps. This is certain + ! to happen when running on a single processor. + p%iprcparm(jac_sweeps_) = 1 + end if + endif + +!!$ nztmp = psb_sp_get_nnzeros(atmp) +!!$ call psb_loc_to_glob(atmp%ia2(1:nztmp),p%desc_data,info,iact='I') + if (info == 0) call psb_ipcoo2csr(atmp,info) + if (info == 0) call psb_sludist_bld(atmp,p%desc_data,p,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='slu_bld') + goto 9999 + end if + + call psb_sp_free(atmp,info) + if(info/=0) then + call psb_errpush(4010,name,a_err='psb_sp_free') + goto 9999 + end if +!!$ + case(f_umf_) @@ -396,6 +449,7 @@ subroutine psb_dbjac_bld(a,desc_a,p,upd,info) !------------------------------------------------------------------ ! Split AC=M+N N off-diagonal part ! Output in COO format. +!!$ write(0,*) 'bjac_bld:' size(p%av),ap_nd_ call psb_sp_clip(atmp,p%av(ap_nd_),info,& & jmin=atmp%m+1,rscale=.false.,cscale=.false.) diff --git a/psb_dbldaggrmat.f90 b/psb_dbldaggrmat.f90 index f2348d5d..9e8c0816 100644 --- a/psb_dbldaggrmat.f90 +++ b/psb_dbldaggrmat.f90 @@ -264,6 +264,33 @@ contains goto 9999 end if + if (.false.) then + !if(.not.associated(p%av(ap_nd_)%aspk)) p%iprcparm(jac_sweeps_) = 1 + !------------------------------------------------------------------ + ! Split AC=M+N N off-diagonal part + ! Output in COO format. + call psb_sp_clip(ac,p%av(ap_nd_),info,& + & jmin=ac%m+1,rscale=.false.,cscale=.false.) + + call psb_ipcoo2csr(p%av(ap_nd_),info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_ipcoo2csr') + goto 9999 + end if + + k = psb_sp_get_nnzeros(p%av(ap_nd_)) + call psb_sum(ictxt,k) + + if (k == 0) then + ! If the off diagonal part is emtpy, there's no point + ! in doing multiple Jacobi sweeps. This is certain + ! to happen when running on a single processor. + p%iprcparm(jac_sweeps_) = 1 + end if + !write(0,*) 'operations in bldaggrmat are ok !' + !------------------------------------------------------------------ + end if + else write(0,*) 'Unknown p%iprcparm(coarse_mat) in aggregate_sp',p%iprcparm(coarse_mat_) @@ -765,6 +792,28 @@ contains deallocate(ivall,nzbr,idisp) + if (.false.) then + ! Split AC=M+N N off-diagonal part + ! Output in COO format. + call psb_sp_clip(ac,p%av(ap_nd_),info,& + & jmin=ac%m+1,rscale=.false.,cscale=.false.) + + call psb_ipcoo2csr(p%av(ap_nd_),info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_ipcoo2csr') + goto 9999 + end if + + k = psb_sp_get_nnzeros(p%av(ap_nd_)) + call psb_sum(ictxt,k) + + if (k == 0) then + ! If the off diagonal part is emtpy, there's no point + ! in doing multiple Jacobi sweeps. This is certain + ! to happen when running on a single processor. + p%iprcparm(jac_sweeps_) = 1 + end if + end if if (np>1) then nzl = psb_sp_get_nnzeros(am1) diff --git a/psb_dprecset.f90 b/psb_dprecset.f90 index 38467236..26e6c8db 100644 --- a/psb_dprecset.f90 +++ b/psb_dprecset.f90 @@ -134,6 +134,7 @@ subroutine psb_dprecset(p,ptype,info,iv,rs,rv,ilev,nlev) if (isz >= 2) p%baseprecv(ilev_)%iprcparm(restr_) = iv(2) if (isz >= 3) p%baseprecv(ilev_)%iprcparm(prol_) = iv(3) if (isz >= 4) p%baseprecv(ilev_)%iprcparm(f_type_) = iv(4) + if (isz >= 5) p%baseprecv(ilev_)%iprcparm(jac_sweeps_) = iv(5) ! Do not consider renum for the time being. !!$ if (isz >= 5) p%baseprecv(ilev_)%iprcparm(iren_) = iv(5) end if diff --git a/psb_dslud_bld.f90 b/psb_dslud_bld.f90 new file mode 100644 index 00000000..ede73263 --- /dev/null +++ b/psb_dslud_bld.f90 @@ -0,0 +1,111 @@ +!!$ +!!$ +!!$ MD2P4 +!!$ Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS +!!$ for +!!$ Parallel Sparse BLAS v2.0 +!!$ (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ Daniela di Serafino Second University of Naples +!!$ Pasqua D'Ambra ICAR-CNR +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the MD2P4 group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +subroutine psb_dsludist_bld(a,desc_a,p,info) + use psb_base_mod + use psb_prec_mod, mld_protect_name => psb_dsludist_bld + + implicit none + + type(psb_dspmat_type), intent(inout) :: a + type(psb_desc_type), intent(in) :: desc_a + type(psb_dbaseprc_type), intent(inout) :: p + integer, intent(out) :: info + + integer :: i,j,nza,nzb,nzt,ictxt,me,np,err_act,& + & mglob,ifrst,ibcheck,nrow,ncol,npr,npc + logical, parameter :: debug=.false. + character(len=20) :: name, ch_err + + if (psb_get_errstatus().ne.0) return + info=0 + name='psb_dslu_bld' + call psb_erractionsave(err_act) + + ictxt = psb_cd_get_context(desc_a) + + call psb_info(ictxt, me, np) + + if (toupper(a%fida) /= 'CSR') then + write(0,*) 'Unimplemented input to SLU_BLD' + goto 9999 + endif + + + ! + ! WARN we need to check for a BLOCK distribution. + ! + nrow = psb_cd_get_local_rows(desc_a) + ncol = psb_cd_get_local_cols(desc_a) + ifrst = desc_a%loc_to_glob(1) + ibcheck = desc_a%loc_to_glob(nrow) - ifrst + 1 + ibcheck = ibcheck - nrow + call psb_amx(ictxt,ibcheck) + if (ibcheck > 0) then + write(0,*) 'Warning: does not look like a BLOCK distribution' + endif + + mglob = psb_cd_get_global_rows(desc_a) + nzt = psb_sp_get_nnzeros(a) + + npr = np + npc = 1 + call psb_loc_to_glob(a%ia1(1:nzt),desc_a,info,iact='I') + + call psb_dsludist_factor(mglob,nrow,nzt,ifrst,& + & a%aspk,a%ia2,a%ia1,p%iprcparm(slud_ptr_),& + & npr, npc, info) + if (info /= 0) then + ch_err='psb_slud_fact' + call psb_errpush(4110,name,a_err=ch_err,i_err=(/info,0,0,0,0/)) + goto 9999 + end if + + call psb_glob_to_loc(a%ia1(1:nzt),desc_a,info,iact='I') + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.psb_act_abort_) then + call psb_error() + return + end if + return + +end subroutine psb_dsludist_bld + diff --git a/psb_prec_mod.f90 b/psb_prec_mod.f90 index ad14d27c..3436454f 100644 --- a/psb_prec_mod.f90 +++ b/psb_prec_mod.f90 @@ -170,7 +170,7 @@ module psb_prec_mod subroutine psb_dmlprc_bld(a,desc_a,p,info) use psb_base_mod use psb_prec_type - type(psb_dspmat_type), intent(in), target :: a + type(psb_dspmat_type), intent(inout), target :: a type(psb_desc_type), intent(in), target :: desc_a type(psb_dbaseprc_type), intent(inout), target :: p integer, intent(out) :: info @@ -178,7 +178,7 @@ module psb_prec_mod subroutine psb_zmlprc_bld(a,desc_a,p,info) use psb_base_mod use psb_prec_type - type(psb_zspmat_type), intent(in), target :: a + type(psb_zspmat_type), intent(inout), target :: a type(psb_desc_type), intent(in), target :: desc_a type(psb_zbaseprc_type), intent(inout),target :: p integer, intent(out) :: info @@ -328,11 +328,30 @@ module psb_prec_mod end subroutine psb_zilu_bld end interface + interface psb_sludist_bld + subroutine psb_dsludist_bld(a,desc_a,p,info) + use psb_base_mod + use psb_prec_type + type(psb_dspmat_type), intent(inout) :: a + type(psb_desc_type), intent(in) :: desc_a + type(psb_dbaseprc_type), intent(inout) :: p + integer, intent(out) :: info + end subroutine psb_dsludist_bld + subroutine psb_zsludist_bld(a,desc_a,p,info) + use psb_base_mod + use psb_prec_type + type(psb_zspmat_type), intent(inout) :: a + type(psb_desc_type), intent(in) :: desc_a + type(psb_zbaseprc_type), intent(inout) :: p + integer, intent(out) :: info + end subroutine psb_zsludist_bld + end interface + interface psb_slu_bld subroutine psb_dslu_bld(a,desc_a,p,info) use psb_base_mod use psb_prec_type - type(psb_dspmat_type), intent(inout) :: a + type(psb_dspmat_type), intent(inout) :: a type(psb_desc_type), intent(in) :: desc_a type(psb_dbaseprc_type), intent(inout) :: p integer, intent(out) :: info @@ -351,7 +370,7 @@ module psb_prec_mod subroutine psb_dumf_bld(a,desc_a,p,info) use psb_base_mod use psb_prec_type - type(psb_dspmat_type), intent(inout) :: a + type(psb_dspmat_type), intent(inout) :: a type(psb_desc_type), intent(in) :: desc_a type(psb_dbaseprc_type), intent(inout) :: p integer, intent(out) :: info @@ -359,7 +378,7 @@ module psb_prec_mod subroutine psb_zumf_bld(a,desc_a,p,info) use psb_base_mod use psb_prec_type - type(psb_zspmat_type), intent(inout) :: a + type(psb_zspmat_type), intent(in) :: a type(psb_desc_type), intent(in) :: desc_a type(psb_zbaseprc_type), intent(inout) :: p integer, intent(out) :: info diff --git a/psb_prec_type.f90 b/psb_prec_type.f90 index d074f3c4..95918b64 100644 --- a/psb_prec_type.f90 +++ b/psb_prec_type.f90 @@ -72,12 +72,14 @@ module psb_prec_type integer, parameter :: renum_none_=0, renum_glb_=1, renum_gps_=2 !! 2 ints for 64 bit versions integer, parameter :: slu_ptr_=17, umf_symptr_=17, umf_numptr_=19 - integer, parameter :: ifpsz=20 + integer, parameter :: slud_ptr_=21 + integer, parameter :: ifpsz=24 ! Entries in dprcparm: ILU(E) epsilon, smoother omega integer, parameter :: fact_eps_=1, smooth_omega_=2 integer, parameter :: dfpsz=4 ! Factorization types: none, ILU(N), ILU(E), SuperLU, UMFPACK - integer, parameter :: f_none_=0,f_ilu_n_=1,f_ilu_e_=2,f_slu_=3,f_umf_=4 + integer, parameter :: f_none_=0,f_ilu_n_=1,f_ilu_e_=2,f_slu_=3 + integer, parameter :: f_umf_=4, f_slud_=5 ! Fields for sparse matrices ensembles: integer, parameter :: l_pr_=1, u_pr_=2, bp_ilu_avsz=2 integer, parameter :: ap_nd_=3, ac_=4, sm_pr_t_=5, sm_pr_=6 @@ -189,8 +191,9 @@ module psb_prec_type & ml_names(0:3)=(/'None ','Additive ','Multiplicative',& & 'New ML '/) character(len=15), parameter, private :: & - & fact_names(0:4)=(/'None ','ILU(n) ',& - & 'ILU(eps) ','Sparse SuperLU','UMFPACK Sp. LU'/) + & fact_names(0:5)=(/'None ','ILU(n) ',& + & 'ILU(eps) ','Sparse SuperLU','UMFPACK Sp. LU',& + & 'SuperLU_Dist '/) interface psb_base_precfree module procedure psb_dbase_precfree, psb_zbase_precfree @@ -290,7 +293,7 @@ contains write(iout,*) 'Fill level :',p%baseprecv(ilev)%iprcparm(ilu_fill_in_) case(f_ilu_e_) write(iout,*) 'Fill threshold :',p%baseprecv(ilev)%dprcparm(fact_eps_) - case(f_slu_,f_umf_) + case(f_slu_,f_umf_,f_slud_) case default write(iout,*) 'Should never get here!' end select @@ -358,7 +361,7 @@ contains !!$ write(iout,*) 'Fill level :',p%baseprecv(2)%iprcparm(ilu_fill_in_) !!$ case(f_ilu_e_) !!$ write(iout,*) 'Fill threshold :',p%baseprecv(2)%dprcparm(fact_eps_) -!!$ case(f_slu_,f_umf_) +!!$ case(f_slu_,f_umf_,f_slud_) !!$ case default !!$ write(iout,*) 'Should never get here!' !!$ end select @@ -434,7 +437,7 @@ contains write(iout,*) 'Fill level :',p%baseprecv(2)%iprcparm(ilu_fill_in_) case(f_ilu_e_) write(iout,*) 'Fill threshold :',p%baseprecv(2)%dprcparm(fact_eps_) - case(f_slu_,f_umf_) + case(f_slu_,f_umf_,f_slud_) case default write(iout,*) 'Should never get here!' end select @@ -502,7 +505,7 @@ contains !!$ write(iout,*) 'Fill level :',p%baseprecv(2)%iprcparm(ilu_fill_in_) !!$ case(f_ilu_e_) !!$ write(iout,*) 'Fill threshold :',p%baseprecv(2)%dprcparm(fact_eps_) -!!$ case(f_slu_,f_umf_) +!!$ case(f_slu_,f_umf_,f_slud_) !!$ case default !!$ write(iout,*) 'Should never get here!' !!$ end select @@ -613,7 +616,7 @@ contains integer, intent(in) :: ip logical :: is_legal_ml_fact - is_legal_ml_fact = ((ip>=f_ilu_n_).and.(ip<=f_umf_)) + is_legal_ml_fact = ((ip>=f_ilu_n_).and.(ip<=f_slud_)) return end function is_legal_ml_fact function is_legal_ml_lev(ip) @@ -743,6 +746,9 @@ contains if (p%iprcparm(f_type_)==f_slu_) then call psb_dslu_free(p%iprcparm(slu_ptr_),info) end if + if (p%iprcparm(f_type_)==f_slud_) then + call psb_dsludist_free(p%iprcparm(slud_ptr_),info) + end if if (p%iprcparm(f_type_)==f_umf_) then call psb_dumf_free(p%iprcparm(umf_symptr_),& & p%iprcparm(umf_numptr_),info) diff --git a/psb_slud_impl.c b/psb_slud_impl.c new file mode 100644 index 00000000..ff751a32 --- /dev/null +++ b/psb_slud_impl.c @@ -0,0 +1,377 @@ +/* + * MD2P4 + * Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS + * for + * Parallel Sparse BLAS v2.0 + * + * (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata + * Alfredo Buttari + * Daniela di Serafino Second University of Naples + * Pasqua D'Ambra ICAR-CNR + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the MD2P4 group or the names of its contributors may + * not be used to endorse or promote products derived from this + * software without specific written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ +/* This file is an interface to the SuperLUDist routines for sparse + factorization. It was obtaned by modifying the + c_fortran_dgssv.c file from the SuperLU source distribution; + original copyright terms reproduced below. + + PSBLAS v 2.0 */ + + +/* ===================== + +Copyright (c) 2003, The Regents of the University of California, through +Lawrence Berkeley National Laboratory (subject to receipt of any required +approvals from U.S. Dept. of Energy) + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +(1) Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +(2) Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. +(3) Neither the name of Lawrence Berkeley National Laboratory, U.S. Dept. of +Energy nor the names of its contributors may be used to endorse or promote +products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +/* + * -- Distributed SuperLU routine (version 2.0) -- + * Lawrence Berkeley National Lab, Univ. of California Berkeley. + * March 15, 2003 + * + */ + +#ifdef Have_SLUDist_ +#include +#include "superlu_ddefs.h" + +#define HANDLE_SIZE 8 +/* kind of integer to hold a pointer. Use int. + This might need to be changed on 64-bit systems. */ +#ifdef LargeFptr +typedef long long fptr; /* 32-bit by default */ +#else +typedef int fptr; /* 32-bit by default */ +#endif + +typedef struct { + SuperMatrix *A; + LUstruct_t *LUstruct; + gridinfo_t *grid; + ScalePermstruct_t *ScalePermstruct; +} factors_t; + + +#else + +#include + +#endif + + +#ifdef Add_ +#define psb_dsludist_factor_ psb_dsludist_factor_ +#define psb_dsludist_solve_ psb_dsludist_solve_ +#define psb_dsludist_free_ psb_dsludist_free_ +#endif +#ifdef AddDouble_ +#define psb_dsludist_factor_ psb_dsludist_factor__ +#define psb_dsludist_solve_ psb_dsludist_solve__ +#define psb_dsludist_free_ psb_dsludist_free__ +#endif +#ifdef NoChange +#define psb_dsludist_factor_ psb_dsludist_factor +#define psb_dsludist_solve_ psb_dsludist_solve +#define psb_dsludist_free_ psb_dsludist_free +#endif + + + + +void +psb_dsludist_factor_(int *n, int *nl, int *nnzl, int *ffstr, + double *values, int *rowptr, int *colind, +#ifdef Have_SLUDist_ + fptr *f_factors, /* a handle containing the address + pointing to the factored matrices */ +#else + void *f_factors, +#endif + int *nprow, int *npcol, int *info) + +{ +/* + * This routine can be called from Fortran. + * performs LU decomposition. + * + * f_factors (input/output) fptr* + * On output contains the pointer pointing to + * the structure of the factored matrices. + * + */ + +#ifdef Have_SLUDist_ + SuperMatrix *A; + NRformat_loc *Astore; + + ScalePermstruct_t *ScalePermstruct; + LUstruct_t *LUstruct; + SOLVEstruct_t SOLVEstruct; + gridinfo_t *grid; + int i, panel_size, permc_spec, relax; + trans_t trans; + double drop_tol = 0.0,b[1],berr[1]; + mem_usage_t mem_usage; + superlu_options_t options; + SuperLUStat_t stat; + factors_t *LUfactors; + int fst_row; + int *icol,*irpt; + double *ival; + + trans = NOTRANS; +/* fprintf(stderr,"Entry to sludist_factor\n"); */ + grid = (gridinfo_t *) SUPERLU_MALLOC(sizeof(gridinfo_t)); + superlu_gridinit(MPI_COMM_WORLD, *nprow, *npcol, grid); + /* Initialize the statistics variables. */ + PStatInit(&stat); + fst_row = (*ffstr) -1; + /* Adjust to 0-based indexing */ + icol = (int *) malloc((*nnzl)*sizeof(int)); + irpt = (int *) malloc(((*nl)+1)*sizeof(int)); + ival = (double *) malloc((*nnzl)*sizeof(double)); + for (i = 0; i < *nnzl; ++i) ival[i] = values[i]; + for (i = 0; i < *nnzl; ++i) icol[i] = colind[i] -1; + for (i = 0; i <= *nl; ++i) irpt[i] = rowptr[i] -1; + + A = (SuperMatrix *) malloc(sizeof(SuperMatrix)); + dCreate_CompRowLoc_Matrix_dist(A, *n, *n, *nnzl, *nl, fst_row, + ival, icol, irpt, + SLU_NR_loc, SLU_D, SLU_GE); + + /* Initialize ScalePermstruct and LUstruct. */ + ScalePermstruct = (ScalePermstruct_t *) SUPERLU_MALLOC(sizeof(ScalePermstruct_t)); + LUstruct = (LUstruct_t *) SUPERLU_MALLOC(sizeof(LUstruct_t)); + ScalePermstructInit(*n,*n, ScalePermstruct); + LUstructInit(*n,*n, LUstruct); + + /* Set the default input options. */ + set_default_options_dist(&options); + options.IterRefine=NO; + options.PrintStat=NO; + + pdgssvx(&options, A, ScalePermstruct, b, *nl, 0, + grid, LUstruct, &SOLVEstruct, berr, &stat, info); + + if ( *info == 0 ) { + ; + } else { + printf("pdgssvx() error returns INFO= %d\n", *info); + if ( *info <= *n ) { /* factorization completes */ + ; + } + } + if (options.SolveInitialized) { + dSolveFinalize(&options,&SOLVEstruct); + } + + + /* Save the LU factors in the factors handle */ + LUfactors = (factors_t *) SUPERLU_MALLOC(sizeof(factors_t)); + LUfactors->LUstruct = LUstruct; + LUfactors->grid = grid; + LUfactors->A = A; + LUfactors->ScalePermstruct = ScalePermstruct; +/* fprintf(stderr,"slud factor: LUFactors %p \n",LUfactors); */ +/* fprintf(stderr,"slud factor: A %p %p\n",A,LUfactors->A); */ +/* fprintf(stderr,"slud factor: grid %p %p\n",grid,LUfactors->grid); */ +/* fprintf(stderr,"slud factor: LUstruct %p %p\n",LUstruct,LUfactors->LUstruct); */ + *f_factors = (fptr) LUfactors; + + PStatFree(&stat); +#else + fprintf(stderr," SLUDist Not Configured, fix make.inc and recompile\n"); + *info=-1; +#endif +} + + +void +psb_dsludist_solve_(int *itrans, int *n, int *nrhs, + double *b, int *ldb, +#ifdef Have_SLUDist_ + fptr *f_factors, /* a handle containing the address + pointing to the factored matrices */ +#else + void *f_factors, +#endif + int *info) + +{ +/* + * This routine can be called from Fortran. + * performs triangular solve + * + */ +#ifdef Have_SLUDist_ + SuperMatrix *A; + ScalePermstruct_t *ScalePermstruct; + LUstruct_t *LUstruct; + SOLVEstruct_t SOLVEstruct; + gridinfo_t *grid; + int i, panel_size, permc_spec, relax; + trans_t trans; + double drop_tol = 0.0; + double *berr; + mem_usage_t mem_usage; + superlu_options_t options; + SuperLUStat_t stat; + factors_t *LUfactors; + + LUfactors = (factors_t *) *f_factors ; + A = LUfactors->A ; + LUstruct = LUfactors->LUstruct ; + grid = LUfactors->grid ; + + ScalePermstruct = LUfactors->ScalePermstruct; +/* fprintf(stderr,"slud solve: LUFactors %p \n",LUfactors); */ +/* fprintf(stderr,"slud solve: A %p %p\n",A,LUfactors->A); */ +/* fprintf(stderr,"slud solve: grid %p %p\n",grid,LUfactors->grid); */ +/* fprintf(stderr,"slud solve: LUstruct %p %p\n",LUstruct,LUfactors->LUstruct); */ + + + if (*itrans == 0) { + trans = NOTRANS; + } else if (*itrans ==1) { + trans = TRANS; + } else if (*itrans ==2) { + trans = CONJ; + } else { + trans = NOTRANS; + } + +/* fprintf(stderr,"Entry to sludist_solve\n"); */ + berr = (double *) malloc((*nrhs) *sizeof(double)); + + /* Initialize the statistics variables. */ + PStatInit(&stat); + + /* Set the default input options. */ + set_default_options_dist(&options); + options.IterRefine = NO; + options.Fact = FACTORED; + options.PrintStat = NO; + + pdgssvx(&options, A, ScalePermstruct, b, *ldb, *nrhs, + grid, LUstruct, &SOLVEstruct, berr, &stat, info); + +/* fprintf(stderr,"Double check: after solve %d %lf\n",*info,berr[0]); */ + if (options.SolveInitialized) { + dSolveFinalize(&options,&SOLVEstruct); + } + PStatFree(&stat); + free(berr); +#else + fprintf(stderr," SLUDist Not Configured, fix make.inc and recompile\n"); + *info=-1; +#endif + +} + + +void +psb_dsludist_free_( +#ifdef Have_SLUDist_ + fptr *f_factors, /* a handle containing the address + pointing to the factored matrices */ +#else + void *f_factors, +#endif + int *info) + +{ +/* + * This routine can be called from Fortran. + * + * free all storage in the end + * + */ +#ifdef Have_SLUDist_ + SuperMatrix *A; + ScalePermstruct_t *ScalePermstruct; + LUstruct_t *LUstruct; + SOLVEstruct_t SOLVEstruct; + gridinfo_t *grid; + int i, panel_size, permc_spec, relax; + trans_t trans; + double drop_tol = 0.0; + double *berr; + mem_usage_t mem_usage; + superlu_options_t options; + SuperLUStat_t stat; + factors_t *LUfactors; + + LUfactors = (factors_t *) *f_factors ; + A = LUfactors->A ; + LUstruct = LUfactors->LUstruct ; + grid = LUfactors->grid ; + ScalePermstruct = LUfactors->ScalePermstruct; + + Destroy_CompRowLoc_Matrix_dist(A); + ScalePermstructFree(ScalePermstruct); + LUstructFree(LUstruct); + superlu_gridexit(grid); + + free(grid); + free(LUstruct); + free(LUfactors); + +#else + fprintf(stderr," SLUDist Not Configured, fix make.inc and recompile\n"); + *info=-1; +#endif +} + + diff --git a/psb_zbaseprc_bld.f90 b/psb_zbaseprc_bld.f90 index d5e51df1..f529db1b 100644 --- a/psb_zbaseprc_bld.f90 +++ b/psb_zbaseprc_bld.f90 @@ -68,10 +68,11 @@ subroutine psb_zbaseprc_bld(a,desc_a,p,info,upd) if (debug) write(0,*) 'Entering baseprc_bld' info = 0 int_err(1) = 0 - ictxt = psb_cd_get_context(desc_a) - n_row = psb_cd_get_local_rows(desc_a) - n_col = psb_cd_get_local_cols(desc_a) - mglob = psb_cd_get_global_rows(desc_a) + ictxt = psb_cd_get_context(desc_a) + n_row = psb_cd_get_local_rows(desc_a) + n_col = psb_cd_get_local_cols(desc_a) + mglob = psb_cd_get_global_rows(desc_a) + if (debug) write(0,*) 'Preconditioner Blacs_gridinfo' call psb_info(ictxt, me, np) @@ -89,8 +90,6 @@ subroutine psb_zbaseprc_bld(a,desc_a,p,info,upd) ! ! Should add check to ensure all procs have the same... ! - ! ALso should define symbolic names for the preconditioners. - ! call psb_check_def(p%iprcparm(p_type_),'base_prec',& & diag_,is_legal_base_prec) @@ -132,6 +131,11 @@ subroutine psb_zbaseprc_bld(a,desc_a,p,info,upd) & renum_none_,is_legal_renum) call psb_check_def(p%iprcparm(f_type_),'fact',& & f_ilu_n_,is_legal_ml_fact) + + if (p%iprcparm(f_type_)==f_slud_) then + p%iprcparm(n_ovr_) = 0 + p%iprcparm(jac_sweeps_) = 1 + end if if (debug) write(0,*)me, ': Calling PSB_BJAC_BLD' if (debug) call psb_barrier(ictxt) diff --git a/psb_zbjac_aply.f90 b/psb_zbjac_aply.f90 index 422bca17..38ea14c4 100644 --- a/psb_zbjac_aply.f90 +++ b/psb_zbjac_aply.f90 @@ -68,23 +68,23 @@ subroutine psb_zbjac_aply(alpha,prec,x,beta,y,desc_data,trans,work,info) info = 0 call psb_erractionsave(err_act) - ictxt=desc_data%matrix_data(psb_ctxt_) + ictxt=psb_cd_get_context(desc_data) call psb_info(ictxt, me, np) diagl='U' diagu='U' - select case(trans) - case('N','n') - case('T','t','C','c') + select case(toupper(trans)) + case('N') + case('T','C') case default call psb_errpush(40,name) goto 9999 end select - n_row=desc_data%matrix_data(psb_n_row_) - n_col=desc_data%matrix_data(psb_n_col_) + n_row = psb_cd_get_local_rows(desc_data) + n_col = psb_cd_get_local_cols(desc_data) if (n_col <= size(work)) then ww => work(1:n_col) @@ -113,8 +113,8 @@ subroutine psb_zbjac_aply(alpha,prec,x,beta,y,desc_data,trans,work,info) select case(prec%iprcparm(f_type_)) case(f_ilu_n_,f_ilu_e_) - select case(trans) - case('N','n') + select case(toupper(trans)) + case('N') call psb_spsm(zone,prec%av(l_pr_),x,zzero,ww,desc_data,info,& & trans='N',unit=diagl,choice=psb_none_,work=aux) @@ -124,7 +124,7 @@ subroutine psb_zbjac_aply(alpha,prec,x,beta,y,desc_data,trans,work,info) & trans='N',unit=diagu,choice=psb_none_, work=aux) if(info /=0) goto 9999 - case('T','t','C','c') + case('T','C') call psb_spsm(zone,prec%av(u_pr_),x,zzero,ww,desc_data,info,& & trans=trans,unit=diagu,choice=psb_none_, work=aux) if(info /=0) goto 9999 @@ -139,11 +139,30 @@ subroutine psb_zbjac_aply(alpha,prec,x,beta,y,desc_data,trans,work,info) ww(1:n_row) = x(1:n_row) - select case(trans) - case('N','n') + select case(toupper(trans)) + case('N') call psb_zslu_solve(0,n_row,1,ww,n_row,prec%iprcparm(slu_ptr_),info) - case('T','t','C','c') + case('T') call psb_zslu_solve(1,n_row,1,ww,n_row,prec%iprcparm(slu_ptr_),info) + case('C') + call psb_zslu_solve(2,n_row,1,ww,n_row,prec%iprcparm(slu_ptr_),info) + end select + + if(info /=0) goto 9999 + call psb_geaxpby(alpha,ww,beta,y,desc_data,info) + + case(f_slud_) + +!!$ write(0,*) 'Calling SLUDist_solve ',n_row + ww(1:n_row) = x(1:n_row) + + select case(toupper(trans)) + case('N') + call psb_zsludist_solve(0,n_row,1,ww,n_row,prec%iprcparm(slud_ptr_),info) + case('T') + call psb_zsludist_solve(1,n_row,1,ww,n_row,prec%iprcparm(slud_ptr_),info) + case('C') + call psb_zsludist_solve(2,n_row,1,ww,n_row,prec%iprcparm(slud_ptr_),info) end select if(info /=0) goto 9999 @@ -152,11 +171,13 @@ subroutine psb_zbjac_aply(alpha,prec,x,beta,y,desc_data,trans,work,info) case (f_umf_) - select case(trans) - case('N','n') + select case(toupper(trans)) + case('N') call psb_zumf_solve(0,n_row,ww,x,n_row,prec%iprcparm(umf_numptr_),info) - case('T','t','C','c') + case('T') call psb_zumf_solve(1,n_row,ww,x,n_row,prec%iprcparm(umf_numptr_),info) + case('C') + call psb_zumf_solve(2,n_row,ww,x,n_row,prec%iprcparm(umf_numptr_),info) end select if(info /=0) goto 9999 @@ -204,6 +225,10 @@ subroutine psb_zbjac_aply(alpha,prec,x,beta,y,desc_data,trans,work,info) if(info /=0) goto 9999 end do + case(f_slud_) + write(0,*) 'No sense in having SLUDist with JAC_SWEEPS >1' + info=4010 + goto 9999 case(f_slu_) do i=1, prec%iprcparm(jac_sweeps_) ! X(k+1) = M^-1*(b-N*X(k)) @@ -239,7 +264,9 @@ subroutine psb_zbjac_aply(alpha,prec,x,beta,y,desc_data,trans,work,info) else - + info = 10 + call psb_errpush(info,name,& + & i_err=(/2,prec%iprcparm(jac_sweeps_),0,0,0/)) goto 9999 endif diff --git a/psb_zbjac_bld.f90 b/psb_zbjac_bld.f90 index 0fbf7775..dff519bb 100644 --- a/psb_zbjac_bld.f90 +++ b/psb_zbjac_bld.f90 @@ -379,6 +379,59 @@ subroutine psb_zbjac_bld(a,desc_a,p,upd,info) end if + case(f_slud_) + + atmp%fida='COO' + call psb_csdp(a,atmp,info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='psb_csdp') + goto 9999 + end if + + n_row = psb_cd_get_local_rows(p%desc_data) + n_col = psb_cd_get_local_cols(p%desc_data) + call psb_rwextd(n_row,atmp,info,b=blck,rowscale=.false.) + + if (p%iprcparm(jac_sweeps_) > 1) then + !------------------------------------------------------------------ + ! Split AC=M+N N off-diagonal part + ! Output in COO format. + call psb_sp_clip(atmp,p%av(ap_nd_),info,& + & jmin=atmp%m+1,rscale=.false.,cscale=.false.) + + call psb_ipcoo2csr(p%av(ap_nd_),info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_ipcoo2csr') + goto 9999 + end if + + k = psb_sp_get_nnzeros(p%av(ap_nd_)) + call psb_sum(ictxt,k) + + if (k == 0) then + ! If the off diagonal part is emtpy, there's no point + ! in doing multiple Jacobi sweeps. This is certain + ! to happen when running on a single processor. + p%iprcparm(jac_sweeps_) = 1 + end if + endif + +!!$ nztmp = psb_sp_get_nnzeros(atmp) +!!$ call psb_loc_to_glob(atmp%ia2(1:nztmp),p%desc_data,info,iact='I') + if (info == 0) call psb_ipcoo2csr(atmp,info) + if (info == 0) call psb_sludist_bld(atmp,p%desc_data,p,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='slu_bld') + goto 9999 + end if + + call psb_sp_free(atmp,info) + if(info/=0) then + call psb_errpush(4010,name,a_err='psb_sp_free') + goto 9999 + end if +!!$ + case(f_umf_) @@ -397,6 +450,7 @@ subroutine psb_zbjac_bld(a,desc_a,p,upd,info) !------------------------------------------------------------------ ! Split AC=M+N N off-diagonal part ! Output in COO format. +!!$ write(0,*) 'bjac_bld:' size(p%av),ap_nd_ call psb_sp_clip(atmp,p%av(ap_nd_),info,& & jmin=atmp%m+1,rscale=.false.,cscale=.false.) diff --git a/psb_zbldaggrmat.f90 b/psb_zbldaggrmat.f90 index 1936b15e..ff500c46 100644 --- a/psb_zbldaggrmat.f90 +++ b/psb_zbldaggrmat.f90 @@ -263,6 +263,33 @@ contains goto 9999 end if + if (.false.) then + !if(.not.associated(p%av(ap_nd_)%aspk)) p%iprcparm(jac_sweeps_) = 1 + !------------------------------------------------------------------ + ! Split AC=M+N N off-diagonal part + ! Output in COO format. + call psb_sp_clip(ac,p%av(ap_nd_),info,& + & jmin=ac%m+1,rscale=.false.,cscale=.false.) + + call psb_ipcoo2csr(p%av(ap_nd_),info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_ipcoo2csr') + goto 9999 + end if + + k = psb_sp_get_nnzeros(p%av(ap_nd_)) + call psb_sum(ictxt,k) + + if (k == 0) then + ! If the off diagonal part is emtpy, there's no point + ! in doing multiple Jacobi sweeps. This is certain + ! to happen when running on a single processor. + p%iprcparm(jac_sweeps_) = 1 + end if + !write(0,*) 'operations in bldaggrmat are ok !' + !------------------------------------------------------------------ + end if + else write(0,*) 'Unknown p%iprcparm(coarse_mat) in aggregate_sp',p%iprcparm(coarse_mat_) @@ -764,6 +791,29 @@ contains deallocate(ivall,nzbr,idisp) + if (.false.) then + ! Split AC=M+N N off-diagonal part + ! Output in COO format. + call psb_sp_clip(ac,p%av(ap_nd_),info,& + & jmin=ac%m+1,rscale=.false.,cscale=.false.) + + call psb_ipcoo2csr(p%av(ap_nd_),info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_ipcoo2csr') + goto 9999 + end if + + k = psb_sp_get_nnzeros(p%av(ap_nd_)) + call psb_sum(ictxt,k) + + if (k == 0) then + ! If the off diagonal part is emtpy, there's no point + ! in doing multiple Jacobi sweeps. This is certain + ! to happen when running on a single processor. + p%iprcparm(jac_sweeps_) = 1 + end if + end if + if (np>1) then nzl = psb_sp_get_nnzeros(am1) diff --git a/psb_zprecset.f90 b/psb_zprecset.f90 index a6d39b53..6f2b6fd5 100644 --- a/psb_zprecset.f90 +++ b/psb_zprecset.f90 @@ -134,6 +134,7 @@ subroutine psb_zprecset(p,ptype,info,iv,rs,rv,ilev,nlev) if (isz >= 2) p%baseprecv(ilev_)%iprcparm(restr_) = iv(2) if (isz >= 3) p%baseprecv(ilev_)%iprcparm(prol_) = iv(3) if (isz >= 4) p%baseprecv(ilev_)%iprcparm(f_type_) = iv(4) + if (isz >= 5) p%baseprecv(ilev_)%iprcparm(jac_sweeps_) = iv(5) ! Do not consider renum for the time being. !!$ if (isz >= 5) p%baseprecv(ilev_)%iprcparm(iren_) = iv(5) end if diff --git a/psb_zslu_bld.f90 b/psb_zslu_bld.f90 index 6804a37b..140b5dfc 100644 --- a/psb_zslu_bld.f90 +++ b/psb_zslu_bld.f90 @@ -40,7 +40,7 @@ subroutine psb_zslu_bld(a,desc_a,p,info) implicit none - type(psb_zspmat_type), intent(inout) :: a + type(psb_zspmat_type), intent(in) :: a type(psb_desc_type), intent(in) :: desc_a type(psb_zbaseprc_type), intent(inout) :: p integer, intent(out) :: info diff --git a/psb_zslud_bld.f90 b/psb_zslud_bld.f90 new file mode 100644 index 00000000..87888eca --- /dev/null +++ b/psb_zslud_bld.f90 @@ -0,0 +1,122 @@ +!!$ +!!$ +!!$ MD2P4 +!!$ Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS +!!$ for +!!$ Parallel Sparse BLAS v2.0 +!!$ (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ Daniela di Serafino Second University of Naples +!!$ Pasqua D'Ambra ICAR-CNR +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the MD2P4 group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +subroutine psb_zsludist_bld(a,desc_a,p,info) + use psb_base_mod + use psb_prec_mod, mld_protect_name => psb_zsludist_bld + + implicit none + + type(psb_zspmat_type), intent(inout) :: a + type(psb_desc_type), intent(in) :: desc_a + type(psb_zbaseprc_type), intent(inout) :: p + integer, intent(out) :: info + + integer :: i,j,nza,nzb,nzt,ictxt,me,np,err_act,& + & mglob,ifrst,ibcheck,nrow,ncol,npr,npc, ip + logical, parameter :: debug=.false. + character(len=20) :: name, ch_err + + if(psb_get_errstatus().ne.0) return + info=0 + name='psb_zslu_bld' + call psb_erractionsave(err_act) + + ictxt = psb_cd_get_context(desc_a) + + call psb_info(ictxt, me, np) + + if (toupper(a%fida) /= 'CSR') then + write(0,*) 'Unimplemented input to SLU_BLD' + goto 9999 + endif + + + ! + ! WARN we need to check for a BLOCK distribution. + ! + nrow = psb_cd_get_local_rows(desc_a) + ncol = psb_cd_get_local_cols(desc_a) + ifrst = desc_a%loc_to_glob(1) + ibcheck = desc_a%loc_to_glob(nrow) - ifrst + 1 + ibcheck = ibcheck - nrow + call psb_amx(ictxt,ibcheck) + if (ibcheck > 0) then + write(0,*) 'Warning: does not look like a BLOCK distribution' + endif + + mglob = psb_cd_get_global_rows(desc_a) + nzt = psb_sp_get_nnzeros(a) + + call psb_loc_to_glob(a%ia1(1:nzt),desc_a,info,iact='I') + + npr = np + npc = 1 + ip = floor(sqrt(dble(np))) + do + if (ip <= 1) exit + if (mod(np,ip)==0) then + npr = np/ip + npc = ip + exit + end if + ip = ip - 1 + end do +!!$ write(0,*) 'Process grid : ',npr,npc + call psb_zsludist_factor(mglob,nrow,nzt,ifrst,& + & a%aspk,a%ia2,a%ia1,p%iprcparm(slud_ptr_),& + & npr, npc, info) + if (info /= 0) then + ch_err='psb_slud_fact' + call psb_errpush(4110,name,a_err=ch_err,i_err=(/info,0,0,0,0/)) + goto 9999 + end if + + call psb_glob_to_loc(a%ia1(1:nzt),desc_a,info,iact='I') + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.psb_act_abort_) then + call psb_error() + return + end if + return + +end subroutine psb_zsludist_bld + diff --git a/psb_zslud_impl.c b/psb_zslud_impl.c new file mode 100644 index 00000000..1cf94f6c --- /dev/null +++ b/psb_zslud_impl.c @@ -0,0 +1,376 @@ +/* + * MD2P4 + * Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS + * for + * Parallel Sparse BLAS v2.0 + * + * (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata + * Alfredo Buttari + * Daniela di Serafino Second University of Naples + * Pasqua D'Ambra ICAR-CNR + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the MD2P4 group or the names of its contributors may + * not be used to endorse or promote products derived from this + * software without specific written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ +/* This file is an interface to the SuperLUDist routines for sparse + factorization. It was obtaned by modifying the + c_fortran_dgssv.c file from the SuperLU source distribution; + original copyright terms reproduced below. + + PSBLAS v 2.0 */ + + +/* ===================== + +Copyright (c) 2003, The Regents of the University of California, through +Lawrence Berkeley National Laboratory (subject to receipt of any required +approvals from U.S. Dept. of Energy) + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +(1) Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +(2) Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. +(3) Neither the name of Lawrence Berkeley National Laboratory, U.S. Dept. of +Energy nor the names of its contributors may be used to endorse or promote +products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +/* + * -- Distributed SuperLU routine (version 2.0) -- + * Lawrence Berkeley National Lab, Univ. of California Berkeley. + * March 15, 2003 + * + */ + +#ifdef Have_SLUDist_ +#include +#include "superlu_zdefs.h" + +#define HANDLE_SIZE 8 +/* kind of integer to hold a pointer. Use int. + This might need to be changed on 64-bit systems. */ +#ifdef LargeFptr +typedef long long fptr; /* 32-bit by default */ +#else +typedef int fptr; /* 32-bit by default */ +#endif + +typedef struct { + SuperMatrix *A; + LUstruct_t *LUstruct; + gridinfo_t *grid; + ScalePermstruct_t *ScalePermstruct; +} factors_t; + + +#else + +#include + +#endif + + +#ifdef Add_ +#define psb_zsludist_factor_ psb_zsludist_factor_ +#define psb_zsludist_solve_ psb_zsludist_solve_ +#define psb_zsludist_free_ psb_zsludist_free_ +#endif +#ifdef AddDouble_ +#define psb_zsludist_factor_ psb_zsludist_factor__ +#define psb_zsludist_solve_ psb_zsludist_solve__ +#define psb_zsludist_free_ psb_zsludist_free__ +#endif +#ifdef NoChange +#define psb_zsludist_factor_ psb_zsludist_factor +#define psb_zsludist_solve_ psb_zsludist_solve +#define psb_zsludist_free_ psb_zsludist_free +#endif + + + + +void +psb_zsludist_factor_(int *n, int *nl, int *nnzl, int *ffstr, + doublecomplex *values, int *rowptr, int *colind, +#ifdef Have_SLUDist_ + fptr *f_factors, /* a handle containing the address + pointing to the factored matrices */ +#else + void *f_factors, +#endif + int *nprow, int *npcol, int *info) + +{ +/* + * This routine can be called from Fortran. + * performs LU decomposition. + * + * f_factors (input/output) fptr* + * On output contains the pointer pointing to + * the structure of the factored matrices. + * + */ + +#ifdef Have_SLUDist_ + SuperMatrix *A; + NRformat_loc *Astore; + + ScalePermstruct_t *ScalePermstruct; + LUstruct_t *LUstruct; + SOLVEstruct_t SOLVEstruct; + gridinfo_t *grid; + int i, panel_size, permc_spec, relax; + trans_t trans; + double drop_tol = 0.0,berr[1]; + mem_usage_t mem_usage; + superlu_options_t options; + SuperLUStat_t stat; + factors_t *LUfactors; + int fst_row; + int *icol,*irpt; + doublecomplex *ival,b[1]; + + trans = NOTRANS; + grid = (gridinfo_t *) SUPERLU_MALLOC(sizeof(gridinfo_t)); + superlu_gridinit(MPI_COMM_WORLD, *nprow, *npcol, grid); + /* Initialize the statistics variables. */ + PStatInit(&stat); + fst_row = (*ffstr) -1; + /* Adjust to 0-based indexing */ + icol = (int *) malloc((*nnzl)*sizeof(int)); + irpt = (int *) malloc(((*nl)+1)*sizeof(int)); + ival = (doublecomplex *) malloc((*nnzl)*sizeof(doublecomplex)); + for (i = 0; i < *nnzl; ++i) ival[i] = values[i]; + for (i = 0; i < *nnzl; ++i) icol[i] = colind[i] -1; + for (i = 0; i <= *nl; ++i) irpt[i] = rowptr[i] -1; + + A = (SuperMatrix *) malloc(sizeof(SuperMatrix)); + zCreate_CompRowLoc_Matrix_dist(A, *n, *n, *nnzl, *nl, fst_row, + ival, icol, irpt, + SLU_NR_loc, SLU_Z, SLU_GE); + + /* Initialize ScalePermstruct and LUstruct. */ + ScalePermstruct = (ScalePermstruct_t *) SUPERLU_MALLOC(sizeof(ScalePermstruct_t)); + LUstruct = (LUstruct_t *) SUPERLU_MALLOC(sizeof(LUstruct_t)); + ScalePermstructInit(*n,*n, ScalePermstruct); + LUstructInit(*n,*n, LUstruct); + + /* Set the default input options. */ + set_default_options_dist(&options); + options.IterRefine=NO; + options.PrintStat=NO; + + pzgssvx(&options, A, ScalePermstruct, b, *nl, 0, + grid, LUstruct, &SOLVEstruct, berr, &stat, info); + + if ( *info == 0 ) { + ; + } else { + printf("pzgssvx() error returns INFO= %d\n", *info); + if ( *info <= *n ) { /* factorization completes */ + ; + } + } + if (options.SolveInitialized) { + zSolveFinalize(&options,&SOLVEstruct); + } + + + /* Save the LU factors in the factors handle */ + LUfactors = (factors_t *) SUPERLU_MALLOC(sizeof(factors_t)); + LUfactors->LUstruct = LUstruct; + LUfactors->grid = grid; + LUfactors->A = A; + LUfactors->ScalePermstruct = ScalePermstruct; +/* fprintf(stderr,"slud factor: LUFactors %p \n",LUfactors); */ +/* fprintf(stderr,"slud factor: A %p %p\n",A,LUfactors->A); */ +/* fprintf(stderr,"slud factor: grid %p %p\n",grid,LUfactors->grid); */ +/* fprintf(stderr,"slud factor: LUstruct %p %p\n",LUstruct,LUfactors->LUstruct); */ + *f_factors = (fptr) LUfactors; + + PStatFree(&stat); +#else + fprintf(stderr," SLUDist Not Configured, fix make.inc and recompile\n"); + *info=-1; +#endif +} + + +void +psb_zsludist_solve_(int *itrans, int *n, int *nrhs, + doublecomplex *b, int *ldb, +#ifdef Have_SLUDist_ + fptr *f_factors, /* a handle containing the address + pointing to the factored matrices */ +#else + void *f_factors, +#endif + int *info) + +{ +/* + * This routine can be called from Fortran. + * performs triangular solve + * + */ +#ifdef Have_SLUDist_ + SuperMatrix *A; + ScalePermstruct_t *ScalePermstruct; + LUstruct_t *LUstruct; + SOLVEstruct_t SOLVEstruct; + gridinfo_t *grid; + int i, panel_size, permc_spec, relax; + trans_t trans; + double drop_tol = 0.0; + double *berr; + mem_usage_t mem_usage; + superlu_options_t options; + SuperLUStat_t stat; + factors_t *LUfactors; + + LUfactors = (factors_t *) *f_factors ; + A = LUfactors->A ; + LUstruct = LUfactors->LUstruct ; + grid = LUfactors->grid ; + + ScalePermstruct = LUfactors->ScalePermstruct; +/* fprintf(stderr,"slud solve: LUFactors %p \n",LUfactors); */ +/* fprintf(stderr,"slud solve: A %p %p\n",A,LUfactors->A); */ +/* fprintf(stderr,"slud solve: grid %p %p\n",grid,LUfactors->grid); */ +/* fprintf(stderr,"slud solve: LUstruct %p %p\n",LUstruct,LUfactors->LUstruct); */ + + + if (*itrans == 0) { + trans = NOTRANS; + } else if (*itrans ==1) { + trans = TRANS; + } else if (*itrans ==2) { + trans = CONJ; + } else { + trans = NOTRANS; + } + +/* fprintf(stderr,"Entry to sludist_solve\n"); */ + berr = (double *) malloc((*nrhs) *sizeof(double)); + + /* Initialize the statistics variables. */ + PStatInit(&stat); + + /* Set the default input options. */ + set_default_options_dist(&options); + options.IterRefine = NO; + options.Fact = FACTORED; + options.PrintStat = NO; + + pzgssvx(&options, A, ScalePermstruct, b, *ldb, *nrhs, + grid, LUstruct, &SOLVEstruct, berr, &stat, info); + +/* fprintf(stderr,"Double check: after solve %d %lf\n",*info,berr[0]); */ + if (options.SolveInitialized) { + zSolveFinalize(&options,&SOLVEstruct); + } + PStatFree(&stat); + free(berr); +#else + fprintf(stderr," SLUDist Not Configured, fix make.inc and recompile\n"); + *info=-1; +#endif + +} + + +void +psb_zsludist_free_( +#ifdef Have_SLUDist_ + fptr *f_factors, /* a handle containing the address + pointing to the factored matrices */ +#else + void *f_factors, +#endif + int *info) + +{ +/* + * This routine can be called from Fortran. + * + * free all storage in the end + * + */ +#ifdef Have_SLUDist_ + SuperMatrix *A; + ScalePermstruct_t *ScalePermstruct; + LUstruct_t *LUstruct; + SOLVEstruct_t SOLVEstruct; + gridinfo_t *grid; + int i, panel_size, permc_spec, relax; + trans_t trans; + double drop_tol = 0.0; + double *berr; + mem_usage_t mem_usage; + superlu_options_t options; + SuperLUStat_t stat; + factors_t *LUfactors; + + LUfactors = (factors_t *) *f_factors ; + A = LUfactors->A ; + LUstruct = LUfactors->LUstruct ; + grid = LUfactors->grid ; + ScalePermstruct = LUfactors->ScalePermstruct; + + Destroy_CompRowLoc_Matrix_dist(A); + ScalePermstructFree(ScalePermstruct); + LUstructFree(LUstruct); + superlu_gridexit(grid); + + free(grid); + free(LUstruct); + free(LUfactors); + +#else + fprintf(stderr," SLUDist Not Configured, fix make.inc and recompile\n"); + *info=-1; +#endif +} + +