From 6b38c925802ee7611c7f86313f17c5c0ee27e131 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Thu, 3 Jan 2008 11:12:21 +0000 Subject: [PATCH] Added TRANSPOSE paths to Block Jacobi and AS preconditioner application. --- mlprec/mld_das_aply.f90 | 232 +++++++++++++++++++++++++++----------- mlprec/mld_dbjac_aply.f90 | 192 +++++++++++++++++++++---------- mlprec/mld_zas_aply.f90 | 230 ++++++++++++++++++++++++++----------- mlprec/mld_zbjac_aply.f90 | 219 ++++++++++++++++++++++++++--------- 4 files changed, 633 insertions(+), 240 deletions(-) diff --git a/mlprec/mld_das_aply.f90 b/mlprec/mld_das_aply.f90 index da5f0c80..e7a95dee 100644 --- a/mlprec/mld_das_aply.f90 +++ b/mlprec/mld_das_aply.f90 @@ -103,15 +103,6 @@ subroutine mld_das_aply(alpha,prec,x,beta,y,desc_data,trans,work,info) call psb_info(ictxt, me, np) - select case(trans) - case('N','n') - case('T','t','C','c') - case default - info=40 - int_err(1)=6 - ch_err(2:2)=trans - goto 9999 - end select select case(prec%iprcparm(mld_prec_type_)) @@ -178,84 +169,199 @@ subroutine mld_das_aply(alpha,prec,x,beta,y,desc_data,trans,work,info) tx(1:nrow_d) = x(1:nrow_d) tx(nrow_d+1:isz) = dzero - ! - ! Get the overlap entries of tx (tx==x) - ! - if (prec%iprcparm(mld_sub_restr_)==psb_halo_) then - call psb_halo(tx,prec%desc_data,info,work=aux,data=psb_comm_ext_) - if(info /=0) then - info=4010 - ch_err='psb_halo' - goto 9999 - end if - else if (prec%iprcparm(mld_sub_restr_) /= psb_none_) then - call psb_errpush(4001,name,a_err='Invalid mld_sub_restr_') - goto 9999 - end if + select case(toupper(trans)) + case('N') - ! - ! If required, reorder tx according to the row/column permutation of the - ! local extended matrix, stored into the permutation vector prec%perm - ! - if (prec%iprcparm(mld_sub_ren_)>0) then - call psb_gelp('n',prec%perm,tx,info) - if(info /=0) then - info=4010 - ch_err='psb_gelp' + ! + ! Get the overlap entries of tx (tx==x) + ! + if (prec%iprcparm(mld_sub_restr_)==psb_halo_) then + call psb_halo(tx,prec%desc_data,info,work=aux,data=psb_comm_ext_) + if(info /=0) then + info=4010 + ch_err='psb_halo' + goto 9999 + end if + else if (prec%iprcparm(mld_sub_restr_) /= psb_none_) then + call psb_errpush(4001,name,a_err='Invalid mld_sub_restr_') goto 9999 end if - endif - ! - ! Apply to tx the block-Jacobi preconditioner/solver (multiple sweeps of the - ! block-Jacobi solver can be applied at the coarsest level of a multilevel - ! preconditioner). The resulting vector is ty. - ! - call mld_bjac_aply(done,prec,tx,dzero,ty,prec%desc_data,trans,aux,info) - if(info /= 0) then - info=4010 - ch_err='mld_bjac_aply' - goto 9999 - end if + ! + ! If required, reorder tx according to the row/column permutation of the + ! local extended matrix, stored into the permutation vector prec%perm + ! + if (prec%iprcparm(mld_sub_ren_)>0) then + call psb_gelp('n',prec%perm,tx,info) + if(info /=0) then + info=4010 + ch_err='psb_gelp' + goto 9999 + end if + endif - ! - ! Apply to ty the inverse permutation of prec%perm - ! - if (prec%iprcparm(mld_sub_ren_)>0) then - call psb_gelp('n',prec%invperm,ty,info) + ! + ! Apply to tx the block-Jacobi preconditioner/solver (multiple sweeps of the + ! block-Jacobi solver can be applied at the coarsest level of a multilevel + ! preconditioner). The resulting vector is ty. + ! + call mld_bjac_aply(done,prec,tx,dzero,ty,prec%desc_data,toupper(trans),aux,info) if(info /= 0) then info=4010 - ch_err='psb_gelp' + ch_err='mld_bjac_aply' goto 9999 end if - endif - select case (prec%iprcparm(mld_sub_prol_)) + ! + ! Apply to ty the inverse permutation of prec%perm + ! + if (prec%iprcparm(mld_sub_ren_)>0) then + call psb_gelp('n',prec%invperm,ty,info) + if(info /= 0) then + info=4010 + ch_err='psb_gelp' + goto 9999 + end if + endif + + select case (prec%iprcparm(mld_sub_prol_)) + + case(psb_none_) + ! + ! Would work anyway, but since it is supposed to do nothing ... + ! call psb_ovrl(ty,prec%desc_data,info,& + ! & update=prec%iprcparm(mld_sub_prol_),work=aux) + + + case(psb_sum_,psb_avg_) + ! + ! Update the overlap of ty + ! + call psb_ovrl(ty,prec%desc_data,info,& + & update=prec%iprcparm(mld_sub_prol_),work=aux) + if(info /=0) then + info=4010 + ch_err='psb_ovrl' + goto 9999 + end if + + case default + call psb_errpush(4001,name,a_err='Invalid mld_sub_prol_') + goto 9999 + end select - case(psb_none_) + case('T','C') + + + ! + ! With transpose, we have to do it here ! - ! Would work anyway, but since it is supposed to do nothing ... - ! call psb_ovrl(ty,prec%desc_data,info,& - ! & update=prec%iprcparm(mld_sub_prol_),work=aux) + + select case (prec%iprcparm(mld_sub_prol_)) + + case(psb_none_) + ! + ! Do nothing + + case(psb_sum_) + ! + ! Transpose of sum is halo + ! + call psb_halo(tx,prec%desc_data,info,work=aux,data=psb_comm_ext_) + if(info /=0) then + info=4010 + ch_err='psb_halo' + goto 9999 + end if + + case(psb_avg_) + ! + ! Tricky one: first we have to scale the overlap entries, + ! which we can do by assignind mode=0, i.e. no communication + ! (hence only scaling), then we do the halo + ! + call psb_ovrl(tx,prec%desc_data,info,& + & update=psb_avg_,work=aux,mode=0) + if(info /=0) then + info=4010 + ch_err='psb_ovrl' + goto 9999 + end if + call psb_halo(tx,prec%desc_data,info,work=aux,data=psb_comm_ext_) + if(info /=0) then + info=4010 + ch_err='psb_halo' + goto 9999 + end if + + case default + call psb_errpush(4001,name,a_err='Invalid mld_sub_prol_') + goto 9999 + end select - case(psb_sum_,psb_avg_) ! - ! Update the overlap of ty + ! If required, reorder tx according to the row/column permutation of the + ! local extended matrix, stored into the permutation vector prec%perm + ! + if (prec%iprcparm(mld_sub_ren_)>0) then + call psb_gelp('n',prec%perm,tx,info) + if(info /=0) then + info=4010 + ch_err='psb_gelp' + goto 9999 + end if + endif + ! - call psb_ovrl(ty,prec%desc_data,info,& - & update=prec%iprcparm(mld_sub_prol_),work=aux) - if(info /=0) then + ! Apply to tx the block-Jacobi preconditioner/solver (multiple sweeps of the + ! block-Jacobi solver can be applied at the coarsest level of a multilevel + ! preconditioner). The resulting vector is ty. + ! + call mld_bjac_aply(done,prec,tx,dzero,ty,prec%desc_data,toupper(trans),aux,info) + if(info /= 0) then info=4010 - ch_err='psb_ovrl' + ch_err='mld_bjac_aply' + goto 9999 + end if + + ! + ! Apply to ty the inverse permutation of prec%perm + ! + if (prec%iprcparm(mld_sub_ren_)>0) then + call psb_gelp('n',prec%invperm,ty,info) + if(info /= 0) then + info=4010 + ch_err='psb_gelp' + goto 9999 + end if + endif + + ! + ! With transpose, we have to do it here + ! + if (prec%iprcparm(mld_sub_restr_) == psb_halo_) then + call psb_ovrl(ty,prec%desc_data,info,& + & update=psb_sum_,work=aux) + if(info /=0) then + info=4010 + ch_err='psb_ovrl' + goto 9999 + end if + else if (prec%iprcparm(mld_sub_restr_) /= psb_none_) then + call psb_errpush(4001,name,a_err='Invalid mld_sub_restr_') goto 9999 end if case default - call psb_errpush(4001,name,a_err='Invalid mld_sub_prol_') + info=40 + int_err(1)=6 + ch_err(2:2)=trans goto 9999 end select + + ! ! Compute y = beta*y + alpha*ty (ty==K^(-1)*tx) ! diff --git a/mlprec/mld_dbjac_aply.f90 b/mlprec/mld_dbjac_aply.f90 index 268fe6d5..1f2df5c9 100644 --- a/mlprec/mld_dbjac_aply.f90 +++ b/mlprec/mld_dbjac_aply.f90 @@ -161,7 +161,7 @@ subroutine mld_dbjac_aply(alpha,prec,x,beta,y,desc_data,trans,work,info) real(kind(1.d0)), intent(inout) :: x(*) end subroutine mld_dumf_solve end interface - + name='mld_dbjac_aply' info = 0 call psb_erractionsave(err_act) @@ -224,12 +224,12 @@ subroutine mld_dbjac_aply(alpha,prec,x,beta,y,desc_data,trans,work,info) & trans='N',unit='L',diag=prec%d,choice=psb_none_,work=aux) if (info == 0) call psb_spsm(alpha,prec%av(mld_u_pr_),ww,beta,y,desc_data,info,& & trans='N',unit='U',choice=psb_none_, work=aux) - + case('T','C') call psb_spsm(done,prec%av(mld_u_pr_),x,dzero,ww,desc_data,info,& - & trans=trans,unit='L',diag=prec%d,choice=psb_none_,work=aux) + & trans=toupper(trans),unit='L',diag=prec%d,choice=psb_none_,work=aux) if (info == 0) call psb_spsm(alpha,prec%av(mld_l_pr_),ww,beta,y,desc_data,info,& - & trans=trans,unit='U',choice=psb_none_,work=aux) + & trans=toupper(trans),unit='U',choice=psb_none_,work=aux) case default call psb_errpush(4001,name,a_err='Invalid TRANS in ILU subsolve') goto 9999 @@ -256,7 +256,7 @@ subroutine mld_dbjac_aply(alpha,prec,x,beta,y,desc_data,trans,work,info) end select if (info ==0) call psb_geaxpby(alpha,ww,beta,y,desc_data,info) - + case(mld_sludist_) ! ! Solve a distributed linear system with the LU factorization. @@ -338,26 +338,55 @@ subroutine mld_dbjac_aply(alpha,prec,x,beta,y,desc_data,trans,work,info) ! ! Use ILU(k)/MILU(k)/ILU(k,t) on the blocks. ! - do i=1, prec%iprcparm(mld_smooth_sweeps_) - ! - ! Compute Y(j+1) = D^(-1)*(X-ND*Y(j)), where D and ND are the - ! block diagonal part and the remaining part of the local matrix - ! and Y(j) is the approximate solution at sweep j. - ! - ty(1:n_row) = x(1:n_row) - call psb_spmm(-done,prec%av(mld_ap_nd_),tx,done,ty,& - & prec%desc_data,info,work=aux) - if (info /=0) exit - call psb_spsm(done,prec%av(mld_l_pr_),ty,dzero,ww,& - & prec%desc_data,info,& - & trans='N',unit='L',diag=prec%d,choice=psb_none_,work=aux) - if (info /=0) exit - call psb_spsm(done,prec%av(mld_u_pr_),ww,dzero,tx,& - & prec%desc_data,info,& - & trans='N',unit='U',choice=psb_none_,work=aux) - if (info /=0) exit - end do - + + select case(toupper(trans)) + case('N') + do i=1, prec%iprcparm(mld_smooth_sweeps_) + ! + ! Compute Y(j+1) = D^(-1)*(X-ND*Y(j)), where D and ND are the + ! block diagonal part and the remaining part of the local matrix + ! and Y(j) is the approximate solution at sweep j. + ! + ty(1:n_row) = x(1:n_row) + call psb_spmm(-done,prec%av(mld_ap_nd_),tx,done,ty,& + & prec%desc_data,info,work=aux) + if (info /=0) exit + call psb_spsm(done,prec%av(mld_l_pr_),ty,dzero,ww,& + & prec%desc_data,info,& + & trans='N',unit='L',diag=prec%d,choice=psb_none_,work=aux) + if (info /=0) exit + call psb_spsm(done,prec%av(mld_u_pr_),ww,dzero,tx,& + & prec%desc_data,info,& + & trans='N',unit='U',choice=psb_none_,work=aux) + if (info /=0) exit + end do + case('T','C') + do i=1, prec%iprcparm(mld_smooth_sweeps_) + ! + ! Compute Y(j+1) = D^(-1)*(X-ND*Y(j)), where D and ND are the + ! block diagonal part and the remaining part of the local matrix + ! and Y(j) is the approximate solution at sweep j. + ! + ty(1:n_row) = x(1:n_row) + call psb_spmm(-done,prec%av(mld_ap_nd_),tx,done,ty,& + & prec%desc_data,info,work=aux,trans=toupper(trans)) + if (info /=0) exit + call psb_spsm(done,prec%av(mld_u_pr_),ty,dzero,ww,& + & prec%desc_data,info,& + & trans=toupper(trans),unit='L',diag=prec%d,choice=psb_none_,work=aux) + if (info /=0) exit + call psb_spsm(done,prec%av(mld_l_pr_),ww,dzero,tx,& + & prec%desc_data,info,& + & trans=toupper(trans),unit='U',choice=psb_none_,work=aux) + if (info /=0) exit + end do + + case default + call psb_errpush(4001,name,a_err='Invalid TRANS in ILU subsolve') + goto 9999 + end select + + case(mld_sludist_) ! ! Wrong choice: SuperLU_DIST @@ -371,43 +400,90 @@ subroutine mld_dbjac_aply(alpha,prec,x,beta,y,desc_data,trans,work,info) ! Use the LU factorization from SuperLU. ! - do i=1, prec%iprcparm(mld_smooth_sweeps_) - ! - ! Compute Y(k+1) = D^(-1)*(X-ND*Y(k)), where D and ND are the - ! block diagonal part and the remaining part of the local matrix - ! and Y(j) is the approximate solution at sweep j. - ! - ty(1:n_row) = x(1:n_row) - call psb_spmm(-done,prec%av(mld_ap_nd_),tx,done,ty,& - & prec%desc_data,info,work=aux) - if(info /= 0) exit - - call mld_dslu_solve(0,n_row,1,ty,n_row,prec%iprcparm(mld_slu_ptr_),info) - if(info /= 0) exit - tx(1:n_row) = ty(1:n_row) - end do + select case(toupper(trans)) + case('N') + do i=1, prec%iprcparm(mld_smooth_sweeps_) + ! + ! Compute Y(k+1) = D^(-1)*(X-ND*Y(k)), where D and ND are the + ! block diagonal part and the remaining part of the local matrix + ! and Y(j) is the approximate solution at sweep j. + ! + ty(1:n_row) = x(1:n_row) + call psb_spmm(-done,prec%av(mld_ap_nd_),tx,done,ty,& + & prec%desc_data,info,work=aux) + if(info /= 0) exit + + call mld_dslu_solve(0,n_row,1,ty,n_row,prec%iprcparm(mld_slu_ptr_),info) + if(info /= 0) exit + tx(1:n_row) = ty(1:n_row) + end do + case('T','C') + do i=1, prec%iprcparm(mld_smooth_sweeps_) + ! + ! Compute Y(k+1) = D^(-1)*(X-ND*Y(k)), where D and ND are the + ! block diagonal part and the remaining part of the local matrix + ! and Y(j) is the approximate solution at sweep j. + ! + ty(1:n_row) = x(1:n_row) + call psb_spmm(-done,prec%av(mld_ap_nd_),tx,done,ty,& + & prec%desc_data,info,work=aux,trans=toupper(trans)) + if(info /= 0) exit + call mld_dslu_solve(1,n_row,1,ty,n_row,prec%iprcparm(mld_slu_ptr_),info) + if(info /= 0) exit + tx(1:n_row) = ty(1:n_row) + end do + + case default + call psb_errpush(4001,name,a_err='Invalid TRANS in SLU subsolve') + goto 9999 + end select + case(mld_umf_) ! ! Use the LU factorization from UMFPACK. ! - do i=1, prec%iprcparm(mld_smooth_sweeps_) - ! - ! Compute Y(k+1) = D^(-1)*(X-ND*Y(k)), where D and ND are the - ! block diagonal part and the remaining part of the local matrix - ! and Y(j) is the approximate solution at sweep j. - ! - ty(1:n_row) = x(1:n_row) - call psb_spmm(-done,prec%av(mld_ap_nd_),tx,done,ty,& - & prec%desc_data,info,work=aux) - if (info /= 0) exit - - call mld_dumf_solve(0,n_row,ww,ty,n_row,& - & prec%iprcparm(mld_umf_numptr_),info) - if (info /= 0) exit - tx(1:n_row) = ww(1:n_row) - end do + select case(toupper(trans)) + case('N') + do i=1, prec%iprcparm(mld_smooth_sweeps_) + ! + ! Compute Y(k+1) = D^(-1)*(X-ND*Y(k)), where D and ND are the + ! block diagonal part and the remaining part of the local matrix + ! and Y(j) is the approximate solution at sweep j. + ! + ty(1:n_row) = x(1:n_row) + call psb_spmm(-done,prec%av(mld_ap_nd_),tx,done,ty,& + & prec%desc_data,info,work=aux) + if (info /= 0) exit + + call mld_dumf_solve(0,n_row,ww,ty,n_row,& + & prec%iprcparm(mld_umf_numptr_),info) + if (info /= 0) exit + tx(1:n_row) = ww(1:n_row) + end do + case('T','C') + do i=1, prec%iprcparm(mld_smooth_sweeps_) + ! + ! Compute Y(k+1) = D^(-1)*(X-ND*Y(k)), where D and ND are the + ! block diagonal part and the remaining part of the local matrix + ! and Y(j) is the approximate solution at sweep j. + ! + ty(1:n_row) = x(1:n_row) + call psb_spmm(-done,prec%av(mld_ap_nd_),tx,done,ty,& + & prec%desc_data,info,work=aux,trans=toupper(trans)) + if (info /= 0) exit + + call mld_dumf_solve(1,n_row,ww,ty,n_row,& + & prec%iprcparm(mld_umf_numptr_),info) + if (info /= 0) exit + tx(1:n_row) = ww(1:n_row) + end do + + case default + call psb_errpush(4001,name,a_err='Invalid TRANS in UMF subsolve') + goto 9999 + end select case default call psb_errpush(4001,name,a_err='Invalid mld_sub_solve_') @@ -428,8 +504,8 @@ subroutine mld_dbjac_aply(alpha,prec,x,beta,y,desc_data,trans,work,info) info=4001 call psb_errpush(info,name,a_err='final cleanup with Jacobi sweeps > 1') goto 9999 - end if - + end if + else info = 10 diff --git a/mlprec/mld_zas_aply.f90 b/mlprec/mld_zas_aply.f90 index bc4c481c..72d50675 100644 --- a/mlprec/mld_zas_aply.f90 +++ b/mlprec/mld_zas_aply.f90 @@ -103,15 +103,6 @@ subroutine mld_zas_aply(alpha,prec,x,beta,y,desc_data,trans,work,info) call psb_info(ictxt, me, np) - select case(trans) - case('N','n') - case('T','t','C','c') - case default - info=40 - int_err(1)=6 - ch_err(2:2)=trans - goto 9999 - end select select case(prec%iprcparm(mld_prec_type_)) @@ -178,81 +169,192 @@ subroutine mld_zas_aply(alpha,prec,x,beta,y,desc_data,trans,work,info) tx(1:nrow_d) = x(1:nrow_d) tx(nrow_d+1:isz) = dzero - ! - ! Get the overlap entries of tx (tx==x) - ! - if (prec%iprcparm(mld_sub_restr_)==psb_halo_) then - call psb_halo(tx,prec%desc_data,info,work=aux,data=psb_comm_ext_) - if(info /=0) then - info=4010 - ch_err='psb_halo' - goto 9999 - end if - else if (prec%iprcparm(mld_sub_restr_) /= psb_none_) then - call psb_errpush(4001,name,a_err='Invalid mld_sub_restr_') - goto 9999 - end if - - ! - ! If required, reorder tx according to the row/column permutation of the - ! local extended matrix, stored into the permutation vector prec%perm - ! - if (prec%iprcparm(mld_sub_ren_)>0) then - call psb_gelp('n',prec%perm,tx,info) - if(info /=0) then - info=4010 - ch_err='psb_gelp' + select case(toupper(trans)) + case('N') + ! + ! Get the overlap entries of tx (tx==x) + ! + if (prec%iprcparm(mld_sub_restr_)==psb_halo_) then + call psb_halo(tx,prec%desc_data,info,work=aux,data=psb_comm_ext_) + if(info /=0) then + info=4010 + ch_err='psb_halo' + goto 9999 + end if + else if (prec%iprcparm(mld_sub_restr_) /= psb_none_) then + call psb_errpush(4001,name,a_err='Invalid mld_sub_restr_') goto 9999 end if - endif - ! - ! Apply to tx the block-Jacobi preconditioner/solver (multiple sweeps of the - ! block-Jacobi solver can be applied at the coarsest level of a multilevel - ! preconditioner). The resulting vector is ty. - ! - call mld_bjac_aply(zone,prec,tx,zzero,ty,prec%desc_data,trans,aux,info) - if(info /= 0) then - info=4010 - ch_err='mld_bjac_aply' - goto 9999 - end if + ! + ! If required, reorder tx according to the row/column permutation of the + ! local extended matrix, stored into the permutation vector prec%perm + ! + if (prec%iprcparm(mld_sub_ren_)>0) then + call psb_gelp('n',prec%perm,tx,info) + if(info /=0) then + info=4010 + ch_err='psb_gelp' + goto 9999 + end if + endif - ! - ! Apply to ty the inverse permutation of prec%perm - ! - if (prec%iprcparm(mld_sub_ren_)>0) then - call psb_gelp('n',prec%invperm,ty,info) + ! + ! Apply to tx the block-Jacobi preconditioner/solver (multiple sweeps of the + ! block-Jacobi solver can be applied at the coarsest level of a multilevel + ! preconditioner). The resulting vector is ty. + ! + call mld_bjac_aply(zone,prec,tx,zzero,ty,prec%desc_data,toupper(trans),aux,info) if(info /= 0) then info=4010 - ch_err='psb_gelp' + ch_err='mld_bjac_aply' goto 9999 end if - endif - select case (prec%iprcparm(mld_sub_prol_)) + ! + ! Apply to ty the inverse permutation of prec%perm + ! + if (prec%iprcparm(mld_sub_ren_)>0) then + call psb_gelp('n',prec%invperm,ty,info) + if(info /= 0) then + info=4010 + ch_err='psb_gelp' + goto 9999 + end if + endif + + select case (prec%iprcparm(mld_sub_prol_)) + + case(psb_none_) + ! + ! Would work anyway, but since it is supposed to do nothing ... + ! call psb_ovrl(ty,prec%desc_data,info,& + ! & update=prec%iprcparm(mld_sub_prol_),work=aux) + + + case(psb_sum_,psb_avg_) + ! + ! Update the overlap of ty + ! + call psb_ovrl(ty,prec%desc_data,info,& + & update=prec%iprcparm(mld_sub_prol_),work=aux) + if(info /=0) then + info=4010 + ch_err='psb_ovrl' + goto 9999 + end if + + case default + call psb_errpush(4001,name,a_err='Invalid mld_sub_prol_') + goto 9999 + end select + + case('T','C') - case(psb_none_) + ! + ! With transpose, we have to do it here ! - ! Would work anyway, but since it is supposed to do nothing ... - ! call psb_ovrl(ty,prec%desc_data,info,& - ! & update=prec%iprcparm(mld_sub_prol_),work=aux) + + select case (prec%iprcparm(mld_sub_prol_)) + + case(psb_none_) + ! + ! Do nothing + + case(psb_sum_) + ! + ! Transpose of sum is halo + ! + call psb_halo(tx,prec%desc_data,info,work=aux,data=psb_comm_ext_) + if(info /=0) then + info=4010 + ch_err='psb_halo' + goto 9999 + end if + + case(psb_avg_) + ! + ! Tricky one: first we have to scale the overlap entries, + ! which we can do by assignind mode=0, i.e. no communication + ! (hence only scaling), then we do the halo + ! + call psb_ovrl(tx,prec%desc_data,info,& + & update=psb_avg_,work=aux,mode=0) + if(info /=0) then + info=4010 + ch_err='psb_ovrl' + goto 9999 + end if + call psb_halo(tx,prec%desc_data,info,work=aux,data=psb_comm_ext_) + if(info /=0) then + info=4010 + ch_err='psb_halo' + goto 9999 + end if + + case default + call psb_errpush(4001,name,a_err='Invalid mld_sub_prol_') + goto 9999 + end select - case(psb_sum_,psb_avg_) ! - ! Update the overlap of ty + ! If required, reorder tx according to the row/column permutation of the + ! local extended matrix, stored into the permutation vector prec%perm ! - call psb_ovrl(ty,prec%desc_data,info,& - & update=prec%iprcparm(mld_sub_prol_),work=aux) - if(info /=0) then + if (prec%iprcparm(mld_sub_ren_)>0) then + call psb_gelp('n',prec%perm,tx,info) + if(info /=0) then + info=4010 + ch_err='psb_gelp' + goto 9999 + end if + endif + + ! + ! Apply to tx the block-Jacobi preconditioner/solver (multiple sweeps of the + ! block-Jacobi solver can be applied at the coarsest level of a multilevel + ! preconditioner). The resulting vector is ty. + ! + call mld_bjac_aply(zone,prec,tx,zzero,ty,prec%desc_data,toupper(trans),aux,info) + if(info /= 0) then info=4010 - ch_err='psb_ovrl' + ch_err='mld_bjac_aply' + goto 9999 + end if + + ! + ! Apply to ty the inverse permutation of prec%perm + ! + if (prec%iprcparm(mld_sub_ren_)>0) then + call psb_gelp('n',prec%invperm,ty,info) + if(info /= 0) then + info=4010 + ch_err='psb_gelp' + goto 9999 + end if + endif + + ! + ! With transpose, we have to do it here + ! + if (prec%iprcparm(mld_sub_restr_) == psb_halo_) then + call psb_ovrl(ty,prec%desc_data,info,& + & update=psb_sum_,work=aux) + if(info /=0) then + info=4010 + ch_err='psb_ovrl' + goto 9999 + end if + else if (prec%iprcparm(mld_sub_restr_) /= psb_none_) then + call psb_errpush(4001,name,a_err='Invalid mld_sub_restr_') goto 9999 end if case default - call psb_errpush(4001,name,a_err='Invalid mld_sub_prol_') + info=40 + int_err(1)=6 + ch_err(2:2)=trans goto 9999 end select diff --git a/mlprec/mld_zbjac_aply.f90 b/mlprec/mld_zbjac_aply.f90 index d9fba4ee..b07f88c9 100644 --- a/mlprec/mld_zbjac_aply.f90 +++ b/mlprec/mld_zbjac_aply.f90 @@ -224,7 +224,7 @@ subroutine mld_zbjac_aply(alpha,prec,x,beta,y,desc_data,trans,work,info) & trans='N',unit='L',diag=prec%d,choice=psb_none_,work=aux) if (info == 0) call psb_spsm(alpha,prec%av(mld_u_pr_),ww,beta,y,desc_data,info,& & trans='N',unit='U',choice=psb_none_, work=aux) - + case('T','C') call psb_spsm(zone,prec%av(mld_u_pr_),x,zzero,ww,desc_data,info,& & trans=trans,unit='L',diag=prec%d,choice=psb_none_, work=aux) @@ -258,7 +258,7 @@ subroutine mld_zbjac_aply(alpha,prec,x,beta,y,desc_data,trans,work,info) end select if (info ==0) call psb_geaxpby(alpha,ww,beta,y,desc_data,info) - + case(mld_sludist_) ! ! Solve a distributed linear system with the LU factorization. @@ -339,31 +339,60 @@ subroutine mld_zbjac_aply(alpha,prec,x,beta,y,desc_data,trans,work,info) tx = zzero ty = zzero + select case(prec%iprcparm(mld_sub_solve_)) case(mld_ilu_n_,mld_milu_n_,mld_ilu_t_) ! ! Use ILU(k)/MILU(k)/ILU(k,t) on the blocks. ! - do i=1, prec%iprcparm(mld_smooth_sweeps_) - ! - ! Compute Y(j+1) = D^(-1)*(X-ND*Y(j)), where D and ND are the - ! block diagonal part and the remaining part of the local matrix - ! and Y(j) is the approximate solution at sweep j. - ! - ty(1:n_row) = x(1:n_row) - call psb_spmm(-zone,prec%av(mld_ap_nd_),tx,zone,ty,& - & prec%desc_data,info,work=aux) - if (info /=0) exit - call psb_spsm(zone,prec%av(mld_l_pr_),ty,zzero,ww,& - & prec%desc_data,info,& - & trans='N',unit='L',diag=prec%d,choice=psb_none_,work=aux) - if (info /=0) exit - call psb_spsm(zone,prec%av(mld_u_pr_),ww,zzero,tx,& - & prec%desc_data,info,& - & trans='N',unit='U',choice=psb_none_,work=aux) - if (info /=0) exit - end do - + select case(toupper(trans)) + case('N') + + do i=1, prec%iprcparm(mld_smooth_sweeps_) + ! + ! Compute Y(j+1) = D^(-1)*(X-ND*Y(j)), where D and ND are the + ! block diagonal part and the remaining part of the local matrix + ! and Y(j) is the approximate solution at sweep j. + ! + ty(1:n_row) = x(1:n_row) + call psb_spmm(-zone,prec%av(mld_ap_nd_),tx,zone,ty,& + & prec%desc_data,info,work=aux) + if (info /=0) exit + call psb_spsm(zone,prec%av(mld_l_pr_),ty,zzero,ww,& + & prec%desc_data,info,& + & trans='N',unit='L',diag=prec%d,choice=psb_none_,work=aux) + if (info /=0) exit + call psb_spsm(zone,prec%av(mld_u_pr_),ww,zzero,tx,& + & prec%desc_data,info,& + & trans='N',unit='U',choice=psb_none_,work=aux) + if (info /=0) exit + end do + case('T','C') + do i=1, prec%iprcparm(mld_smooth_sweeps_) + ! + ! Compute Y(j+1) = D^(-1)*(X-ND*Y(j)), where D and ND are the + ! block diagonal part and the remaining part of the local matrix + ! and Y(j) is the approximate solution at sweep j. + ! + ty(1:n_row) = x(1:n_row) + call psb_spmm(-zone,prec%av(mld_ap_nd_),tx,zone,ty,& + & prec%desc_data,info,work=aux,trans=toupper(trans)) + if (info /=0) exit + call psb_spsm(zone,prec%av(mld_u_pr_),ty,zzero,ww,& + & prec%desc_data,info,& + & trans=toupper(trans),unit='L',diag=prec%d,choice=psb_none_,work=aux) + if (info /=0) exit + call psb_spsm(zone,prec%av(mld_l_pr_),ww,zzero,tx,& + & prec%desc_data,info,& + & trans=toupper(trans),unit='U',choice=psb_none_,work=aux) + if (info /=0) exit + end do + case default + call psb_errpush(4001,name,a_err='Invalid TRANS in ILU subsolve') + goto 9999 + end select + + case(mld_sludist_) ! ! Wrong choice: SuperLU_DIST @@ -377,43 +406,123 @@ subroutine mld_zbjac_aply(alpha,prec,x,beta,y,desc_data,trans,work,info) ! Use the LU factorization from SuperLU. ! - do i=1, prec%iprcparm(mld_smooth_sweeps_) - ! - ! Compute Y(k+1) = D^(-1)*(X-ND*Y(k)), where D and ND are the - ! block diagonal part and the remaining part of the local matrix - ! and Y(j) is the approximate solution at sweep j. - ! - ty(1:n_row) = x(1:n_row) - call psb_spmm(-zone,prec%av(mld_ap_nd_),tx,zone,ty,& - & prec%desc_data,info,work=aux) - if (info /= 0) exit - - call mld_zslu_solve(0,n_row,1,ty,n_row,prec%iprcparm(mld_slu_ptr_),info) - if (info /= 0) exit - tx(1:n_row) = ty(1:n_row) - end do + select case(toupper(trans)) + case('N') + do i=1, prec%iprcparm(mld_smooth_sweeps_) + ! + ! Compute Y(k+1) = D^(-1)*(X-ND*Y(k)), where D and ND are the + ! block diagonal part and the remaining part of the local matrix + ! and Y(j) is the approximate solution at sweep j. + ! + ty(1:n_row) = x(1:n_row) + call psb_spmm(-zone,prec%av(mld_ap_nd_),tx,zone,ty,& + & prec%desc_data,info,work=aux) + if (info /= 0) exit + + call mld_zslu_solve(0,n_row,1,ty,n_row,prec%iprcparm(mld_slu_ptr_),info) + if (info /= 0) exit + tx(1:n_row) = ty(1:n_row) + end do + case('T') + do i=1, prec%iprcparm(mld_smooth_sweeps_) + ! + ! Compute Y(k+1) = D^(-1)*(X-ND*Y(k)), where D and ND are the + ! block diagonal part and the remaining part of the local matrix + ! and Y(j) is the approximate solution at sweep j. + ! + ty(1:n_row) = x(1:n_row) + call psb_spmm(-zone,prec%av(mld_ap_nd_),tx,zone,ty,& + & prec%desc_data,info,work=aux,trans=toupper(trans)) + if (info /= 0) exit + + call mld_zslu_solve(1,n_row,1,ty,n_row,prec%iprcparm(mld_slu_ptr_),info) + if (info /= 0) exit + tx(1:n_row) = ty(1:n_row) + end do + case('C') + do i=1, prec%iprcparm(mld_smooth_sweeps_) + ! + ! Compute Y(k+1) = D^(-1)*(X-ND*Y(k)), where D and ND are the + ! block diagonal part and the remaining part of the local matrix + ! and Y(j) is the approximate solution at sweep j. + ! + ty(1:n_row) = x(1:n_row) + call psb_spmm(-zone,prec%av(mld_ap_nd_),tx,zone,ty,& + & prec%desc_data,info,work=aux,trans=toupper(trans)) + if (info /= 0) exit + + call mld_zslu_solve(2,n_row,1,ty,n_row,prec%iprcparm(mld_slu_ptr_),info) + if (info /= 0) exit + tx(1:n_row) = ty(1:n_row) + end do + + case default + call psb_errpush(4001,name,a_err='Invalid TRANS in SLU subsolve') + goto 9999 + end select case(mld_umf_) ! ! Use the LU factorization from UMFPACK. ! - do i=1, prec%iprcparm(mld_smooth_sweeps_) - ! - ! Compute Y(k+1) = D^(-1)*(X-ND*Y(k)), where D and ND are the - ! block diagonal part and the remaining part of the local matrix - ! and Y(j) is the approximate solution at sweep j. - ! - ty(1:n_row) = x(1:n_row) - call psb_spmm(-zone,prec%av(mld_ap_nd_),tx,zone,ty,& - & prec%desc_data,info,work=aux) - if (info /= 0) exit - - call mld_zumf_solve(0,n_row,ww,ty,n_row,& - & prec%iprcparm(mld_umf_numptr_),info) - if (info /= 0) exit - tx(1:n_row) = ww(1:n_row) - end do + select case(toupper(trans)) + case('N') + do i=1, prec%iprcparm(mld_smooth_sweeps_) + ! + ! Compute Y(k+1) = D^(-1)*(X-ND*Y(k)), where D and ND are the + ! block diagonal part and the remaining part of the local matrix + ! and Y(j) is the approximate solution at sweep j. + ! + ty(1:n_row) = x(1:n_row) + call psb_spmm(-zone,prec%av(mld_ap_nd_),tx,zone,ty,& + & prec%desc_data,info,work=aux) + if (info /= 0) exit + + call mld_zumf_solve(0,n_row,ww,ty,n_row,& + & prec%iprcparm(mld_umf_numptr_),info) + if (info /= 0) exit + tx(1:n_row) = ww(1:n_row) + end do + case('T') + do i=1, prec%iprcparm(mld_smooth_sweeps_) + ! + ! Compute Y(k+1) = D^(-1)*(X-ND*Y(k)), where D and ND are the + ! block diagonal part and the remaining part of the local matrix + ! and Y(j) is the approximate solution at sweep j. + ! + ty(1:n_row) = x(1:n_row) + call psb_spmm(-zone,prec%av(mld_ap_nd_),tx,zone,ty,& + & prec%desc_data,info,work=aux,trans=toupper(trans)) + if (info /= 0) exit + + call mld_zumf_solve(1,n_row,ww,ty,n_row,& + & prec%iprcparm(mld_umf_numptr_),info) + if (info /= 0) exit + tx(1:n_row) = ww(1:n_row) + end do + case('C') + do i=1, prec%iprcparm(mld_smooth_sweeps_) + ! + ! Compute Y(k+1) = D^(-1)*(X-ND*Y(k)), where D and ND are the + ! block diagonal part and the remaining part of the local matrix + ! and Y(j) is the approximate solution at sweep j. + ! + ty(1:n_row) = x(1:n_row) + call psb_spmm(-zone,prec%av(mld_ap_nd_),tx,zone,ty,& + & prec%desc_data,info,work=aux,trans=toupper(trans)) + if (info /= 0) exit + + call mld_zumf_solve(2,n_row,ww,ty,n_row,& + & prec%iprcparm(mld_umf_numptr_),info) + if (info /= 0) exit + tx(1:n_row) = ww(1:n_row) + end do + + case default + call psb_errpush(4001,name,a_err='Invalid TRANS in UMF subsolve') + goto 9999 + end select case default call psb_errpush(4001,name,a_err='Invalid mld_sub_solve_') @@ -434,8 +543,8 @@ subroutine mld_zbjac_aply(alpha,prec,x,beta,y,desc_data,trans,work,info) info=4001 call psb_errpush(info,name,a_err='final cleanup with Jacobi sweeps > 1') goto 9999 - end if - + end if + else info = 10