|
|
|
@ -270,12 +270,12 @@ subroutine psb_ld_coo_glob_transpose(ain,desc_r,info,atrans,desc_c,desc_rx)
|
|
|
|
|
valsnd(tsdx(proc+1)) = acoo%val(k)
|
|
|
|
|
end if
|
|
|
|
|
end do
|
|
|
|
|
call acoo%set_nzeros(nzd)
|
|
|
|
|
!
|
|
|
|
|
! Put halo entries in global numbering
|
|
|
|
|
!
|
|
|
|
|
call desc_r%indxmap%l2gip(jasnd(1:iszs),info)
|
|
|
|
|
call p_desc_c%indxmap%l2gip(iasnd(1:iszs),info)
|
|
|
|
|
call acoo%set_nzeros(nzd)
|
|
|
|
|
! And exchange data.
|
|
|
|
|
! Normally we'll use our SIMPLE A2AV and not MPI, because
|
|
|
|
|
! the communication pattern is sparse, so ours is more
|
|
|
|
@ -423,6 +423,12 @@ subroutine psb_d_coo_glob_transpose(ain,desc_r,info,atrans,desc_c,desc_rx)
|
|
|
|
|
character(len=5) :: outfmt_
|
|
|
|
|
integer(psb_ipk_) :: debug_level, debug_unit
|
|
|
|
|
character(len=20) :: name, ch_err
|
|
|
|
|
logical, parameter :: do_timings=.true.
|
|
|
|
|
integer(psb_ipk_), save :: idx_phase1=-1, idx_a2av=-1, idx_phase2=-1, idx_phase3=-1, &
|
|
|
|
|
& idx_refine1=-1, idx_refine2=-1, idx_refine3=-1
|
|
|
|
|
integer(psb_ipk_), save :: iters=0
|
|
|
|
|
real(psb_dpk_) :: t0, t1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if(psb_get_errstatus() /= 0) return
|
|
|
|
|
info=psb_success_
|
|
|
|
@ -441,6 +447,22 @@ subroutine psb_d_coo_glob_transpose(ain,desc_r,info,atrans,desc_c,desc_rx)
|
|
|
|
|
|
|
|
|
|
if (debug_level >= psb_debug_outer_) &
|
|
|
|
|
& write(debug_unit,*) me,' ',trim(name),': Start'
|
|
|
|
|
if ((do_timings).and.(idx_phase1==-1)) &
|
|
|
|
|
& idx_phase1 = psb_get_timer_idx("D_GLB_TRANS: phase1 ")
|
|
|
|
|
if ((do_timings).and.(idx_phase2==-1)) &
|
|
|
|
|
& idx_phase2 = psb_get_timer_idx("D_GLB_TRANS: phase2 ")
|
|
|
|
|
if ((do_timings).and.(idx_phase3==-1)) &
|
|
|
|
|
& idx_phase3 = psb_get_timer_idx("D_GLB_TRANS: phase3 ")
|
|
|
|
|
if ((do_timings).and.(idx_a2av==-1)) &
|
|
|
|
|
& idx_a2av = psb_get_timer_idx("D_GLB_TRANS: a2av ")
|
|
|
|
|
if ((do_timings).and.(idx_refine1==-1)) &
|
|
|
|
|
& idx_refine1 = psb_get_timer_idx("D_GLB_TRANS: refine1 ")
|
|
|
|
|
if ((do_timings).and.(idx_refine2==-1)) &
|
|
|
|
|
& idx_refine2 = psb_get_timer_idx("D_GLB_TRANS: refine2 ")
|
|
|
|
|
if ((do_timings).and.(idx_refine3==-1)) &
|
|
|
|
|
& idx_refine3 = psb_get_timer_idx("D_GLB_TRANS: refine3 ")
|
|
|
|
|
|
|
|
|
|
if (do_timings) call psb_tic(idx_phase1)
|
|
|
|
|
|
|
|
|
|
if (present(desc_c)) then
|
|
|
|
|
p_desc_c => desc_c
|
|
|
|
@ -540,6 +562,8 @@ subroutine psb_d_coo_glob_transpose(ain,desc_r,info,atrans,desc_c,desc_rx)
|
|
|
|
|
call psb_errpush(info,name,a_err='ensure_size')
|
|
|
|
|
goto 9999
|
|
|
|
|
end if
|
|
|
|
|
if (do_timings) call psb_toc(idx_phase1)
|
|
|
|
|
if (do_timings) call psb_tic(idx_phase2)
|
|
|
|
|
|
|
|
|
|
!
|
|
|
|
|
! Now, transpose the matrix, then split between itself
|
|
|
|
@ -569,12 +593,12 @@ subroutine psb_d_coo_glob_transpose(ain,desc_r,info,atrans,desc_c,desc_rx)
|
|
|
|
|
valsnd(tsdx(proc+1)) = acoo%val(k)
|
|
|
|
|
end if
|
|
|
|
|
end do
|
|
|
|
|
call acoo%set_nzeros(nzd)
|
|
|
|
|
!
|
|
|
|
|
! Put halo entries in global numbering
|
|
|
|
|
!
|
|
|
|
|
call desc_r%indxmap%l2gip(jasnd(1:iszs),info)
|
|
|
|
|
call p_desc_c%indxmap%l2gip(iasnd(1:iszs),info)
|
|
|
|
|
call acoo%set_nzeros(nzd)
|
|
|
|
|
! And exchange data.
|
|
|
|
|
! Normally we'll use our SIMPLE A2AV and not MPI, because
|
|
|
|
|
! the communication pattern is sparse, so ours is more
|
|
|
|
@ -582,6 +606,9 @@ subroutine psb_d_coo_glob_transpose(ain,desc_r,info,atrans,desc_c,desc_rx)
|
|
|
|
|
nzl = acoo%get_nzeros()
|
|
|
|
|
call acoo%ensure_size(nzl+iszr)
|
|
|
|
|
|
|
|
|
|
if (do_timings) call psb_toc(idx_phase2)
|
|
|
|
|
if (do_timings) call psb_tic(idx_a2av)
|
|
|
|
|
|
|
|
|
|
select case(psb_get_sp_a2av_alg())
|
|
|
|
|
case(psb_sp_a2av_smpl_triad_)
|
|
|
|
|
call psb_simple_triad_a2av(valsnd,iasnd,jasnd,sdsz,bsdindx,&
|
|
|
|
@ -610,6 +637,8 @@ subroutine psb_d_coo_glob_transpose(ain,desc_r,info,atrans,desc_c,desc_rx)
|
|
|
|
|
call psb_errpush(info,name,a_err='wrong A2AV alg selector')
|
|
|
|
|
goto 9999
|
|
|
|
|
end select
|
|
|
|
|
if (do_timings) call psb_toc(idx_a2av)
|
|
|
|
|
if (do_timings) call psb_tic(idx_phase3)
|
|
|
|
|
|
|
|
|
|
if (info /= psb_success_) then
|
|
|
|
|
info=psb_err_from_subroutine_
|
|
|
|
@ -625,8 +654,10 @@ subroutine psb_d_coo_glob_transpose(ain,desc_r,info,atrans,desc_c,desc_rx)
|
|
|
|
|
! Extend the appropriate descriptor; started as R but on
|
|
|
|
|
! transpose it now describes C
|
|
|
|
|
!
|
|
|
|
|
if (do_timings) call psb_tic(idx_refine1)
|
|
|
|
|
call desc_r%clone(desc_rx,info)
|
|
|
|
|
call psb_cd_reinit(desc_rx,info)
|
|
|
|
|
if (do_timings) call psb_toc(idx_refine1)
|
|
|
|
|
!
|
|
|
|
|
! Take out non-local rows
|
|
|
|
|
!
|
|
|
|
@ -634,8 +665,16 @@ subroutine psb_d_coo_glob_transpose(ain,desc_r,info,atrans,desc_c,desc_rx)
|
|
|
|
|
lszr = iszr
|
|
|
|
|
call psb_coo_clean_negidx_inner(lszr,iarcv(1:iszr),jarcv(1:iszr),&
|
|
|
|
|
& acoo%val(nzl+1:nzl+iszr),nzt,info)
|
|
|
|
|
if (do_timings) call psb_tic(idx_refine2)
|
|
|
|
|
call desc_rx%g2lip_ins(jarcv(1:nzt),info)
|
|
|
|
|
if (do_timings) call psb_toc(idx_refine2)
|
|
|
|
|
if (do_timings) call psb_tic(idx_refine3)
|
|
|
|
|
!t0 = psb_wtime()
|
|
|
|
|
call psb_cdasb(desc_rx,info)
|
|
|
|
|
!t1 = psb_wtime()
|
|
|
|
|
!iters = iters +1
|
|
|
|
|
!if (me == 0) write(0,*) 'Glob_transpose cdasb(desc_rx):',iters,(t1-t0),' ',desc_rx%get_fmt()
|
|
|
|
|
if (do_timings) call psb_toc(idx_refine3)
|
|
|
|
|
acoo%ia(nzl+1:nzl+nzt) = iarcv(1:nzt)
|
|
|
|
|
acoo%ja(nzl+1:nzl+nzt) = jarcv(1:nzt)
|
|
|
|
|
nzl = nzl + nzt
|
|
|
|
@ -684,6 +723,7 @@ subroutine psb_d_coo_glob_transpose(ain,desc_r,info,atrans,desc_c,desc_rx)
|
|
|
|
|
Deallocate(brvindx,bsdindx,rvsz,sdsz,&
|
|
|
|
|
& iasnd,jasnd,valsnd,&
|
|
|
|
|
& iarcv,jarcv,stat=info)
|
|
|
|
|
if (do_timings) call psb_toc(idx_phase3)
|
|
|
|
|
if (debug_level >= psb_debug_outer_)&
|
|
|
|
|
& write(debug_unit,*) me,' ',trim(name),': Done'
|
|
|
|
|
|
|
|
|
|