*** empty log message ***

psblas3-type-indexed
Alfredo Buttari 20 years ago
parent 8919dacf49
commit 3ca7ba61e5

@ -216,7 +216,7 @@ subroutine psb_dhalov(x,desc_a,info,alpha,work,tran,mode)
character :: ltran character :: ltran
character(len=20) :: name, ch_err character(len=20) :: name, ch_err
name='psb_dhalom' name='psb_dhalov'
info=0 info=0
call psb_erractionsave(err_act) call psb_erractionsave(err_act)

@ -70,7 +70,7 @@ subroutine psb_ihalom(x,desc_a,info,alpha,jx,ik,work,tran,mode)
n = desc_a%matrix_data(psb_n_) n = desc_a%matrix_data(psb_n_)
nrow = desc_a%matrix_data(psb_n_row_) nrow = desc_a%matrix_data(psb_n_row_)
maxk=size(x,2)-jx+1 maxk=size(x,2)-ijx+1
if(present(ik)) then if(present(ik)) then
if(ik.gt.maxk) then if(ik.gt.maxk) then
@ -237,6 +237,8 @@ subroutine psb_ihalov(x,desc_a,info,alpha,work,tran,mode)
m = desc_a%matrix_data(psb_m_) m = desc_a%matrix_data(psb_m_)
n = desc_a%matrix_data(psb_n_) n = desc_a%matrix_data(psb_n_)
nrow = desc_a%matrix_data(psb_n_row_) nrow = desc_a%matrix_data(psb_n_row_)
! ncol = desc_a%matrix_data(psb_n_col_)
if (present(tran)) then if (present(tran)) then
ltran = tran ltran = tran

@ -1,26 +1,27 @@
subroutine psi_iswapdatam(flag,n,beta,y,desc_a,work,info) subroutine psi_iswapdatam(flag,n,beta,y,desc_a,work,info,data)
use psb_error_mod use psb_error_mod
use psb_descriptor_type use psb_descriptor_type
use mpi
implicit none implicit none
include 'mpif.h'
integer, intent(in) :: flag, n integer, intent(in) :: flag, n
integer, intent(out) :: info integer, intent(out) :: info
integer :: y(:,:), beta integer :: y(:,:), beta
integer, target :: work(:) integer, target :: work(:)
type(psb_desc_type) :: desc_a type(psb_desc_type) :: desc_a
integer, optional :: data
! locals ! locals
integer :: icontxt, nprow, npcol, myrow,& integer :: icontxt, nprow, npcol, myrow,&
& mycol, point_to_proc, nesd, nerv,& & mycol, point_to_proc, nesd, nerv,&
& proc_to_comm, p2ptag, icomm, p2pstat,& & proc_to_comm, p2ptag, icomm, p2pstat(mpi_status_size),&
& idxs, idxr, iret, errlen, ifcomm, rank,& & idxs, idxr, iret, errlen, ifcomm, rank,&
& err_act, totxch, ixrec, i, lw, idx_pt,& & err_act, totxch, ixrec, i, lw, idx_pt,&
& snd_pt, rcv_pt & snd_pt, rcv_pt
integer :: blacs_pnum, krecvid, ksendid integer :: blacs_pnum, krecvid, ksendid
integer, pointer, dimension(:) :: bsdidx, brvidx,& integer, pointer, dimension(:) :: bsdidx, brvidx,&
& sdsz, rvsz, prcid, ptp, rvhd, h_idx & sdsz, rvsz, prcid, ptp, rvhd, d_idx
integer :: int_err(5) integer :: int_err(5)
logical :: swap_mpi, swap_sync, swap_send, swap_recv logical :: swap_mpi, swap_sync, swap_send, swap_recv
integer, pointer, dimension(:) :: sndbuf, rcvbuf integer, pointer, dimension(:) :: sndbuf, rcvbuf
@ -65,7 +66,7 @@ subroutine psi_iswapdatam(flag,n,beta,y,desc_a,work,info)
end interface end interface
info = 0 info = 0
name='psi_iswapdata' name='psi_dswap_data'
call psb_erractionsave(err_act) call psb_erractionsave(err_act)
icontxt=desc_a%matrix_data(psb_ctxt_) icontxt=desc_a%matrix_data(psb_ctxt_)
@ -95,19 +96,31 @@ subroutine psi_iswapdatam(flag,n,beta,y,desc_a,work,info)
swap_sync = iand(flag,psb_swap_sync_).ne.0 swap_sync = iand(flag,psb_swap_sync_).ne.0
swap_send = iand(flag,psb_swap_send_).ne.0 swap_send = iand(flag,psb_swap_send_).ne.0
swap_recv = iand(flag,psb_swap_recv_).ne.0 swap_recv = iand(flag,psb_swap_recv_).ne.0
h_idx => desc_a%halo_index
idxs = 0 if(present(data)) then
idxr = 0 if(data.eq.psb_comm_halo_) then
d_idx => desc_a%halo_index
else if(data.eq.psb_comm_ovr_) then
d_idx => desc_a%ovrlap_index
else
d_idx => desc_a%halo_index
end if
else
d_idx => desc_a%halo_index
end if
idxs = 1
idxr = 1
totxch = 0 totxch = 0
point_to_proc = 1 point_to_proc = 1
rvhd(:) = mpi_request_null rvhd(:) = mpi_request_null
! prepare info for communications ! prepare info for communications
proc_to_comm = h_idx(point_to_proc+psb_proc_id_) proc_to_comm = d_idx(point_to_proc+psb_proc_id_)
do while (proc_to_comm.ne.-1) do while (proc_to_comm.ne.-1)
if(proc_to_comm .ne. myrow) totxch = totxch+1 if(proc_to_comm .ne. myrow) totxch = totxch+1
nerv = h_idx(point_to_proc+psb_n_elem_recv_) nerv = d_idx(point_to_proc+psb_n_elem_recv_)
nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_)
prcid(proc_to_comm) = blacs_pnum(icontxt,proc_to_comm,mycol) prcid(proc_to_comm) = blacs_pnum(icontxt,proc_to_comm,mycol)
ptp(proc_to_comm) = point_to_proc ptp(proc_to_comm) = point_to_proc
@ -121,7 +134,7 @@ subroutine psi_iswapdatam(flag,n,beta,y,desc_a,work,info)
idxs = idxs+sdsz(proc_to_comm) idxs = idxs+sdsz(proc_to_comm)
point_to_proc = point_to_proc+nerv+nesd+3 point_to_proc = point_to_proc+nerv+nesd+3
proc_to_comm = h_idx(point_to_proc+psb_proc_id_) proc_to_comm = d_idx(point_to_proc+psb_proc_id_)
end do end do
if((idxr+idxs).lt.size(work)) then if((idxr+idxs).lt.size(work)) then
@ -140,18 +153,19 @@ subroutine psi_iswapdatam(flag,n,beta,y,desc_a,work,info)
! gather elements into sendbuffer for swapping ! gather elements into sendbuffer for swapping
point_to_proc = 1 point_to_proc = 1
proc_to_comm = h_idx(point_to_proc+psb_proc_id_) proc_to_comm = d_idx(point_to_proc+psb_proc_id_)
do while (proc_to_comm .ne. -1) do while (proc_to_comm .ne. -1)
nerv = h_idx(point_to_proc+psb_n_elem_recv_) nerv = d_idx(point_to_proc+psb_n_elem_recv_)
nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_)
idx_pt = point_to_proc+nerv+psb_elem_send_ idx_pt = point_to_proc+nerv+psb_elem_send_
snd_pt = bsdidx(proc_to_comm) snd_pt = bsdidx(proc_to_comm)
call psi_gth(nesd,n,h_idx(idx_pt:idx_pt+nesd-1),&
call psi_gth(nesd,n,d_idx(idx_pt:idx_pt+nesd-1),&
& y,sndbuf(snd_pt:snd_pt+nesd*n-1)) & y,sndbuf(snd_pt:snd_pt+nesd*n-1))
point_to_proc = point_to_proc+nerv+nesd+3 point_to_proc = point_to_proc+nerv+nesd+3
proc_to_comm = h_idx(point_to_proc+psb_proc_id_) proc_to_comm = d_idx(point_to_proc+psb_proc_id_)
end do end do
! swap elements using mpi_alltoallv ! swap elements using mpi_alltoallv
@ -167,32 +181,32 @@ subroutine psi_iswapdatam(flag,n,beta,y,desc_a,work,info)
! scatter elements from receivebuffer after swapping ! scatter elements from receivebuffer after swapping
point_to_proc = 1 point_to_proc = 1
proc_to_comm = h_idx(point_to_proc+psb_proc_id_) proc_to_comm = d_idx(point_to_proc+psb_proc_id_)
do while (proc_to_comm .ne. -1) do while (proc_to_comm .ne. -1)
nerv = h_idx(point_to_proc+psb_n_elem_recv_) nerv = d_idx(point_to_proc+psb_n_elem_recv_)
nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_)
idx_pt = point_to_proc+psb_elem_recv_ idx_pt = point_to_proc+psb_elem_recv_
rcv_pt = brvidx(proc_to_comm) rcv_pt = brvidx(proc_to_comm)
call psi_sct(nerv,n,h_idx(idx_pt:idx_pt+nerv-1),& call psi_sct(nerv,n,d_idx(idx_pt:idx_pt+nerv-1),&
& rcvbuf(rcv_pt:rcv_pt+n*nerv-1),beta,y) & rcvbuf(rcv_pt:rcv_pt+n*nerv-1),beta,y)
point_to_proc = point_to_proc+nerv+nesd+3 point_to_proc = point_to_proc+nerv+nesd+3
proc_to_comm = h_idx(point_to_proc+psb_proc_id_) proc_to_comm = d_idx(point_to_proc+psb_proc_id_)
end do end do
else if (swap_sync) then else if (swap_sync) then
point_to_proc = 1 point_to_proc = 1
proc_to_comm = h_idx(point_to_proc+psb_proc_id_) proc_to_comm = d_idx(point_to_proc+psb_proc_id_)
do while (proc_to_comm .ne. -1) do while (proc_to_comm .ne. -1)
nerv = h_idx(point_to_proc+psb_n_elem_recv_) nerv = d_idx(point_to_proc+psb_n_elem_recv_)
nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_)
if (proc_to_comm .lt. myrow) then if (proc_to_comm .lt. myrow) then
! First I send ! First I send
idx_pt = point_to_proc+nerv+psb_elem_send_ idx_pt = point_to_proc+nerv+psb_elem_send_
snd_pt = bsdidx(proc_to_comm) snd_pt = bsdidx(proc_to_comm)
call psi_gth(nesd,n,h_idx(idx_pt:idx_pt+nesd-1),& call psi_gth(nesd,n,d_idx(idx_pt:idx_pt+nesd-1),&
& y,sndbuf(snd_pt:snd_pt+nesd*n-1)) & y,sndbuf(snd_pt:snd_pt+nesd*n-1))
call igesd2d(icontxt,nesd,n,sndbuf(snd_pt),nesd,proc_to_comm,0) call igesd2d(icontxt,nesd,n,sndbuf(snd_pt),nesd,proc_to_comm,0)
! Then I receive ! Then I receive
@ -205,51 +219,50 @@ subroutine psi_iswapdatam(flag,n,beta,y,desc_a,work,info)
! Then I send ! Then I send
idx_pt = point_to_proc+nerv+psb_elem_send_ idx_pt = point_to_proc+nerv+psb_elem_send_
snd_pt = bsdidx(proc_to_comm) snd_pt = bsdidx(proc_to_comm)
call psi_gth(nesd,n,h_idx(idx_pt:idx_pt+nesd-1),& call psi_gth(nesd,n,d_idx(idx_pt:idx_pt+nesd-1),&
& y,sndbuf(snd_pt:snd_pt+nesd*n-1)) & y,sndbuf(snd_pt:snd_pt+nesd*n-1))
call igesd2d(icontxt,nesd,n,sndbuf(snd_pt),nesd,proc_to_comm,0) call igesd2d(icontxt,nesd,n,sndbuf(snd_pt),nesd,proc_to_comm,0)
else if (proc_to_comm .eq. myrow) then else if (proc_to_comm .eq. myrow) then
! I send to myself ! I send to myself
idx_pt = point_to_proc+nerv+psb_elem_send_ idx_pt = point_to_proc+nerv+psb_elem_send_
snd_pt = bsdidx(proc_to_comm) snd_pt = bsdidx(proc_to_comm)
call psi_gth(nesd,n,h_idx(idx_pt:idx_pt+nesd-1),& call psi_gth(nesd,n,d_idx(idx_pt:idx_pt+nesd-1),&
& y,sndbuf(snd_pt:snd_pt+nesd*n-1)) & y,sndbuf(snd_pt:snd_pt+nesd*n-1))
end if end if
point_to_proc = point_to_proc+nerv+nesd+3 point_to_proc = point_to_proc+nerv+nesd+3
proc_to_comm = h_idx(point_to_proc+psb_proc_id_) proc_to_comm = d_idx(point_to_proc+psb_proc_id_)
end do end do
point_to_proc = 1 point_to_proc = 1
proc_to_comm = h_idx(point_to_proc+psb_proc_id_) proc_to_comm = d_idx(point_to_proc+psb_proc_id_)
do while (proc_to_comm .ne. -1) do while (proc_to_comm .ne. -1)
nerv = h_idx(point_to_proc+psb_n_elem_recv_) nerv = d_idx(point_to_proc+psb_n_elem_recv_)
nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_)
if(proc_to_comm.ne.myrow) then if(proc_to_comm.ne.myrow) then
idx_pt = point_to_proc+psb_elem_recv_ idx_pt = point_to_proc+psb_elem_recv_
rcv_pt = brvidx(proc_to_comm) rcv_pt = brvidx(proc_to_comm)
call psi_sct(nerv,n,h_idx(idx_pt:idx_pt+nerv-1),& call psi_sct(nerv,n,d_idx(idx_pt:idx_pt+nerv-1),&
& rcvbuf(rcv_pt:rcv_pt+n*nerv-1),beta,y) & rcvbuf(rcv_pt:rcv_pt+n*nerv-1),beta,y)
else else
idx_pt = point_to_proc+psb_elem_recv_ idx_pt = point_to_proc+psb_elem_recv_
snd_pt = bsdidx(proc_to_comm) snd_pt = bsdidx(proc_to_comm)
call psi_sct(nerv,n,h_idx(idx_pt:idx_pt+nerv-1),& call psi_sct(nerv,n,d_idx(idx_pt:idx_pt+nerv-1),&
& sndbuf(snd_pt:snd_pt+n*nesd-1),beta,y) & sndbuf(snd_pt:snd_pt+n*nesd-1),beta,y)
end if end if
point_to_proc = point_to_proc+nerv+nesd+3 point_to_proc = point_to_proc+nerv+nesd+3
proc_to_comm = h_idx(point_to_proc+psb_proc_id_) proc_to_comm = d_idx(point_to_proc+psb_proc_id_)
end do end do
else if (swap_send .and. swap_recv) then else if (swap_send .and. swap_recv) then
! First I post all the non blocking receives ! First I post all the non blocking receives
point_to_proc = 1 point_to_proc = 1
proc_to_comm = h_idx(point_to_proc+psb_proc_id_) proc_to_comm = d_idx(point_to_proc+psb_proc_id_)
do while (proc_to_comm .ne. -1) do while (proc_to_comm .ne. -1)
nerv = h_idx(point_to_proc+psb_n_elem_recv_) nerv = d_idx(point_to_proc+psb_n_elem_recv_)
nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_)
if(proc_to_comm.ne.myrow) then if(proc_to_comm.ne.myrow) then
p2ptag = krecvid(icontxt,proc_to_comm,myrow) p2ptag = krecvid(icontxt,proc_to_comm,myrow)
@ -266,19 +279,20 @@ subroutine psi_iswapdatam(flag,n,beta,y,desc_a,work,info)
end if end if
point_to_proc = point_to_proc+nerv+nesd+3 point_to_proc = point_to_proc+nerv+nesd+3
proc_to_comm = h_idx(point_to_proc+psb_proc_id_) proc_to_comm = d_idx(point_to_proc+psb_proc_id_)
end do end do
! Then I post all the blocking sends ! Then I post all the blocking sends
point_to_proc = 1 point_to_proc = 1
proc_to_comm = h_idx(point_to_proc+psb_proc_id_) proc_to_comm = d_idx(point_to_proc+psb_proc_id_)
do while (proc_to_comm .ne. -1) do while (proc_to_comm .ne. -1)
nerv = h_idx(point_to_proc+psb_n_elem_recv_) nerv = d_idx(point_to_proc+psb_n_elem_recv_)
nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_)
idx_pt = point_to_proc+nerv+psb_elem_send_ idx_pt = point_to_proc+nerv+psb_elem_send_
snd_pt = bsdidx(proc_to_comm) snd_pt = bsdidx(proc_to_comm)
call psi_gth(nesd,n,h_idx(idx_pt:idx_pt+nesd-1),&
call psi_gth(nesd,n,d_idx(idx_pt:idx_pt+nesd-1),&
& y,sndbuf(snd_pt:snd_pt+nesd*n-1)) & y,sndbuf(snd_pt:snd_pt+nesd*n-1))
if(proc_to_comm .ne. myrow) then if(proc_to_comm .ne. myrow) then
@ -294,7 +308,7 @@ subroutine psi_iswapdatam(flag,n,beta,y,desc_a,work,info)
end if end if
end if end if
point_to_proc = point_to_proc+nerv+nesd+3 point_to_proc = point_to_proc+nerv+nesd+3
proc_to_comm = h_idx(point_to_proc+psb_proc_id_) proc_to_comm = d_idx(point_to_proc+psb_proc_id_)
end do end do
do i=1, totxch do i=1, totxch
@ -309,13 +323,13 @@ subroutine psi_iswapdatam(flag,n,beta,y,desc_a,work,info)
if (ixrec .ne. mpi_undefined) then if (ixrec .ne. mpi_undefined) then
ixrec=ixrec-1 ! mpi_waitany returns an 1 to nprow index ixrec=ixrec-1 ! mpi_waitany returns an 1 to nprow index
point_to_proc = ptp(ixrec) point_to_proc = ptp(ixrec)
proc_to_comm = h_idx(point_to_proc+psb_proc_id_) proc_to_comm = d_idx(point_to_proc+psb_proc_id_)
nerv = h_idx(point_to_proc+psb_n_elem_recv_) nerv = d_idx(point_to_proc+psb_n_elem_recv_)
nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_)
idx_pt = point_to_proc+psb_elem_recv_ idx_pt = point_to_proc+psb_elem_recv_
rcv_pt = brvidx(proc_to_comm) rcv_pt = brvidx(proc_to_comm)
call psi_sct(nerv,n,h_idx(idx_pt:idx_pt+nerv-1),& call psi_sct(nerv,n,d_idx(idx_pt:idx_pt+nerv-1),&
& rcvbuf(rcv_pt:rcv_pt+n*nerv-1),beta,y) & rcvbuf(rcv_pt:rcv_pt+n*nerv-1),beta,y)
else else
int_err(1) = ixrec int_err(1) = ixrec
@ -326,20 +340,20 @@ subroutine psi_iswapdatam(flag,n,beta,y,desc_a,work,info)
end do end do
point_to_proc = 1 point_to_proc = 1
proc_to_comm = h_idx(point_to_proc+psb_proc_id_) proc_to_comm = d_idx(point_to_proc+psb_proc_id_)
do while (proc_to_comm .ne. -1) do while (proc_to_comm .ne. -1)
nerv = h_idx(point_to_proc+psb_n_elem_recv_) nerv = d_idx(point_to_proc+psb_n_elem_recv_)
nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_)
if(proc_to_comm .eq. myrow) then if(proc_to_comm .eq. myrow) then
idx_pt = point_to_proc+psb_elem_recv_ idx_pt = point_to_proc+psb_elem_recv_
snd_pt = bsdidx(proc_to_comm) snd_pt = bsdidx(proc_to_comm)
call psi_sct(nerv,n,h_idx(idx_pt:idx_pt+nerv-1),& call psi_sct(nerv,n,d_idx(idx_pt:idx_pt+nerv-1),&
& sndbuf(snd_pt:snd_pt+n*nesd-1),beta,y) & sndbuf(snd_pt:snd_pt+n*nesd-1),beta,y)
end if end if
point_to_proc = point_to_proc+nerv+nesd+3 point_to_proc = point_to_proc+nerv+nesd+3
proc_to_comm = h_idx(point_to_proc+psb_proc_id_) proc_to_comm = d_idx(point_to_proc+psb_proc_id_)
end do end do
@ -347,45 +361,45 @@ subroutine psi_iswapdatam(flag,n,beta,y,desc_a,work,info)
else if (swap_send) then else if (swap_send) then
point_to_proc = 1 point_to_proc = 1
proc_to_comm = h_idx(point_to_proc+psb_proc_id_) proc_to_comm = d_idx(point_to_proc+psb_proc_id_)
do while (proc_to_comm .ne. -1) do while (proc_to_comm .ne. -1)
nerv = h_idx(point_to_proc+psb_n_elem_recv_) nerv = d_idx(point_to_proc+psb_n_elem_recv_)
nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_)
idx_pt = point_to_proc+nerv+psb_elem_send_ idx_pt = point_to_proc+nerv+psb_elem_send_
snd_pt = bsdidx(proc_to_comm) snd_pt = bsdidx(proc_to_comm)
call psi_gth(nesd,n,h_idx(idx_pt:idx_pt+nesd-1),& call psi_gth(nesd,n,d_idx(idx_pt:idx_pt+nesd-1),&
& y,sndbuf(snd_pt:snd_pt+nesd*n-1)) & y,sndbuf(snd_pt:snd_pt+nesd*n-1))
call igesd2d(icontxt,nesd,n,sndbuf(snd_pt),nesd,proc_to_comm,0) call igesd2d(icontxt,nesd,n,sndbuf(snd_pt),nesd,proc_to_comm,0)
point_to_proc = point_to_proc+nerv+nesd+3 point_to_proc = point_to_proc+nerv+nesd+3
proc_to_comm = h_idx(point_to_proc+psb_proc_id_) proc_to_comm = d_idx(point_to_proc+psb_proc_id_)
end do end do
else if (swap_recv) then else if (swap_recv) then
point_to_proc = 1 point_to_proc = 1
proc_to_comm = h_idx(point_to_proc+psb_proc_id_) proc_to_comm = d_idx(point_to_proc+psb_proc_id_)
do while (proc_to_comm .ne. -1) do while (proc_to_comm .ne. -1)
nerv = h_idx(point_to_proc+psb_n_elem_recv_) nerv = d_idx(point_to_proc+psb_n_elem_recv_)
nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_)
if(proc_to_comm.ne.myrow) then if(proc_to_comm.ne.myrow) then
rcv_pt = brvidx(proc_to_comm) rcv_pt = brvidx(proc_to_comm)
call igerv2d(icontxt,nerv,n,rcvbuf(rcv_pt),nerv,proc_to_comm,0) call igerv2d(icontxt,nerv,n,rcvbuf(rcv_pt),nerv,proc_to_comm,0)
idx_pt = point_to_proc+psb_elem_recv_ idx_pt = point_to_proc+psb_elem_recv_
rcv_pt = brvidx(proc_to_comm) rcv_pt = brvidx(proc_to_comm)
call psi_sct(nerv,n,h_idx(idx_pt:idx_pt+nerv-1),& call psi_sct(nerv,n,d_idx(idx_pt:idx_pt+nerv-1),&
& rcvbuf(rcv_pt:rcv_pt+n*nerv-1),beta,y) & rcvbuf(rcv_pt:rcv_pt+n*nerv-1),beta,y)
else else
idx_pt = point_to_proc+psb_elem_recv_ idx_pt = point_to_proc+psb_elem_recv_
snd_pt = bsdidx(proc_to_comm) snd_pt = bsdidx(proc_to_comm)
call psi_sct(nerv,n,h_idx(idx_pt:idx_pt+nerv-1),& call psi_sct(nerv,n,d_idx(idx_pt:idx_pt+nerv-1),&
& sndbuf(snd_pt:snd_pt+n*nesd-1),beta,y) & sndbuf(snd_pt:snd_pt+n*nesd-1),beta,y)
end if end if
point_to_proc = point_to_proc+nerv+nesd+3 point_to_proc = point_to_proc+nerv+nesd+3
proc_to_comm = h_idx(point_to_proc+psb_proc_id_) proc_to_comm = d_idx(point_to_proc+psb_proc_id_)
end do end do
end if end if
@ -403,34 +417,30 @@ subroutine psi_iswapdatam(flag,n,beta,y,desc_a,work,info)
end subroutine psi_iswapdatam end subroutine psi_iswapdatam
subroutine psi_iswapdatav(flag,beta,y,desc_a,work,info,data)
subroutine psi_iswapdatav(flag,beta,y,desc_a,work,info)
use psb_error_mod use psb_error_mod
use psb_descriptor_type use psb_descriptor_type
use mpi
implicit none implicit none
include 'mpif.h'
integer, intent(in) :: flag integer, intent(in) :: flag
integer, intent(out) :: info integer, intent(out) :: info
integer :: y(:), beta integer :: y(:), beta
integer, target :: work(:) integer, target :: work(:)
type(psb_desc_type) :: desc_a type(psb_desc_type) :: desc_a
integer, optional :: data
! locals ! locals
integer :: icontxt, nprow, npcol, myrow,& integer :: icontxt, nprow, npcol, myrow,&
& mycol, point_to_proc, nesd, nerv,& & mycol, point_to_proc, nesd, nerv,&
& proc_to_comm, p2ptag, icomm, p2pstat,& & proc_to_comm, p2ptag, icomm, p2pstat(mpi_status_size),&
& idxs, idxr, iret, errlen, ifcomm, rank,& & idxs, idxr, iret, errlen, ifcomm, rank,&
& err_act, totxch, ixrec, i, lw, idx_pt,& & err_act, totxch, ixrec, i, lw, idx_pt,&
& snd_pt, rcv_pt, n & snd_pt, rcv_pt, n
integer, pointer, dimension(:) :: bsdidx, brvidx,& integer, pointer, dimension(:) :: bsdidx, brvidx,&
& sdsz, rvsz, prcid, ptp, rvhd, h_idx & sdsz, rvsz, prcid, ptp, rvhd, d_idx
integer :: blacs_pnum, krecvid, ksendid integer :: blacs_pnum, krecvid, ksendid
integer :: int_err(5) integer :: int_err(5)
logical :: swap_mpi, swap_sync, swap_send, swap_recv logical :: swap_mpi, swap_sync, swap_send, swap_recv
@ -476,7 +486,7 @@ subroutine psi_iswapdatav(flag,beta,y,desc_a,work,info)
end interface end interface
info = 0 info = 0
name='psi_iswapdatav' name='psi_iswap_datav'
call psb_erractionsave(err_act) call psb_erractionsave(err_act)
icontxt=desc_a%matrix_data(psb_ctxt_) icontxt=desc_a%matrix_data(psb_ctxt_)
@ -494,6 +504,7 @@ subroutine psi_iswapdatav(flag,beta,y,desc_a,work,info)
call blacs_get(icontxt,10,icomm) call blacs_get(icontxt,10,icomm)
allocate(sdsz(0:nprow-1), rvsz(0:nprow-1), bsdidx(0:nprow-1),& allocate(sdsz(0:nprow-1), rvsz(0:nprow-1), bsdidx(0:nprow-1),&
& brvidx(0:nprow-1), rvhd(0:nprow-1), prcid(0:nprow-1),& & brvidx(0:nprow-1), rvhd(0:nprow-1), prcid(0:nprow-1),&
& ptp(0:nprow-1), stat=info) & ptp(0:nprow-1), stat=info)
@ -506,20 +517,32 @@ subroutine psi_iswapdatav(flag,beta,y,desc_a,work,info)
swap_sync = iand(flag,psb_swap_sync_).ne.0 swap_sync = iand(flag,psb_swap_sync_).ne.0
swap_send = iand(flag,psb_swap_send_).ne.0 swap_send = iand(flag,psb_swap_send_).ne.0
swap_recv = iand(flag,psb_swap_recv_).ne.0 swap_recv = iand(flag,psb_swap_recv_).ne.0
h_idx => desc_a%halo_index
idxs = 0 if(present(data)) then
idxr = 0 if(data.eq.psb_comm_halo_) then
d_idx => desc_a%halo_index
else if(data.eq.psb_comm_ovr_) then
d_idx => desc_a%ovrlap_index
else
d_idx => desc_a%halo_index
end if
else
d_idx => desc_a%halo_index
end if
idxs = 1
idxr = 1
totxch = 0 totxch = 0
point_to_proc = 1 point_to_proc = 1
rvhd(:) = mpi_request_null rvhd(:) = mpi_request_null
n=1 n=1
! prepare info for communications ! prepare info for communications
proc_to_comm = h_idx(point_to_proc+psb_proc_id_) proc_to_comm = d_idx(point_to_proc+psb_proc_id_)
do while (proc_to_comm.ne.-1) do while (proc_to_comm.ne.-1)
if(proc_to_comm .ne. myrow) totxch = totxch+1 if(proc_to_comm .ne. myrow) totxch = totxch+1
nerv = h_idx(point_to_proc+psb_n_elem_recv_) nerv = d_idx(point_to_proc+psb_n_elem_recv_)
nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_)
prcid(proc_to_comm) = blacs_pnum(icontxt,proc_to_comm,mycol) prcid(proc_to_comm) = blacs_pnum(icontxt,proc_to_comm,mycol)
ptp(proc_to_comm) = point_to_proc ptp(proc_to_comm) = point_to_proc
@ -533,7 +556,7 @@ subroutine psi_iswapdatav(flag,beta,y,desc_a,work,info)
idxs = idxs+sdsz(proc_to_comm) idxs = idxs+sdsz(proc_to_comm)
point_to_proc = point_to_proc+nerv+nesd+3 point_to_proc = point_to_proc+nerv+nesd+3
proc_to_comm = h_idx(point_to_proc+psb_proc_id_) proc_to_comm = d_idx(point_to_proc+psb_proc_id_)
end do end do
if((idxr+idxs).lt.size(work)) then if((idxr+idxs).lt.size(work)) then
@ -552,18 +575,18 @@ subroutine psi_iswapdatav(flag,beta,y,desc_a,work,info)
! gather elements into sendbuffer for swapping ! gather elements into sendbuffer for swapping
point_to_proc = 1 point_to_proc = 1
proc_to_comm = h_idx(point_to_proc+psb_proc_id_) proc_to_comm = d_idx(point_to_proc+psb_proc_id_)
do while (proc_to_comm .ne. -1) do while (proc_to_comm .ne. -1)
nerv = h_idx(point_to_proc+psb_n_elem_recv_) nerv = d_idx(point_to_proc+psb_n_elem_recv_)
nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_)
idx_pt = point_to_proc+nerv+psb_elem_send_ idx_pt = point_to_proc+nerv+psb_elem_send_
snd_pt = bsdidx(proc_to_comm) snd_pt = bsdidx(proc_to_comm)
call psi_gth(nesd,h_idx(idx_pt:idx_pt+nesd-1),& call psi_gth(nesd,d_idx(idx_pt:idx_pt+nesd-1),&
& y,sndbuf(snd_pt:snd_pt+nesd-1)) & y,sndbuf(snd_pt:snd_pt+nesd-1))
point_to_proc = point_to_proc+nerv+nesd+3 point_to_proc = point_to_proc+nerv+nesd+3
proc_to_comm = h_idx(point_to_proc+psb_proc_id_) proc_to_comm = d_idx(point_to_proc+psb_proc_id_)
end do end do
! swap elements using mpi_alltoallv ! swap elements using mpi_alltoallv
@ -579,32 +602,32 @@ subroutine psi_iswapdatav(flag,beta,y,desc_a,work,info)
! scatter elements from receivebuffer after swapping ! scatter elements from receivebuffer after swapping
point_to_proc = 1 point_to_proc = 1
proc_to_comm = h_idx(point_to_proc+psb_proc_id_) proc_to_comm = d_idx(point_to_proc+psb_proc_id_)
do while (proc_to_comm .ne. -1) do while (proc_to_comm .ne. -1)
nerv = h_idx(point_to_proc+psb_n_elem_recv_) nerv = d_idx(point_to_proc+psb_n_elem_recv_)
nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_)
idx_pt = point_to_proc+psb_elem_recv_ idx_pt = point_to_proc+psb_elem_recv_
rcv_pt = brvidx(proc_to_comm) rcv_pt = brvidx(proc_to_comm)
call psi_sct(nerv,h_idx(idx_pt:idx_pt+nerv-1),& call psi_sct(nerv,d_idx(idx_pt:idx_pt+nerv-1),&
& rcvbuf(rcv_pt:rcv_pt+nerv-1),beta,y) & rcvbuf(rcv_pt:rcv_pt+nerv-1),beta,y)
point_to_proc = point_to_proc+nerv+nesd+3 point_to_proc = point_to_proc+nerv+nesd+3
proc_to_comm = h_idx(point_to_proc+psb_proc_id_) proc_to_comm = d_idx(point_to_proc+psb_proc_id_)
end do end do
else if (swap_sync) then else if (swap_sync) then
point_to_proc = 1 point_to_proc = 1
proc_to_comm = h_idx(point_to_proc+psb_proc_id_) proc_to_comm = d_idx(point_to_proc+psb_proc_id_)
do while (proc_to_comm .ne. -1) do while (proc_to_comm .ne. -1)
nerv = h_idx(point_to_proc+psb_n_elem_recv_) nerv = d_idx(point_to_proc+psb_n_elem_recv_)
nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_)
if (proc_to_comm .lt. myrow) then if (proc_to_comm .lt. myrow) then
! First I send ! First I send
idx_pt = point_to_proc+nerv+psb_elem_send_ idx_pt = point_to_proc+nerv+psb_elem_send_
snd_pt = bsdidx(proc_to_comm) snd_pt = bsdidx(proc_to_comm)
call psi_gth(nesd,h_idx(idx_pt:idx_pt+nesd-1),& call psi_gth(nesd,d_idx(idx_pt:idx_pt+nesd-1),&
& y,sndbuf(snd_pt:snd_pt+nesd-1)) & y,sndbuf(snd_pt:snd_pt+nesd-1))
call igesd2d(icontxt,nesd,1,sndbuf(snd_pt),nesd,proc_to_comm,0) call igesd2d(icontxt,nesd,1,sndbuf(snd_pt),nesd,proc_to_comm,0)
! Then I receive ! Then I receive
@ -617,51 +640,50 @@ subroutine psi_iswapdatav(flag,beta,y,desc_a,work,info)
! Then I send ! Then I send
idx_pt = point_to_proc+nerv+psb_elem_send_ idx_pt = point_to_proc+nerv+psb_elem_send_
snd_pt = bsdidx(proc_to_comm) snd_pt = bsdidx(proc_to_comm)
call psi_gth(nesd,h_idx(idx_pt:idx_pt+nesd-1),& call psi_gth(nesd,d_idx(idx_pt:idx_pt+nesd-1),&
& y,sndbuf(snd_pt:snd_pt+nesd-1)) & y,sndbuf(snd_pt:snd_pt+nesd-1))
call igesd2d(icontxt,nesd,1,sndbuf(snd_pt),nesd,proc_to_comm,0) call igesd2d(icontxt,nesd,1,sndbuf(snd_pt),nesd,proc_to_comm,0)
else if (proc_to_comm .eq. myrow) then else if (proc_to_comm .eq. myrow) then
! I send to myself ! I send to myself
idx_pt = point_to_proc+nerv+psb_elem_send_ idx_pt = point_to_proc+nerv+psb_elem_send_
snd_pt = bsdidx(proc_to_comm) snd_pt = bsdidx(proc_to_comm)
call psi_gth(nesd,h_idx(idx_pt:idx_pt+nesd-1),& call psi_gth(nesd,d_idx(idx_pt:idx_pt+nesd-1),&
& y,sndbuf(snd_pt:snd_pt+nesd-1)) & y,sndbuf(snd_pt:snd_pt+nesd-1))
end if end if
point_to_proc = point_to_proc+nerv+nesd+3 point_to_proc = point_to_proc+nerv+nesd+3
proc_to_comm = h_idx(point_to_proc+psb_proc_id_) proc_to_comm = d_idx(point_to_proc+psb_proc_id_)
end do end do
point_to_proc = 1 point_to_proc = 1
proc_to_comm = h_idx(point_to_proc+psb_proc_id_) proc_to_comm = d_idx(point_to_proc+psb_proc_id_)
do while (proc_to_comm .ne. -1) do while (proc_to_comm .ne. -1)
nerv = h_idx(point_to_proc+psb_n_elem_recv_) nerv = d_idx(point_to_proc+psb_n_elem_recv_)
nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_)
if(proc_to_comm.ne.myrow) then if(proc_to_comm.ne.myrow) then
idx_pt = point_to_proc+psb_elem_recv_ idx_pt = point_to_proc+psb_elem_recv_
rcv_pt = brvidx(proc_to_comm) rcv_pt = brvidx(proc_to_comm)
call psi_sct(nerv,h_idx(idx_pt:idx_pt+nerv-1),& call psi_sct(nerv,d_idx(idx_pt:idx_pt+nerv-1),&
& rcvbuf(rcv_pt:rcv_pt+nerv-1),beta,y) & rcvbuf(rcv_pt:rcv_pt+nerv-1),beta,y)
else else
idx_pt = point_to_proc+psb_elem_recv_ idx_pt = point_to_proc+psb_elem_recv_
snd_pt = bsdidx(proc_to_comm) snd_pt = bsdidx(proc_to_comm)
call psi_sct(nerv,h_idx(idx_pt:idx_pt+nerv-1),& call psi_sct(nerv,d_idx(idx_pt:idx_pt+nerv-1),&
& sndbuf(snd_pt:snd_pt+nesd-1),beta,y) & sndbuf(snd_pt:snd_pt+nesd-1),beta,y)
end if end if
point_to_proc = point_to_proc+nerv+nesd+3 point_to_proc = point_to_proc+nerv+nesd+3
proc_to_comm = h_idx(point_to_proc+psb_proc_id_) proc_to_comm = d_idx(point_to_proc+psb_proc_id_)
end do end do
else if (swap_send .and. swap_recv) then else if (swap_send .and. swap_recv) then
! First I post all the non blocking receives ! First I post all the non blocking receives
point_to_proc = 1 point_to_proc = 1
proc_to_comm = h_idx(point_to_proc+psb_proc_id_) proc_to_comm = d_idx(point_to_proc+psb_proc_id_)
do while (proc_to_comm .ne. -1) do while (proc_to_comm .ne. -1)
nerv = h_idx(point_to_proc+psb_n_elem_recv_) nerv = d_idx(point_to_proc+psb_n_elem_recv_)
nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_)
if(proc_to_comm.ne.myrow) then if(proc_to_comm.ne.myrow) then
p2ptag = krecvid(icontxt,proc_to_comm,myrow) p2ptag = krecvid(icontxt,proc_to_comm,myrow)
@ -678,19 +700,19 @@ subroutine psi_iswapdatav(flag,beta,y,desc_a,work,info)
end if end if
point_to_proc = point_to_proc+nerv+nesd+3 point_to_proc = point_to_proc+nerv+nesd+3
proc_to_comm = h_idx(point_to_proc+psb_proc_id_) proc_to_comm = d_idx(point_to_proc+psb_proc_id_)
end do end do
! Then I post all the blocking sends ! Then I post all the blocking sends
point_to_proc = 1 point_to_proc = 1
proc_to_comm = h_idx(point_to_proc+psb_proc_id_) proc_to_comm = d_idx(point_to_proc+psb_proc_id_)
do while (proc_to_comm .ne. -1) do while (proc_to_comm .ne. -1)
nerv = h_idx(point_to_proc+psb_n_elem_recv_) nerv = d_idx(point_to_proc+psb_n_elem_recv_)
nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_)
idx_pt = point_to_proc+nerv+psb_elem_send_ idx_pt = point_to_proc+nerv+psb_elem_send_
snd_pt = bsdidx(proc_to_comm) snd_pt = bsdidx(proc_to_comm)
call psi_gth(nesd,h_idx(idx_pt:idx_pt+nesd-1),& call psi_gth(nesd,d_idx(idx_pt:idx_pt+nesd-1),&
& y,sndbuf(snd_pt:snd_pt+nesd-1)) & y,sndbuf(snd_pt:snd_pt+nesd-1))
if(proc_to_comm .ne. myrow) then if(proc_to_comm .ne. myrow) then
@ -706,7 +728,7 @@ subroutine psi_iswapdatav(flag,beta,y,desc_a,work,info)
end if end if
end if end if
point_to_proc = point_to_proc+nerv+nesd+3 point_to_proc = point_to_proc+nerv+nesd+3
proc_to_comm = h_idx(point_to_proc+psb_proc_id_) proc_to_comm = d_idx(point_to_proc+psb_proc_id_)
end do end do
do i=1, totxch do i=1, totxch
@ -717,18 +739,17 @@ subroutine psi_iswapdatav(flag,beta,y,desc_a,work,info)
call psb_errpush(info,name,i_err=int_err) call psb_errpush(info,name,i_err=int_err)
goto 9999 goto 9999
end if end if
if (ixrec .ne. mpi_undefined) then if (ixrec .ne. mpi_undefined) then
ixrec=ixrec-1 ! mpi_waitany returns an 1 to nprow index ixrec=ixrec-1 ! mpi_waitany returns an 1 to nprow index
point_to_proc = ptp(ixrec) point_to_proc = ptp(ixrec)
proc_to_comm = h_idx(point_to_proc+psb_proc_id_) proc_to_comm = d_idx(point_to_proc+psb_proc_id_)
nerv = h_idx(point_to_proc+psb_n_elem_recv_) nerv = d_idx(point_to_proc+psb_n_elem_recv_)
nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_)
idx_pt = point_to_proc+psb_elem_recv_ idx_pt = point_to_proc+psb_elem_recv_
snd_pt = bsdidx(proc_to_comm) rcv_pt = brvidx(proc_to_comm)
call psi_sct(nerv,h_idx(idx_pt:idx_pt+nerv-1),& call psi_sct(nerv,d_idx(idx_pt:idx_pt+nerv-1),&
& sndbuf(snd_pt:snd_pt+nesd-1),beta,y) & rcvbuf(rcv_pt:rcv_pt+nerv-1),beta,y)
else else
int_err(1) = ixrec int_err(1) = ixrec
info=400 info=400
@ -738,66 +759,65 @@ subroutine psi_iswapdatav(flag,beta,y,desc_a,work,info)
end do end do
point_to_proc = 1 point_to_proc = 1
proc_to_comm = h_idx(point_to_proc+psb_proc_id_) proc_to_comm = d_idx(point_to_proc+psb_proc_id_)
do while (proc_to_comm .ne. -1) do while (proc_to_comm .ne. -1)
nerv = h_idx(point_to_proc+psb_n_elem_recv_) nerv = d_idx(point_to_proc+psb_n_elem_recv_)
nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_)
if(proc_to_comm .eq. myrow) then if(proc_to_comm .eq. myrow) then
idx_pt = point_to_proc+psb_elem_recv_ idx_pt = point_to_proc+psb_elem_recv_
snd_pt = bsdidx(proc_to_comm) snd_pt = bsdidx(proc_to_comm)
call psi_sct(nerv,h_idx(idx_pt:idx_pt+nerv-1),& call psi_sct(nerv,d_idx(idx_pt:idx_pt+nerv-1),&
& sndbuf(snd_pt:snd_pt+nesd-1),beta,y) & sndbuf(snd_pt:snd_pt+nesd-1),beta,y)
end if end if
point_to_proc = point_to_proc+nerv+nesd+3 point_to_proc = point_to_proc+nerv+nesd+3
proc_to_comm = h_idx(point_to_proc+psb_proc_id_) proc_to_comm = d_idx(point_to_proc+psb_proc_id_)
end do end do
else if (swap_send) then else if (swap_send) then
point_to_proc = 1 point_to_proc = 1
proc_to_comm = h_idx(point_to_proc+psb_proc_id_) proc_to_comm = d_idx(point_to_proc+psb_proc_id_)
do while (proc_to_comm .ne. -1) do while (proc_to_comm .ne. -1)
nerv = h_idx(point_to_proc+psb_n_elem_recv_) nerv = d_idx(point_to_proc+psb_n_elem_recv_)
nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_)
idx_pt = point_to_proc+nerv+psb_elem_send_ idx_pt = point_to_proc+nerv+psb_elem_send_
snd_pt = bsdidx(proc_to_comm) snd_pt = bsdidx(proc_to_comm)
call psi_gth(nesd,h_idx(idx_pt:idx_pt+nesd-1),& call psi_gth(nesd,d_idx(idx_pt:idx_pt+nesd-1),&
& y,sndbuf(snd_pt:snd_pt+nesd-1)) & y,sndbuf(snd_pt:snd_pt+nesd-1))
call igesd2d(icontxt,nesd,1,sndbuf(snd_pt),nesd,proc_to_comm,0) call igesd2d(icontxt,nesd,1,sndbuf(snd_pt),nesd,proc_to_comm,0)
point_to_proc = point_to_proc+nerv+nesd+3 point_to_proc = point_to_proc+nerv+nesd+3
proc_to_comm = h_idx(point_to_proc+psb_proc_id_) proc_to_comm = d_idx(point_to_proc+psb_proc_id_)
end do end do
else if (swap_recv) then else if (swap_recv) then
point_to_proc = 1 point_to_proc = 1
proc_to_comm = h_idx(point_to_proc+psb_proc_id_) proc_to_comm = d_idx(point_to_proc+psb_proc_id_)
do while (proc_to_comm .ne. -1) do while (proc_to_comm .ne. -1)
nerv = h_idx(point_to_proc+psb_n_elem_recv_) nerv = d_idx(point_to_proc+psb_n_elem_recv_)
nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_)
if(proc_to_comm.ne.myrow) then if(proc_to_comm.ne.myrow) then
rcv_pt = brvidx(proc_to_comm) rcv_pt = brvidx(proc_to_comm)
call igerv2d(icontxt,nerv,1,rcvbuf(rcv_pt),nerv,proc_to_comm,0) call igerv2d(icontxt,nerv,1,rcvbuf(rcv_pt),nerv,proc_to_comm,0)
idx_pt = point_to_proc+psb_elem_recv_ idx_pt = point_to_proc+psb_elem_recv_
rcv_pt = brvidx(proc_to_comm) rcv_pt = brvidx(proc_to_comm)
call psi_sct(nerv,h_idx(idx_pt:idx_pt+nerv-1),& call psi_sct(nerv,d_idx(idx_pt:idx_pt+nerv-1),&
& rcvbuf(rcv_pt:rcv_pt+nerv-1),beta,y) & rcvbuf(rcv_pt:rcv_pt+nerv-1),beta,y)
else else
idx_pt = point_to_proc+psb_elem_recv_ idx_pt = point_to_proc+psb_elem_recv_
snd_pt = bsdidx(proc_to_comm) snd_pt = bsdidx(proc_to_comm)
call psi_sct(nerv,h_idx(idx_pt:idx_pt+nerv-1),& call psi_sct(nerv,d_idx(idx_pt:idx_pt+nerv-1),&
& sndbuf(snd_pt:snd_pt+nesd-1),beta,y) & sndbuf(snd_pt:snd_pt+nesd-1),beta,y)
end if end if
point_to_proc = point_to_proc+nerv+nesd+3 point_to_proc = point_to_proc+nerv+nesd+3
proc_to_comm = h_idx(point_to_proc+psb_proc_id_) proc_to_comm = d_idx(point_to_proc+psb_proc_id_)
end do end do
end if end if

@ -86,7 +86,7 @@ module psb_comm_mod
end subroutine psb_dscatterv end subroutine psb_dscatterv
end interface end interface
interface psb_dgather interface psb_gather
subroutine psb_dgatherm(globx, locx, desc_a, info, iroot,& subroutine psb_dgatherm(globx, locx, desc_a, info, iroot,&
& iiglobx, ijglobx, iilocx,ijlocx,ik) & iiglobx, ijglobx, iilocx,ijlocx,ik)
use psb_descriptor_type use psb_descriptor_type

@ -391,6 +391,11 @@ Module psb_tools_mod
type(psb_dspmat_type), intent(inout) ::a type(psb_dspmat_type), intent(inout) ::a
integer, intent(out) :: info integer, intent(out) :: info
end subroutine psb_dspfree end subroutine psb_dspfree
subroutine psb_dspfrees(a,info)
use psb_spmat_type
type(psb_dspmat_type), intent(inout) ::a
integer, intent(out) :: info
end subroutine psb_dspfrees
end interface end interface

@ -73,19 +73,21 @@ module psi_mod
type(psb_desc_type) :: desc_a type(psb_desc_type) :: desc_a
integer, optional :: data integer, optional :: data
end subroutine psi_dswapdatav end subroutine psi_dswapdatav
subroutine psi_iswapdatam(flag,n,beta,y,desc_a,work,info) subroutine psi_iswapdatam(flag,n,beta,y,desc_a,work,info,data)
use psb_descriptor_type use psb_descriptor_type
integer, intent(in) :: flag, n integer, intent(in) :: flag, n
integer, intent(out) :: info integer, intent(out) :: info
integer :: y(:,:), beta, work(:) integer :: y(:,:), beta, work(:)
type(psb_desc_type) :: desc_a type(psb_desc_type) :: desc_a
integer, optional :: data
end subroutine psi_iswapdatam end subroutine psi_iswapdatam
subroutine psi_iswapdatav(flag,beta,y,desc_a,work,info) subroutine psi_iswapdatav(flag,beta,y,desc_a,work,info,data)
use psb_descriptor_type use psb_descriptor_type
integer, intent(in) :: flag integer, intent(in) :: flag
integer, intent(out) :: info integer, intent(out) :: info
integer :: y(:), beta, work(:) integer :: y(:), beta, work(:)
type(psb_desc_type) :: desc_a type(psb_desc_type) :: desc_a
integer, optional :: data
end subroutine psi_iswapdatav end subroutine psi_iswapdatav
end interface end interface

@ -289,6 +289,7 @@ contains
subroutine smooth_aggregate(info) subroutine smooth_aggregate(info)
use psb_serial_mod use psb_serial_mod
use psb_comm_mod
use psb_tools_mod use psb_tools_mod
use psb_error_mod use psb_error_mod
implicit none implicit none

@ -16,6 +16,18 @@ subroutine psb_dprecbld(a,p,desc_a,info,upd)
type(psb_desc_type), intent(in) :: desc_a type(psb_desc_type), intent(in) :: desc_a
character, intent(in), optional :: upd character, intent(in), optional :: upd
interface psb_cslu
subroutine psb_dcslu(a,desc_data,p,upd,info)
use psb_serial_mod
use psb_descriptor_type
use psb_prec_type
integer, intent(out) :: info
type(psb_dspmat_type), intent(in), target :: a
type(psb_desc_type),intent(in) :: desc_data
type(psb_dbase_prec), intent(inout) :: p
character, intent(in) :: upd
end subroutine psb_dcslu
end interface
! Local scalars ! Local scalars
Integer :: err, nnzero, n_row, n_col,I,j,icontxt,& Integer :: err, nnzero, n_row, n_col,I,j,icontxt,&
@ -113,7 +125,7 @@ subroutine psb_dprecbld(a,p,desc_a,info,upd)
call psb_errpush(info,name) call psb_errpush(info,name)
goto 9999 goto 9999
end if end if
call psb_dgelp('n',n_row,1,a%Pl,p%baseprecv(1)%d,n_col,WORK,n_row,info) call psb_dgelp('n',a%Pl,p%baseprecv(1)%d,desc_a,info)
if(info /= 0) then if(info /= 0) then
info=4010 info=4010
ch_err='psb_dgelp' ch_err='psb_dgelp'
@ -126,7 +138,7 @@ subroutine psb_dprecbld(a,p,desc_a,info,upd)
if (debug) then if (debug) then
allocate(gd(mglob)) allocate(gd(mglob))
call psb_dgather(gd, p%baseprecv(1)%d, desc_a, info, iroot=iroot) call psb_gather(gd, p%baseprecv(1)%d, desc_a, info, iroot=iroot)
if(info /= 0) then if(info /= 0) then
info=4010 info=4010
ch_err='psb_dgatherm' ch_err='psb_dgatherm'
@ -166,7 +178,7 @@ subroutine psb_dprecbld(a,p,desc_a,info,upd)
select case(p%baseprecv(1)%iprcparm(f_type_)) select case(p%baseprecv(1)%iprcparm(f_type_))
case(f_ilu_n_,f_ilu_e_) case(f_ilu_n_,f_ilu_e_)
call psb_dcslu(a,desc_a,p%baseprecv(1),iupd,info) call psb_cslu(a,desc_a,p%baseprecv(1),iupd,info)
if(debug) write(0,*)me,': out of psb_dcslu' if(debug) write(0,*)me,': out of psb_dcslu'
if(info /= 0) then if(info /= 0) then
info=4010 info=4010
@ -449,6 +461,17 @@ subroutine psb_mlprec_bld(a,desc_a,p,info)
integer :: i, nrg, nzg, err_act,k integer :: i, nrg, nzg, err_act,k
character(len=20) :: name, ch_err character(len=20) :: name, ch_err
interface psb_splu
subroutine psb_dsplu(a,l,u,d,info,blck)
use psb_spmat_type
integer, intent(out) :: info
type(psb_dspmat_type),intent(in) :: a
type(psb_dspmat_type),intent(inout) :: l,u
type(psb_dspmat_type),intent(in), optional, target :: blck
real(kind(1.d0)), intent(inout) :: d(:)
end subroutine psb_dsplu
end interface
interface psb_genaggrmap interface psb_genaggrmap
subroutine psb_dgenaggrmap(aggr_type,a,desc_a,nlaggr,ilaggr,info) subroutine psb_dgenaggrmap(aggr_type,a,desc_a,nlaggr,ilaggr,info)
use psb_spmat_type use psb_spmat_type
@ -527,7 +550,7 @@ subroutine psb_mlprec_bld(a,desc_a,p,info)
case(f_ilu_n_,f_ilu_e_) case(f_ilu_n_,f_ilu_e_)
call psb_spreall(p%av(l_pr_),nzg,info) call psb_spreall(p%av(l_pr_),nzg,info)
call psb_spreall(p%av(u_pr_),nzg,info) call psb_spreall(p%av(u_pr_),nzg,info)
call psb_dsplu(p%av(ac_),p%av(l_pr_),p%av(u_pr_),p%d,info) call psb_splu(p%av(ac_),p%av(l_pr_),p%av(u_pr_),p%d,info)
if(info /= 0) then if(info /= 0) then
info=4011 info=4011
call psb_errpush(info,name) call psb_errpush(info,name)

@ -170,7 +170,6 @@ contains
end if end if
do do
! write(0,*)'++++',trw%infoa(psb_nnz_),size(trw%ia1),ktrw,k,i
if (ktrw > trw%infoa(psb_nnz_)) exit if (ktrw > trw%infoa(psb_nnz_)) exit
if (trw%ia1(ktrw) > i) exit if (trw%ia1(ktrw) > i) exit
k = trw%ia2(ktrw) k = trw%ia2(ktrw)

@ -1,735 +0,0 @@
program df_bench
use f90sparse
use mat_dist
use read_mat
use partgraph
use errormod
implicit none
! input parameters
character(len=20) :: cmethd, prec
character(len=40) :: mtrx_file, rhs_file
character(len=20) :: mtrx_name, name, ch_err
character(len=10) :: ptype
character(len=200) :: charbuf
integer :: inparms(20)
double precision ddot
external ddot
interface
! .....user passed subroutine.....
subroutine part_block(global_indx,n,np,pv,nv)
implicit none
integer, intent(in) :: global_indx, n, np
integer, intent(out) :: nv
integer, intent(out) :: pv(*)
end subroutine part_block
end interface ! local variables
interface
! .....user passed subroutine.....
subroutine part_blk2(global_indx,n,np,pv,nv)
implicit none
integer, intent(in) :: global_indx, n, np
integer, intent(out) :: nv
integer, intent(out) :: pv(*)
end subroutine part_blk2
end interface ! Local variables
integer, parameter :: izero=0, ione=1
character, parameter :: order='r'
real(kind(1.d0)), pointer, save :: b_col(:), x_col(:), r_col(:), &
& b_col_glob(:), x_col_glob(:), r_col_glob(:), b_glob(:,:), &
&z(:), q(:),z1(:), xm(:,:), ym(:,:)
integer :: iargc, check_descr, convert_descr
real(kind(1.d0)), parameter :: dzero = 0.d0, one = 1.d0
real(kind(1.d0)) :: mpi_wtime, t1, t2, t3, tprec, tslv, ttot, &
&r_amax, b_amax,bb(1,1), lambda,scale,resmx,resmxp, omega
integer :: nrhs, nrow, nx1, nx2, n_row, n_col, dim,iread,ip,io,no,nmats,&
& imat,irenum, igsmth, matop, jacswp
logical :: amroot
external iargc, mpi_wtime
integer bsze,overlap, nn, nprecs, novrs
common/part/bsze,overlap
integer, pointer :: work(:), precs(:), ovrs(:)
! sparse matrices
type(d_spmat) :: a, aux_a, h
type(d_prec) :: pre
!!$ type(d_precn) :: aprc
! dense matrices
real(kind(1.d0)), pointer :: aux_b(:,:) , aux1(:), aux2(:), vdiag(:), &
& aux_g(:,:), aux_x(:,:), d(:)
! communications data structure
type(desc_type) :: desc_a, desc_a_out
! blacs parameters
integer :: nprow, npcol, ictxt, iam, np, myprow, mypcol
! solver paramters
integer :: iter, itmax, ierr, itrace, ircode, ipart,&
& methd, istopc, irst,nv
integer, pointer :: ivg(:), ipv(:)
character(len=5) :: afmt
real(kind(1.d0)) :: err, eps
integer iparm(20)
real(kind(1.d0)) rparm(20)
! other variables
integer :: i,info,j, ntryslv
integer :: internal, m,ii,nnzero, itryslv
integer, parameter :: ncsw=4, ntcs=4
real(kind(1.d0)), pointer :: tsthal(:,:)
real(kind(1.d0)) :: tswmpi(ntcs,ncsw),tswsyn(ntcs,ncsw),tswsdrv(ntcs,ncsw)
! common area
integer m_problem, nproc, nc
! initialize blacs
call blacs_pinfo(iam, np)
call blacs_get(izero, izero, ictxt)
! rectangular Grid, Np x 1
call blacs_gridinit(ictxt, order, np, ione)
call blacs_gridinfo(ictxt, nprow, npcol, myprow, mypcol)
amroot = (myprow==0).and.(mypcol==0)
info=0
name='df_bench'
call psb_set_errverbosity(2)
call psb_set_erraction(0)
! call psb_erractionsave(err_act)
!
! Get parameters from file
!
if (amroot) then
read(*,*) cmethd
do i = 1, len(cmethd)
inparms(i) = iachar(cmethd(i:i))
end do
call igebs2d(ictxt,'all',' ',20,1,inparms,20)
read(*,*) afmt
do i = 1, len(afmt)
inparms(i) = iachar(afmt(i:i))
end do
call igebs2d(ictxt,'all',' ',20,1,inparms,20)
read(*,*) ipart
read(*,*) itmax
read(*,*) itrace
read(*,*) istopc
read(*,*) irst
read(*,*) irenum
read(*,*) ntryslv
read(*,*) nprecs
inparms(1) = ipart
inparms(2) = itmax
inparms(3) = itrace
inparms(4) = irenum
inparms(5) = ntryslv
inparms(6) = nprecs
inparms(7) = istopc
inparms(8) = irst
call igebs2d(ictxt,'all',' ',8,1,inparms,20)
!!$ write(0,*) 'Sent nprecs: ',nprecs
allocate(precs(nprecs))
read(*,*) (precs(i),i=1,nprecs)
call igebs2d(ictxt,'all',' ',nprecs,1,precs,nprecs)
read(*,*) novrs
call igebs2d(ictxt,'all',' ',1,1,novrs,1)
!!$ write(0,*) 'Sent novrs: ',novrs
allocate(ovrs(novrs))
read(*,*) (ovrs(i),i=1,novrs)
call igebs2d(ictxt,'all',' ',novrs,1,ovrs,novrs)
read(*,*) eps
call dgebs2d(ictxt,'all',' ',1,1,eps,1)
read(*,*) omega
call dgebs2d(ictxt,'all',' ',1,1,omega,1)
read(*,*) igsmth
call igebs2d(ictxt,'all',' ',1,1,igsmth,1)
read(*,*) matop
call igebs2d(ictxt,'all',' ',1,1,matop,1)
read(*,*) jacswp
call igebs2d(ictxt,'all',' ',1,1,jacswp,1)
read(*,*) nmats
call igebs2d(ictxt,'all',' ',1,1,nmats,1)
else
call igebr2d(ictxt,'a',' ',20,1,inparms,20,0,0)
do i = 1, len(cmethd)
cmethd(i:i) = achar(inparms(i))
end do
call igebr2d(ictxt,'a',' ',20,1,inparms,20,0,0)
do i = 1, len(afmt)
afmt(i:i) = achar(inparms(i))
end do
call igebr2d(ictxt,'all',' ',8,1,inparms,20,0,0)
ipart = inparms(1)
itmax = inparms(2)
itrace = inparms(3)
irenum = inparms(4)
ntryslv= inparms(5)
nprecs = inparms(6)
istopc = inparms(7)
irst = inparms(8)
!!$ write(0,*) 'Recvd nprecs: ',nprecs
allocate(precs(nprecs))
call igebr2d(ictxt,'all',' ',nprecs,1,precs,nprecs,0,0)
call igebr2d(ictxt,'all',' ',1,1,novrs,1,0,0)
!!$ write(0,*) 'Recvd novrs: ',novrs
allocate(ovrs(novrs))
call igebr2d(ictxt,'all',' ',novrs,1,ovrs,novrs,0,0)
call dgebr2d(ictxt,'all',' ',1,1,eps,1,0,0)
call dgebr2d(ictxt,'all',' ',1,1,omega,1,0,0)
call igebr2d(ictxt,'all',' ',1,1,igsmth,1,0,0)
call igebr2d(ictxt,'all',' ',1,1,matop,1,0,0)
call igebr2d(ictxt,'all',' ',1,1,jacswp,1,0,0)
call igebr2d(ictxt,'all',' ',1,1,nmats,1,0,0)
endif
do imat=1,nmats
if (amroot) then
!!$ read(*,*) mtrx_file,rhs_file
read(*,'(a)') charbuf
charbuf=adjustl(charbuf)
i=index(charbuf," ")
mtrx_file=charbuf(1:i-1)
rhs_file=adjustl(charbuf(i+1:))
i=index(mtrx_file,"/",back=.true.)
mtrx_name=trim(mtrx_file(i+1:))
write(0,*) 'Read mtx rhs : "',&
& mtrx_file,'" "',rhs_file,'" "',mtrx_name,'"'
endif
call blacs_barrier(ictxt,'A')
t1 = mpi_wtime()
! read the input matrix to be processed and (possibly) the RHS
nrhs = 1
nproc = nprow
if (amroot) then
nullify(aux_b)
call readmat(mtrx_file, aux_a, ictxt)
!!$ write(0,*) 'from readmat: ',aux_a%fida,aux_a%m,':',&
!!$ &aux_a%ia2(aux_a%m+1)-1,':',aux_a%ia1(1:10)
m_problem = aux_a%m
call igebs2d(ictxt,'a',' ',1,1,m_problem,1)
if (rhs_file /= 'none') then
! reading an rhs
call read_rhs(rhs_file,aux_b,ictxt)
end if
if (associated(aux_b).and.size(aux_b,1)==m_problem) then
! if any rhs were present, broadcast the first one
!!$ write(0,*) 'ok, got an rhs ',aux_b(m_problem,1)
b_col_glob =>aux_b(:,1)
else
!!$ write(0,*) 'inventing an rhs '
allocate(aux_b(m_problem,1), stat=ircode)
if (ircode /= 0) then
write(0,*) 'memory allocation failure in df_sample'
call blacs_abort(ictxt,-1)
stop
endif
b_col_glob =>aux_b(:,1)
do i=1, m_problem
b_col_glob(i) = 1.d0
enddo
endif
call dgebs2d(ictxt,'a',' ',m_problem,1,b_col_glob,m_problem)
else
call igebr2d(ictxt,'a',' ',1,1,m_problem,1,0,0)
!!$ write(0,*) 'Receiving AUX_B'
allocate(aux_b(m_problem,1), stat=ircode)
if (ircode /= 0) then
write(0,*) 'Memory allocation failure in DF_SAMPLE'
call blacs_abort(ictxt,-1)
stop
endif
b_col_glob =>aux_b(:,1)
call dgebr2d(ictxt,'A',' ',m_problem,1,b_col_glob,m_problem,0,0)
end if
! switch over different partition types
if (ipart.eq.0) then
call blacs_barrier(ictxt,'A')
if (.true.) then
allocate(ivg(m_problem),ipv(np))
do i=1,m_problem
call part_block(i,m_problem,np,ipv,nv)
ivg(i) = ipv(1)
enddo
call matdist(aux_a, a, ivg, ictxt, &
& desc_a,b_col_glob,b_col,info,fmt=afmt)
else
call matdist(aux_a, a, part_block, ictxt, &
& desc_a,b_col_glob,b_col,info,fmt=afmt)
endif
else if (ipart.eq.1) then
call blacs_barrier(ictxt,'A')
call matdist(aux_a, a, part_blk2, ictxt, &
& desc_a,b_col_glob,b_col,info,fmt=afmt)
else if (ipart.eq.2) then
if (amroot) then
call build_grppart(aux_a%m,aux_a%fida,aux_a%ia1,aux_a%ia2,np)
endif
call distr_grppart(0,0,ictxt)
if (.false.) then
call matdist(aux_a, a, part_graph, ictxt, &
& desc_a,b_col_glob,b_col,info,fmt=afmt)
else
call getv_grppart(ivg)
call matdist(aux_a, a, ivg, ictxt, &
& desc_a,b_col_glob,b_col,info,fmt=afmt)
endif
else
call matdist(aux_a, a, part_block, ictxt, &
& desc_a,b_col_glob,b_col,info,fmt=afmt)
end if
if(info /= 0) then
info=4010
ch_err='matdist'
goto 9999
end if
call f90_psdsall(m_problem,x_col,desc_a,info)
if(info /= 0) then
info=4010
ch_err='f90_psdsall'
goto 9999
end if
x_col(:) =0.0
call f90_psdsasb(x_col,desc_a,info)
call f90_psdsasb(b_col,desc_a,info)
if(info /= 0) then
info=4010
ch_err='f90_psdsasb'
goto 9999
end if
call f90_psdsall(m_problem,r_col,desc_a,info)
if(info /= 0) then
info=4010
ch_err='f90_psdsall'
goto 9999
end if
r_col(:) =0.0
call f90_psdsasb(r_col,desc_a,info)
if(info /= 0) then
info=4010
ch_err='f90_psdsasb'
goto 9999
end if
t2 = mpi_wtime() - t1
dim=size(a%aspk)
call dgamx2d(ictxt, 'a', ' ', ione, ione, t2, ione,&
& t1, t1, -1, -1, -1)
if (amroot) then
write(6,*) 'time to Read and Partition Matrix : ',T2
END IF
!!$ call blacs_barrier(ictxt,'all')
!!$ do i=0, nprow-1
!!$ if (myprow==i) then
!!$ write(6,*) 'Main descriptor for process ',i,' ',mtrx_file
!!$ call descprt(6,desc_a,short=.true.)
!!$ endif
!!$ call blacs_barrier(ictxt,'all')
!!$ enddo
!
! Prepare the preconditioning matrix. Note the availability
! of optional parameters
!
do ip=1,nprecs
pre%prec=precs(ip)
if (precs(ip) > 2) then
no=novrs
else
no=1
endif
do io=1, no
ttot = 1.d300
do itryslv=1,ntryslv
! Zero initial guess.
select case(precs(ip))
case(noprec_)
ptype='noprec'
call psb_precset(pre,ptype)
case(diagsc_)
ptype='diagsc'
call psb_precset(pre,ptype)
case(bja_)
ptype='bja'
call psb_precset(pre,ptype)
case(asm_)
ptype='asm'
call psb_precset(pre,ptype,iv=(/ovrs(io),halo_,sum_/))
case(ash_)
ptype='asm'
call psb_precset(pre,ptype,iv=(/ovrs(io),nohalo_,sum_/))
case(ras_)
ptype='asm'
call psb_precset(pre,ptype,iv=(/ovrs(io),halo_,none_/))
case(50+ras_)
pre%prec = ras_
ptype='asm'
call psb_precset(pre,ptype,iv=(/ovrs(io),halo_,none_,f_slu_/))
case(rash_)
ptype='asm'
call psb_precset(pre,ptype,iv=(/ovrs(io),nohalo_,none_/))
case(ras2lv_)
ptype='asm'
call psb_precset(pre,ptype,iv=(/ovrs(io),halo_,none_/))
ptype='ml'
call psb_precset(pre,ptype,&
&iv=(/add_ml_prec_,glb_aggr_,pre_smooth_,igsmth,matop/),rs=0.d0)
case(ras2lvm_)
ptype='asm'
call psb_precset(pre,ptype,iv=(/ovrs(io),halo_,none_/))
ptype='ml'
call psb_precset(pre,ptype,&
& iv=(/mult_ml_prec_,glb_aggr_,pre_smooth_,igsmth,matop/),rs=0.d0)
case(lv2mras_)
ptype='asm'
call psb_precset(pre,ptype,iv=(/ovrs(io),halo_,none_/))
ptype='ml'
call psb_precset(pre,ptype,&
& iv=(/mult_ml_prec_,glb_aggr_,post_smooth_,igsmth,matop/),rs=0.d0)
case(50+lv2mras_)
pre%prec = lv2mras_
ptype='asm'
call psb_precset(pre,ptype,iv=(/ovrs(io),halo_,none_/))
ptype='ml'
call psb_precset(pre,ptype,&
& iv=(/mult_ml_prec_,glb_aggr_,post_smooth_,igsmth,matop,f_slu_/),&
& rs=0.d0)
case(lv2smth_)
ptype='asm'
call psb_precset(pre,ptype,iv=(/ovrs(io),halo_,none_/))
ptype='ml'
if (omega>0.0d0) then
call psb_precset(pre,'ml',&
& iv=(/mult_ml_prec_,glb_aggr_,post_smooth_,igsmth,matop/),&
& rs=omega)
else
call psb_precset(pre,'ml',&
& iv=(/mult_ml_prec_,glb_aggr_,post_smooth_,igsmth,matop/))
endif
case(50+lv2smth_)
pre%prec = lv2smth_
ptype='asm'
call psb_precset(pre,ptype,iv=(/ovrs(io),halo_,none_/))
ptype='ml'
if (omega>0.0d0) then
call psb_precset(pre,ptype,&
& iv=(/mult_ml_prec_,glb_aggr_,post_smooth_,igsmth,matop,f_slu_/),&
& rs=omega)
else
call psb_precset(pre,ptype,&
& iv=(/mult_ml_prec_,glb_aggr_,post_smooth_,igsmth,matop,f_slu_/))
endif
case(lv2lsm_)
ptype='asm'
call psb_precset(pre,ptype,iv=(/ovrs(io),halo_,none_/))
ptype='ml'
if (omega>0.0d0) then
call psb_precset(pre,ptype,&
& iv=(/mult_ml_prec_,loc_aggr_,post_smooth_,igsmth,matop/),&
& rs=omega)
else
call psb_precset(pre,ptype,&
& iv=(/mult_ml_prec_,loc_aggr_,post_smooth_,igsmth,matop/))
endif
case(50+lv2lsm_)
pre%prec = lv2lsm_
ptype='asm'
call psb_precset(pre,ptype,iv=(/ovrs(io),halo_,none_/))
ptype='ml'
if (omega>0.0d0) then
call psb_precset(pre,ptype,&
& iv=(/mult_ml_prec_,loc_aggr_,post_smooth_,igsmth,matop,f_slu_/),&
& rs=omega)
else
call psb_precset(pre,ptype,&
& iv=(/mult_ml_prec_,loc_aggr_,post_smooth_,igsmth,matop,f_slu_/))
endif
case(sl2sm_)
ptype='asm'
call psb_precset(pre,ptype,iv=(/ovrs(io),halo_,sum_/))
ptype='ml'
if (omega>0.0d0) then
call psb_precset(pre,ptype,&
& iv=(/add_ml_prec_,loc_aggr_,post_smooth_,igsmth,matop/),rs=omega)
else
call psb_precset(pre,ptype,&
& iv=(/add_ml_prec_,loc_aggr_,post_smooth_,igsmth,matop/))
endif
case(new_loc_smth_)
ptype='asm'
call psb_precset(pre,ptype,iv=(/ovrs(io),halo_,none_/))
ptype='ml'
if (omega>0.0d0) then
call psb_precset(pre,ptype,&
& iv=(/new_ml_prec_,new_loc_aggr_,post_smooth_,1,&
& matop,f_ilu_n_,jacswp/), rs=omega)
else
call psb_precset(pre,ptype,&
& iv=(/new_ml_prec_,new_loc_aggr_,post_smooth_,1,&
& matop,f_ilu_n_,jacswp/))
endif
case(50+new_loc_smth_)
pre%prec = new_loc_smth_
ptype='asm'
call psb_precset(pre,ptype,iv=(/ovrs(io),halo_,none_/))
ptype='ml'
if (omega>0.0d0) then
call psb_precset(pre,ptype,&
& iv=(/new_ml_prec_,new_loc_aggr_,post_smooth_,1,&
& matop,f_slu_,jacswp/), rs=omega)
else
call psb_precset(pre,ptype,&
& iv=(/new_ml_prec_,new_loc_aggr_,post_smooth_,1,&
& matop,f_slu_,jacswp/))
endif
case(new_glb_smth_)
ptype='asm'
call psb_precset(pre,ptype,iv=(/ovrs(io),halo_,none_/))
ptype='ml'
if (omega>0.0d0) then
call psb_precset(pre,ptype,&
& iv=(/new_ml_prec_,new_glb_aggr_,post_smooth_,1,&
& matop,f_ilu_n_,1/), rs=omega)
else
call psb_precset(pre,ptype,&
& iv=(/new_ml_prec_,new_glb_aggr_,post_smooth_,1,&
& matop,f_ilu_n_,1/))
endif
case(50+new_glb_smth_)
pre%prec = new_glb_smth_
ptype='asm'
call psb_precset(pre,ptype,iv=(/ovrs(io),halo_,none_/))
ptype='ml'
if (omega>0.0d0) then
call psb_precset(pre,ptype,&
& iv=(/new_ml_prec_,new_glb_aggr_,post_smooth_,1,&
& matop,f_slu_,1/), rs=omega)
else
call psb_precset(pre,ptype,&
& iv=(/new_ml_prec_,new_glb_aggr_,post_smooth_,1,&
& matop,f_slu_,1/))
endif
case default
write(0,*) 'Unknown iprec, defaulting to BJA'
ptype='bja'
call psb_precset(pre,ptype)
end select
call blacs_barrier(ictxt,'All')
call f90_psaxpby(0.d0,b_col,0.d0,x_col,desc_a,info)
if(info /= 0) then
info=4010
ch_err='f90_psaxpby'
goto 9999
end if
call blacs_barrier(ictxt,'All')
t1 = mpi_wtime()
call psb_precbld(a,pre,desc_a,info)!,'f')
t2 = mpi_wtime()-t1
if(info /= 0) then
info=4010
ch_err='psb_precbld'
goto 9999
end if
call dgamx2d(ictxt,'a',' ',ione, ione,t2,ione,t1,t1,-1,-1,-1)
if (info /= 0) then
write(0,*) 'error in preconditioner :',info
call blacs_abort(ictxt,-1)
stop
end if
iparm = 0
rparm = 0.d0
call blacs_barrier(ictxt,'all')
t1 = mpi_wtime()
if (cmethd.eq.'CG') Then
call f90_cg(a,pre,b_col,x_col,eps,desc_a,info,&
& itmax,iter,err,itrace,istop=istopc)
if(info /= 0) then
info=4010
ch_err='f90_cg'
goto 9999
end if
else if (cmethd.eq.'BICGSTAB') Then
call f90_bicgstab(a,pre,b_col,x_col,eps,desc_a,info,&
& itmax,iter,err,itrace,istop=istopc)
if(info /= 0) then
info=4010
ch_err='f90_bicgstab'
goto 9999
end if
ELSE IF (CMETHD.EQ.'BICG') Then
call f90_bicg(a,pre,b_col,x_col,eps,desc_a,info,&
& itmax,iter,err,itrace,istop=istopc)
if(info /= 0) then
info=4010
ch_err='f90_bicg'
goto 9999
end if
ELSE IF (CMETHD.EQ.'CGS') Then
call f90_cgs(a,pre,b_col,x_col,eps,desc_a,info,&
& itmax,iter,err,itrace,istop=istopc)
if(info /= 0) then
info=4010
ch_err='f90_cg'
goto 9999
end if
ELSE IF (CMETHD.EQ.'GMRES') Then
call f90_rgmres(a,pre,b_col,x_col,eps,desc_a,info,&
& itmax,iter,err,itrace,irst=irst,istop=istopc)
if(info /= 0) then
info=4010
ch_err='f90_rgmres'
goto 9999
end if
else
write(*,*) 'Unknown method : "',cmethd,'"'
ENDIF
call blacs_barrier(ictxt,'all')
t3 = mpi_wtime() - t1
call dgamx2d(ictxt,'a',' ',ione, ione,t3,ione,t1,t1,-1,-1,-1)
if (amroot) then
write(10,'(a18,3(1x,i2),1x,i5,5(1x,g9.4))') mtrx_name,nprow,&
& precs(ip),pre%baseprec%iprcparm(n_ovr_),iter,t2,t3,t2+t3
endif
if (itryslv < ntryslv) then
call psb_precfree(pre,info)
if(info /= 0) then
info=4010
ch_err='psb_precfree'
goto 9999
end if
else
if (amroot) call prec_descr(6,pre)
end if
if ((t2+t3)<ttot) then
tprec = t2
tslv = t3
ttot = t2+t3
end if
end do
call f90_psaxpby(one,b_col,dzero,r_col,desc_A,info)
call f90_psspmm(-one,a,x_col,one,r_col,desc_a,info)
call f90_nrm2(resmx,r_col,desc_a,info)
call f90_amax(resmxp,r_col,desc_a,info)
if(info /= 0) then
info=4011
goto 9999
end if
if (amroot) then
write(6,*) ' iprec istopc : ',precs(ip), istopc, ovrs(io)
!!$! write(6,*) 'Number of iterations : ',iter
!!$! WRITE(6,*) 'Error on exit : ',ERR
write(6,*) 'Matrix: ',mtrx_name
write(6,*) 'Computed solution on ',NPROW,' processors.'
write(6,*) 'Iterations to convergence: ',iter
write(6,*) 'Error indicator on exit:',err
write(6,*) 'Time to Buil Prec. : ',TPREC
write(6,*) 'Time to Solve Matrix : ',TSLV
WRITE(6,*) 'Time per iteration : ',TSLV/(ITER)
write(6,*) 'Total Time : ',ttot
write(6,*) 'Residual norm 2 = ',resmx
write(6,*) 'Residual norm inf = ',resmxp
write(6,*)
write(6,*)
write(8,'(a18,3(1x,i2),1x,i5,5(1x,g9.4))') mtrx_name,nprow,precs(ip),&
& pre%baseprec%iprcparm(n_ovr_),&
& iter,tprec,tslv,ttot,resmx,resmxp
if (associated(pre%smthprec)) then
write(11,'(a18,9(1x,i2),1(1x,g9.4),1x,i5,5(1x,g9.4))') &
& mtrx_name,nprow,&
& pre%baseprec%iprcparm(p_type_),pre%baseprec%iprcparm(n_ovr_),&
& pre%baseprec%iprcparm(restr_),pre%baseprec%iprcparm(prol_),&
& pre%smthprec%iprcparm(ml_type_),pre%smthprec%iprcparm(aggr_alg_),&
& pre%smthprec%iprcparm(smth_pos_),pre%smthprec%iprcparm(glb_smth_),&
& pre%smthprec%dprcparm(smooth_omega_),&
& iter,tprec,tslv,ttot,resmx,resmxp
else
write(11,'(a18,9(1x,i2),1(1x,g9.4),1x,i5,5(1x,g9.4))') &
& mtrx_name,nprow,&
& pre%baseprec%iprcparm(p_type_),pre%baseprec%iprcparm(n_ovr_),&
& pre%baseprec%iprcparm(prol_),pre%baseprec%iprcparm(restr_),&
& 0, 0, 0,&
& 0, 0.0,&
& iter,tprec,tslv,ttot,resmx,resmxp
end if
end if
call psb_precfree(pre,info)
if(info /= 0) then
info=4011
goto 9999
end if
end do
end do
deallocate(aux_b,stat=info)
if (amroot) call spfree(aux_a,info)
!!$
call f90_psdsfree(b_col, desc_a,info)
call f90_psdsfree(x_col, desc_a,info)
call f90_psdsfree(r_col, desc_a,info)
call f90_psspfree(a, desc_a,info)
call f90_psdscfree(desc_a,info)
if(info /= 0) then
info=4011
goto 9999
end if
end do
deallocate(ovrs,precs,stat=info)
write(0,*) 'Info from deallocate ',info
9999 continue
if(info /= 0) then
call psb_errpush(info,name,a_err=ch_err)
call psb_error(ictxt)
call blacs_gridexit(ictxt)
call blacs_exit(0)
else
call blacs_gridexit(ictxt)
call blacs_exit(0)
end if
end program df_bench

@ -1,288 +0,0 @@
program df_comm
use f90sparse
use mat_dist
use read_mat
use partgraph
use comminfo
implicit none
! input parameters
character(len=20) :: cmethd, prec
character(len=40) :: mtrx_file
character(len=20) :: mtrx_name, out_file, tmpstr
character(len=200) :: charbuf
integer :: inparms(20)
double precision ddot
external ddot
integer, parameter :: izero=0, ione=1
character, parameter :: order='r'
real(kind(1.d0)), pointer, save :: b_col(:), x_col(:), r_col(:), &
& b_col_glob(:), x_col_glob(:), r_col_glob(:), b_glob(:,:), &
&z(:), q(:),z1(:), xm(:,:), ym(:,:)
integer :: iargc, check_descr, convert_descr
real(kind(1.d0)), parameter :: dzero = 0.d0, one = 1.d0
real(kind(1.d0)) :: mpi_wtime, t1, t2, t3, tprec, tslv, ttot, &
&r_amax, b_amax,bb(1,1), lambda,scale,resmx,resmxp, omega
integer :: nrhs, nrow, nx1, nx2, n_row, n_col, dim,iread,ip,io,no,nmats,&
& imat,irenum, igsmth, matop
logical :: amroot
external iargc, mpi_wtime
integer bsze,overlap, nn, nprecs, novrs
common/part/bsze,overlap
integer, pointer :: work(:), precs(:), ovrs(:), comm_info(:,:)
! sparse matrices
type(d_spmat) :: a, aux_a, h
type(d_prec) :: pre
!!$ type(d_precn) :: aprc
! dense matrices
real(kind(1.d0)), pointer :: aux_b(:,:) , aux1(:), aux2(:), vdiag(:), &
& aux_g(:,:), aux_x(:,:), d(:)
! communications data structure
type(desc_type) :: desc_a, desc_a_out
! blacs parameters
integer :: nprow, npcol, ictxt, iam, np, myprow, mypcol
! solver paramters
integer :: iter, itmax, ierr, itrace, ircode, ipart,&
& methd, istopc, irst
integer, pointer :: ierrv(:), ivg(:)
character(len=5) :: afmt
real(kind(1.d0)) :: err, eps
integer iparm(20)
real(kind(1.d0)) rparm(20)
! other variables
integer :: i,info,j, ntryslv
integer :: internal, m,ii,nnzero, itryslv
integer, parameter :: ncsw=4, ntcs=4
real(kind(1.d0)), pointer :: tsthal(:,:)
real(kind(1.d0)) :: tswmpi(ntcs,ncsw),tswsyn(ntcs,ncsw),tswsdrv(ntcs,ncsw)
! common area
integer m_problem, nproc, nc
allocate(ierrv(6))
! initialize blacs
call blacs_pinfo(iam, np)
call blacs_get(izero, izero, ictxt)
! rectangular Grid, Np x 1
call blacs_gridinit(ictxt, order, np, ione)
call blacs_gridinfo(ictxt, nprow, npcol, myprow, mypcol)
amroot = (myprow==0).and.(mypcol==0)
afmt='CSR'
!
! Get parameters from file
!
if (amroot) then
read(*,*) ipart
call igebs2d(ictxt,'all',' ',1,1,ipart,1)
read(*,*) nmats
call igebs2d(ictxt,'all',' ',1,1,nmats,1)
else
call igebr2d(ictxt,'all',' ',1,1,ipart,1,0,0)
call igebr2d(ictxt,'all',' ',1,1,nmats,1,0,0)
endif
do imat=1,nmats
if (amroot) then
!!$ read(*,*) mtrx_file,rhs_file
read(*,'(a)') charbuf
charbuf=adjustl(charbuf)
i=index(charbuf," ")
mtrx_file=charbuf(1:i-1)
i=index(mtrx_file,"/",back=.true.)
mtrx_name=trim(mtrx_file(i+1:))
write(0,*) 'Read mtx rhs : "', mtrx_file
i=index(mtrx_file,'.mtx')
write(tmpstr,'("_",i1,"_",i2.2,".out")')ipart,np
out_file=mtrx_file(1:i-1)//trim(tmpstr)
write(0,*)' out file is ', out_file
open(2,file=out_file,action='write')
write(2,'("Matrix: ",a20)')mtrx_name
write(2,'("Number of processes: ",i2)')np
write(2,'("Partitioning: ",i1)')ipart
write(2,'(" ")')
write(2,'(" ")')
close(2)
endif
call blacs_barrier(ictxt,'A')
t1 = mpi_wtime()
! read the input matrix to be processed and (possibly) the RHS
nrhs = 1
nproc = nprow
if (amroot) then
nullify(aux_b)
call readmat(mtrx_file, aux_a, ictxt)
m_problem = aux_a%m
call igebs2d(ictxt,'a',' ',1,1,m_problem,1)
allocate(aux_b(m_problem,1), stat=ircode)
if (ircode /= 0) then
write(0,*) 'memory allocation failure in df_sample'
call blacs_abort(ictxt,-1)
stop
endif
b_col_glob =>aux_b(:,1)
do i=1, m_problem
b_col_glob(i) = 1.d0
enddo
call dgebs2d(ictxt,'a',' ',m_problem,1,b_col_glob,m_problem)
else
call igebr2d(ictxt,'a',' ',1,1,m_problem,1,0,0)
allocate(aux_b(m_problem,1), stat=ircode)
if (ircode /= 0) then
write(0,*) 'Memory allocation failure in DF_SAMPLE'
call blacs_abort(ictxt,-1)
stop
endif
b_col_glob =>aux_b(:,1)
call dgebr2d(ictxt,'A',' ',m_problem,1,b_col_glob,m_problem,0,0)
end if
! switch over different partition types
if (ipart.eq.0) then
call blacs_barrier(ictxt,'A')
write(0,*) 'Partition type: BLOCK'
allocate(ivg(m_problem))
call bld_partblock(m_problem,np,ivg)
call matdist(aux_a, a, ivg, ictxt, &
& desc_a,b_col_glob,b_col,fmt=afmt)
else if (ipart.eq.1) then
call blacs_barrier(ictxt,'A')
write(0,*) 'partition type: BLK2'
allocate(ivg(m_problem))
call bld_partblk2(m_problem,np,ivg)
call matdist(aux_a, a, ivg, ictxt, &
& desc_a,b_col_glob,b_col,fmt=afmt)
else if (ipart.eq.2) then
write(0,*) 'partition type: GRAPH'
if (amroot) then
call build_grppart(aux_a%m,aux_a%fida,aux_a%ia1,aux_a%ia2,np)
endif
call distr_grppart(0,0,ictxt)
if (.false.) then
call matdist(aux_a, a, part_graph, ictxt, &
& desc_a,b_col_glob,b_col,fmt=afmt)
else
call getv_grppart(ivg)
call matdist(aux_a, a, ivg, ictxt, &
& desc_a,b_col_glob,b_col,fmt=afmt)
endif
end if
if(amroot) then
allocate(comm_info(np,np))
comm_info(:,:)=0
end if
call blacs_barrier(ictxt,'all')
write(0,'("Getting communication info")')
call get_comminfo(ictxt,desc_a,comm_info)
if(amroot) then
open(2,file=out_file,action='write',position='append')
write(2,'("Exchange table:")')
do i=1,np
write(2,*)'Row ',i,' : ',comm_info(i,:)
end do
write(2,'(" ")')
write(2,'(" ")')
close(2)
end if
n_row = desc_a%matrix_data(n_row_)
n_col = desc_a%matrix_data(n_col_)
write(0,'("Allocating vectors")')
call f90_psdsall(m_problem,ncsw,tsthal,ierrv,desc_a)
forall (i=1:n_row)
forall (j=1:ncsw)
tsthal(i,j) = j * desc_a%loc_to_glob(i)
end forall
end forall
tsthal(n_row+1:,:) = -1.d0
tswmpi = 1.d200
tswsyn = 1.d200
tswsdrv = 1.d200
write(0,'("Cycling")')
do nc=1, ncsw
do i=1, ntcs
tsthal(n_row+1:,:) = -1.d0
t1=mpi_wtime()
call f90_pshalo(tsthal(:,1:nc),desc_a,trans='T',mode=SWAP_MPI)
t2=mpi_wtime()-t1
call dgamx2d(ictxt,'a',' ',ione, ione,t2,ione,t1,t1,-1,-1,-1)
tswmpi(i,nc) = t2
!! Add correctness check
tsthal(n_row+1:,:) = -1.d0
t1=mpi_wtime()
call f90_pshalo(tsthal(:,1:nc),desc_a,trans='T',mode=SWAP_SYNC)
t2=mpi_wtime()-t1
call dgamx2d(ictxt,'a',' ',ione, ione,t2,ione,t1,t1,-1,-1,-1)
tswsyn(i,nc) = t2
!! Add correctness check
tsthal(n_row+1:,:) = -1.d0
t1=mpi_wtime()
call f90_pshalo(tsthal(:,1:nc),desc_a,trans='T',mode=IOR(SWAP_SEND,SWAP_RECV))
t2=mpi_wtime()-t1
call dgamx2d(ictxt,'a',' ',ione, ione,t2,ione,t1,t1,-1,-1,-1)
tswsdrv(i,nc) = t2
!! Add correctness check
end do
end do
if (amroot) then
open(2,file=out_file,action='write',position='append')
do nc=1, ncsw
write(*,'(a18,1x,a4,1(1x,i2),1x,i5,5(1x,g9.4))') mtrx_name,'MPI',&
& nprow,nc,minval(tswmpi(:,nc)),maxval(tswmpi(:,nc)),&
& sum(tswmpi(:,nc))/ntcs
write(2,'(a18,1x,a4,1(1x,i2),1x,i5,5(1x,g9.4))') mtrx_name,'MPI',&
& nprow,nc,minval(tswmpi(:,nc)),maxval(tswmpi(:,nc)),&
& sum(tswmpi(:,nc))/ntcs
write(*,'(a18,1x,a4,1(1x,i2),1x,i5,5(1x,g9.4))') mtrx_name,'SYNC',&
& nprow,nc,minval(tswsyn(:,nc)),maxval(tswsyn(:,nc)),&
& sum(tswsyn(:,nc))/ntcs
write(2,'(a18,1x,a4,1(1x,i2),1x,i5,5(1x,g9.4))') mtrx_name,'SYNC',&
& nprow,nc,minval(tswsyn(:,nc)),maxval(tswsyn(:,nc)),&
& sum(tswsyn(:,nc))/ntcs
write(*,'(a18,1x,a4,1(1x,i2),1x,i5,5(1x,g9.4))') mtrx_name,'SDRV',&
& nprow,nc,minval(tswsdrv(:,nc)),maxval(tswsdrv(:,nc)),&
& sum(tswsdrv(:,nc))/ntcs
write(2,'(a18,1x,a4,1(1x,i2),1x,i5,5(1x,g9.4))') mtrx_name,'SDRV',&
& nprow,nc,minval(tswsdrv(:,nc)),maxval(tswsdrv(:,nc)),&
& sum(tswsdrv(:,nc))/ntcs
end do
close(2)
end if
call f90_psdsfree(tsthal, desc_a)
call f90_psdsfree(b_col, desc_a)
call f90_psspfree(a, desc_a)
call f90_psdscfree(desc_a,info)
end do
deallocate(ovrs,precs,ierrv, stat=info)
write(0,*) 'Info from deallocate ',info
call blacs_gridexit(ictxt)
call blacs_exit(0)
end program df_comm

@ -1,350 +0,0 @@
PROGRAM DF_SAMPLE
USE F90SPARSE
USE MAT_DIST
USE READ_MAT
USE PARTGRAPH
USE GETP
use mpi
IMPLICIT NONE
! Input parameters
CHARACTER*20 :: CMETHD, PREC, MTRX_FILE, RHS_FILE
CHARACTER*80 :: CHARBUF
DOUBLE PRECISION DDOT
EXTERNAL DDOT
INTERFACE
! .....user passed subroutine.....
SUBROUTINE PART_BLOCK(GLOBAL_INDX,N,NP,PV,NV)
IMPLICIT NONE
INTEGER, INTENT(IN) :: GLOBAL_INDX, N, NP
INTEGER, INTENT(OUT) :: NV
INTEGER, INTENT(OUT) :: PV(*)
END SUBROUTINE PART_BLOCK
END INTERFACE ! Local variables
INTEGER, PARAMETER :: IZERO=0, IONE=1
CHARACTER, PARAMETER :: ORDER='R'
REAL(KIND(1.D0)), POINTER, SAVE :: B_COL(:), X_COL(:), R_COL(:), &
& B_COL_GLOB(:), X_COL_GLOB(:), R_COL_GLOB(:), B_GLOB(:,:), &
&Z(:), Q(:),Z1(:)
INTEGER :: IARGC, CHECK_DESCR, CONVERT_DESCR
Real(Kind(1.d0)), Parameter :: Dzero = 0.d0, One = 1.d0
Real(Kind(1.d0)) :: T1, T2, TPREC, R_AMAX, B_AMAX,bb(1,1),&
&lambda,scale,resmx,resmxp
integer :: nrhs, nrow, nx1, nx2, n_row, dim,iread
logical :: amroot
integer mpe_log_get_event_number,mpe_Describe_state,mpe_log_event
External IARGC
integer bsze,overlap
common/part/bsze,overlap
INTEGER, POINTER :: WORK(:)
! Sparse Matrices
TYPE(D_SPMAT) :: A, AUX_A, H
TYPE(D_PREC) :: PRE
!!$ TYPE(D_PRECN) :: APRC
! Dense Matrices
REAL(KIND(1.D0)), POINTER :: AUX_B(:,:) , AUX1(:), AUX2(:), VDIAG(:), &
& AUX_G(:,:), AUX_X(:,:), D(:)
! Communications data structure
TYPE(desc_type) :: DESC_A, DESC_A_OUT
! BLACS parameters
INTEGER :: NPROW, NPCOL, ICTXT, IAM, NP, MYPROW, MYPCOL
! Solver paramters
INTEGER :: ITER, ITMAX, IERR, ITRACE, IRCODE, IPART,&
& METHD, ISTOPC, ML
integer, pointer :: ierrv(:)
REAL(KIND(1.D0)) :: ERR, EPS
integer iparm(20)
real(kind(1.d0)) rparm(20)
! Other variables
INTEGER :: I,INFO,J, iprecb,iprece,islvb,islve
INTEGER :: INTERNAL, M,II,NNZERO
! common area
INTEGER M_PROBLEM, NPROC
allocate(ierrv(6))
! Initialize BLACS
CALL BLACS_PINFO(IAM, NP)
CALL BLACS_GET(IZERO, IZERO, ICTXT)
iprecb = mpe_log_get_event_number()
iprece = mpe_log_get_event_number()
islvb = mpe_log_get_event_number()
islve = mpe_log_get_event_number()
if (iam==0) then
info = mpe_describe_state(iprecb,iprece,"Preconditioner","OrangeRed")
info = mpe_describe_state(islvb,islve,"Solver","DarkGreen")
endif
! Rectangular Grid, Np x 1
CALL BLACS_GRIDINIT(ICTXT, ORDER, NP, IONE)
CALL BLACS_GRIDINFO(ICTXT, NPROW, NPCOL, MYPROW, MYPCOL)
AMROOT = (MYPROW==0).AND.(MYPCOL==0)
!
! Get parameters
!
CALL GET_PARMS(ICTXT,MTRX_FILE,RHS_FILE,CMETHD,PREC,&
& IPART,ISTOPC,ITMAX,ITRACE,ML,PRE%PREC,EPS)
CALL BLACS_BARRIER(ICTXT,'A')
T1 = MPI_WTIME()
! Read the input matrix to be processed and (possibly) the RHS
NRHS = 1
NPROC = NPROW
IF (AMROOT) THEN
NULLIFY(AUX_B)
CALL READMAT(MTRX_FILE, AUX_A, ICTXT)
M_PROBLEM = AUX_A%M
CALL IGEBS2D(ICTXT,'A',' ',1,1,M_PROBLEM,1)
IF(RHS_FILE /= 'NONE') THEN
! Reading an RHS
CALL READ_RHS(RHS_FILE,AUX_B,ICTXT)
END IF
IF (ASSOCIATED(AUX_B).and.SIZE(AUX_B,1)==M_PROBLEM) THEN
! If any RHS were present, broadcast the first one
write(0,*) 'Ok, got an RHS ',aux_b(m_problem,1)
B_COL_GLOB =>AUX_B(:,1)
ELSE
write(0,*) 'Inventing an RHS '
ALLOCATE(AUX_B(M_PROBLEM,1), STAT=IRCODE)
IF (IRCODE /= 0) THEN
WRITE(0,*) 'Memory allocation failure in DF_SAMPLE'
CALL BLACS_ABORT(ICTXT,-1)
STOP
ENDIF
B_COL_GLOB =>AUX_B(:,1)
DO I=1, M_PROBLEM
B_COL_GLOB(I) = REAL(I)*2.0/REAL(M_PROBLEM)
ENDDO
ENDIF
CALL DGEBS2D(ICTXT,'A',' ',M_PROBLEM,1,B_COL_GLOB,M_PROBLEM)
ELSE
CALL IGEBR2D(ICTXT,'A',' ',1,1,M_PROBLEM,1,0,0)
WRITE(0,*) 'Receiving AUX_B'
ALLOCATE(AUX_B(M_PROBLEM,1), STAT=IRCODE)
IF (IRCODE /= 0) THEN
WRITE(0,*) 'Memory allocation failure in DF_SAMPLE'
CALL BLACS_ABORT(ICTXT,-1)
STOP
ENDIF
B_COL_GLOB =>AUX_B(:,1)
CALL DGEBR2D(ICTXT,'A',' ',M_PROBLEM,1,B_COL_GLOB,M_PROBLEM,0,0)
END IF
! Switch over different partition types
IF (IPART.EQ.0) THEN
CALL BLACS_BARRIER(ICTXT,'A')
WRITE(6,*) 'Partition type: BLOCK'
CALL MATDIST(AUX_A, A, PART_BLOCK, ICTXT, &
& DESC_A,B_COL_GLOB,B_COL)
ELSE IF (IPART.EQ.2) THEN
WRITE(0,*) 'Partition type: GRAPH'
IF (AMROOT) THEN
!!$ WRITE(0,*) 'Call BUILD',size(aux_a%ia1),size(aux_a%ia2),np
CALL BUILD_GRPPART(AUX_A%M,AUX_A%FIDA,AUX_A%IA1,AUX_A%IA2,NP)
ENDIF
CALL BLACS_BARRIER(ICTXT,'A')
!!$ WRITE(0,*) myprow,'Done BUILD_GRPPART'
CALL DISTR_GRPPART(0,0,ICTXT)
!!$ WRITE(0,*) myprow,'Done DISTR_GRPPART'
CALL MATDIST(AUX_A, A, PART_GRAPH, ICTXT, &
& DESC_A,B_COL_GLOB,B_COL)
ELSE
WRITE(6,*) 'Partition type: BLOCK'
CALL MATDIST(AUX_A, A, PART_BLOCK, ICTXT, &
& DESC_A,B_COL_GLOB,B_COL)
END IF
CALL F90_PSDSALL(M_PROBLEM,X_COL,IERRV,DESC_A)
X_COL(:) =0.0
CALL F90_PSDSASB(X_COL,IERRV,DESC_A)
CALL F90_PSDSALL(M_PROBLEM,R_COL,IERRV,DESC_A)
R_COL(:) =0.0
CALL F90_PSDSASB(R_COL,IERRV,DESC_A)
T2 = MPI_WTIME() - T1
DIM=SIZE(A%ASPK)
!!$ ALLOCATE(H%ASPK(DIM),H%IA1(DIM),H%IA2(DIM),H%PL(SIZE(A%PL)),&
!!$ & H%PL(SIZE(A%PL)),D(SIZE(A%PL)),&
!!$ & DESC_A_OUT%MATRIX_DATA(SIZE(DESC_A%MATRIX_DATA)),&
!!$ & DESC_A_OUT%HALO_INDEX(SIZE(DESC_A%HALO_INDEX)),&
!!$ & DESC_A_OUT%OVRLAP_INDEX(SIZE(DESC_A%OVRLAP_INDEX)),&
!!$ & DESC_A_OUT%OVRLAP_ELEM(SIZE(DESC_A%OVRLAP_ELEM)),&
!!$ & DESC_A_OUT%LOC_TO_GLOB(SIZE(DESC_A%LOC_TO_GLOB)),&
!!$ & DESC_A_OUT%GLOB_TO_LOC(SIZE(DESC_A%GLOB_TO_LOC)), WORK(dim))
!!$ check_descr=15
! work(5)=9
!!$ WRITE(0,*)'CALLING VERIFY'
!!$ CALL F90_PSVERIFY(D,A,DESC_A,CHECK_DESCR,CONVERT_DESCR,H,&
!!$ & DESC_A_OUT,WORK)
!!$ WRITE(0,*)'VERIFY DONE',CONVERT_DESCR
! deallocate(work)
CALL DGAMX2D(ICTXT, 'A', ' ', IONE, IONE, T2, IONE,&
& T1, T1, -1, -1, -1)
IF (AMROOT) THEN
WRITE(6,*) 'Time to Read and Partition Matrix : ',T2
END IF
!
! Prepare the preconditioning matrix. Note the availability
! of optional parameters
!
IF (AMROOT) WRITE(6,*) 'Preconditioner : "',PREC(1:6),'" ',PRE%PREC
!!$ do i=1,a%m
!!$ do j=a%ia2(i),a%ia2(i+1)-1
!!$ write(0,*)'a ',i,a%ia1(j),a%aspk(j)
!!$ end do
!!$ end do
!!$
!!$ write(0,*)'halo_index',desc_a%halo_index(:)
!!$ write(0,*)'ovrlap_index',desc_a%ovrlap_index(:)
!!$ write(0,*)'ovrlap_elem',desc_a%ovrlap_elem(:)
info = MPE_Log_event( iprecb, 0, "start Precond" )
T1 = MPI_WTIME()
CALL PRECONDITIONER(A,PRE,DESC_A,INFO)!,'F')
TPREC = MPI_WTIME()-T1
info = MPE_Log_event( iprece, 0, "end Precond" )
CALL DGAMX2D(ICTXT,'A',' ',IONE, IONE,TPREC,IONE,T1,T1,-1,-1,-1)
WRITE(0,*) 'Preconditioner Time :',TPREC,' ',&
&prec,pre%prec
IF (INFO /= 0) THEN
WRITE(0,*) 'Error in preconditioner :',INFO
CALL BLACS_ABORT(ICTXT,-1)
STOP
END IF
IPARM = 0
RPARM = 0.D0
CALL BLACS_BARRIER(ICTXT,'All')
info = MPE_Log_event( islvb, 0, "start Solver" )
T1 = MPI_WTIME()
IF (CMETHD.EQ.'BICGSTAB') Then
CALL F90_BICGSTAB(A,PRE,B_COL,X_COL,EPS,DESC_A,&
& ITMAX,ITER,ERR,IERR,ITRACE)
!!$ ELSE IF (CMETHD.EQ.'BICG') Then
!!$ CALL F90_BICG(A,IPREC,L,U,VDIAG,B_COL,X_COL,EPS,DESC_A,&
!!$ & ITMAX,ITER,ERR,IERR,ITRACE)
!!$ ELSE IF (CMETHD.EQ.'CGS') Then
!!$ CALL F90_CGS(A,IPREC,L,U,VDIAG,B_COL,X_COL,EPS,DESC_A,&
!!$ & ITMAX,ITER,ERR,IERR,ITRACE)
!!$ ELSE IF (CMETHD.EQ.'BICGSTABL') Then
!!$ CALL F90_BICGSTABL(A,IPREC,L,U,VDIAG,B_COL,X_COL,EPS,DESC_A,&
!!$ & ITMAX,ITER,ERR,IERR,ITRACE,ML)
ENDIF
T2 = MPI_WTIME() - T1
info = MPE_Log_event( islve, 0, "end Solver" )
CALL DGAMX2D(ICTXT,'A',' ',IONE, IONE,T2,IONE,T1,T1,-1,-1,-1)
call f90_psaxpby(1.d0,b_col,0.d0,r_col,desc_A)
call f90_psspmm(-1.d0,a,x_col,1.d0,r_col,desc_a)
call f90_amax(resmx,r_col,desc_a)
where (b_col /= 0.d0)
r_col = r_col/b_col
end where
call f90_amax(resmxp,r_col,desc_a)
!!$ ITER=IPARM(5)
!!$ ERR = RPARM(2)
if (amroot) then
write(6,*) 'methd iprec istopc : ',pre%prec, istopc
write(6,*) 'Number of iterations : ',iter
write(6,*) 'Time to Solve Matrix : ',T2
WRITE(6,*) 'Time per iteration : ',T2/(ITER)
WRITE(6,*) 'Error on exit : ',ERR
END IF
allocate(x_col_glob(m_problem),r_col_glob(m_problem),stat=ierr)
if (ierr.ne.0) then
write(0,*) 'Allocation error: no data collection'
else
call f90_psdgatherm(x_col_glob,x_col,desc_a,iroot=0)
call f90_psdgatherm(r_col_glob,r_col,desc_a,iroot=0)
if (amroot) then
write(0,*) 'Saving X on file'
write(20,*) 'Matrix: ',mtrx_file
write(20,*) 'Computed solution on ',NPROW,' processors.'
write(20,*) 'Iterations to convergence: ',iter
write(20,*) 'Error indicator (infinity norm) on exit:', &
& ' ||r||/(||A||||x||+||b||) = ',err
write(20,*) 'Max residual = ',resmx, resmxp
do i=1,m_problem
write(20,998) i,x_col_glob(i),r_col_glob(i),b_col_glob(i)
enddo
end if
end if
998 format(i8,4(2x,g20.14))
993 format(i6,4(1x,e12.6))
!!$ !
!!$ ! Raleygh quotients for first eigenvalue
!!$ !
!!$ CALL F90_PSDSall(M_problem,Q,ierrv,DESC_A)
!!$ CALL F90_PSDSall(M_problem,Z,ierrv,DESC_A)
!!$ CALL F90_PSDSall(M_problem,Z1,ierrv,DESC_A)
!!$ CALL F90_PSDSasb(Q,ierrv,DESC_A)
!!$ CALL F90_PSDSasb(Z,ierrv,DESC_A)
!!$ CALL F90_PSDSasb(Z1,ierrv,DESC_A)
!!$ scale = f90_psnrm2(x_col,desc_a)
!!$ scale = one/scale
!!$ call f90_psaxpby(scale,x_col,dzero,q,desc_A)
!!$ call f90_psspmm(one,a,q,dzero,z,desc_a)
!!$ do i=1, itmax
!!$ scale = f90_psnrm2(z,desc_a)
!!$ scale = one/scale
!!$ call f90_psaxpby(one,z,dzero,z1,desc_a)
!!$ call f90_psaxpby(scale,z,dzero,q,desc_a)
!!$ call f90_psspmm(one,a,q,dzero,z,desc_a)
!!$ lambda = f90_psdot(q,z,desc_A)
!!$ scale = f90_psnrm2(z,desc_A)
!!$ if (amroot) write(0,*) 'Lambda: ',i,lambda, scale
!!$ enddo
!!$ call f90_psaxpby(-one,z,one,z1,desc_a)
!!$ scale = f90_psnrm2(z1,desc_A)
!!$ if (amroot) write(0,*) 'Final check: ',i,lambda, scale
!!$ do i=1, desc_a%matrix_data(n_row_)
!!$ scale=z(i)/q(i)
!!$ write(0,*) 'Vector check: ',i,lambda, scale,abs(scale-lambda)
!!$ enddo
CALL F90_PSDSFREE(B_COL, DESC_A)
CALL F90_PSDSFREE(X_COL, DESC_A)
CALL F90_PSSPFREE(A, DESC_A)
CALL F90_PSPRECFREE(PRE,info)
CALL F90_PSDSCFREE(DESC_A,info)
CALL BLACS_GRIDEXIT(ICTXT)
CALL BLACS_EXIT(0)
END PROGRAM DF_SAMPLE

@ -1,419 +0,0 @@
program df_samplem
use f90sparse
use mat_dist
use read_mat
use partgraph
implicit none
! input parameters
character*20 :: cmethd, prec, mtrx_file, rhs_file
character*80 :: charbuf
integer :: inparms(20)
double precision ddot
external ddot
interface
! .....user passed subroutine.....
subroutine part_block(global_indx,n,np,pv,nv)
implicit none
integer, intent(in) :: global_indx, n, np
integer, intent(out) :: nv
integer, intent(out) :: pv(*)
end subroutine part_block
end interface ! local variables
interface
! .....user passed subroutine.....
subroutine part_blk2(global_indx,n,np,pv,nv)
implicit none
integer, intent(in) :: global_indx, n, np
integer, intent(out) :: nv
integer, intent(out) :: pv(*)
end subroutine part_blk2
end interface ! Local variables
integer, parameter :: izero=0, ione=1
character, parameter :: order='r'
real(kind(1.d0)), pointer, save :: b_col(:), x_col(:), r_col(:), &
& b_col_glob(:), x_col_glob(:), r_col_glob(:), b_glob(:,:), &
&z(:), q(:),z1(:), xm(:,:), ym(:,:)
integer :: iargc, check_descr, convert_descr
real(kind(1.d0)), parameter :: dzero = 0.d0, one = 1.d0
real(kind(1.d0)) :: mpi_wtime, t1, t2, tprec, r_amax, b_amax,bb(1,1),&
&lambda,scale,resmx,resmxp
integer :: nrhs, nrow, nx1, nx2, n_row, dim,iread,ip,io,no,nmats,imat,irenum
logical :: amroot
external iargc, mpi_wtime
integer bsze,overlap, nn, nprecs, novrs
common/part/bsze,overlap
integer, pointer :: work(:), precs(:), ovrs(:)
! sparse matrices
type(d_spmat) :: a, aux_a, h
type(d_prec) :: pre
!!$ type(d_precn) :: aprc
! dense matrices
real(kind(1.d0)), pointer :: aux_b(:,:) , aux1(:), aux2(:), vdiag(:), &
& aux_g(:,:), aux_x(:,:), d(:)
! communications data structure
type(desc_type) :: desc_a, desc_a_out
! blacs parameters
integer :: nprow, npcol, ictxt, iam, np, myprow, mypcol
! solver paramters
integer :: iter, itmax, ierr, itrace, ircode, ipart,&
& methd, istopc, ml
integer, pointer :: ierrv(:)
character(len=5) :: afmt
real(kind(1.d0)) :: err, eps
integer iparm(20)
real(kind(1.d0)) rparm(20)
! other variables
integer :: i,info,j
integer :: internal, m,ii,nnzero
! common area
integer m_problem, nproc
allocate(ierrv(6))
! initialize blacs
call blacs_pinfo(iam, np)
call blacs_get(izero, izero, ictxt)
! rectangular Grid, Np x 1
call blacs_gridinit(ictxt, order, np, ione)
call blacs_gridinfo(ictxt, nprow, npcol, myprow, mypcol)
amroot = (myprow==0).and.(mypcol==0)
!
! Get parameters from file
!
if (amroot) then
read(*,*) cmethd
do i = 1, len(cmethd)
inparms(i) = iachar(cmethd(i:i))
end do
call igebs2d(ictxt,'all',' ',20,1,inparms,20)
read(*,*) afmt
do i = 1, len(afmt)
inparms(i) = iachar(afmt(i:i))
end do
call igebs2d(ictxt,'all',' ',20,1,inparms,20)
read(*,*) ipart
read(*,*) itmax
read(*,*) itrace
read(*,*) istopc
read(*,*) irenum
read(*,*) nprecs
inparms(1) = ipart
inparms(2) = itmax
inparms(3) = itrace
inparms(4) = irenum
inparms(5) = nprecs
inparms(6) = istopc
call igebs2d(ictxt,'all',' ',6,1,inparms,20)
!!$ write(0,*) 'Sent nprecs: ',nprecs
allocate(precs(nprecs))
read(*,*) (precs(i),i=1,nprecs)
call igebs2d(ictxt,'all',' ',nprecs,1,precs,nprecs)
read(*,*) novrs
call igebs2d(ictxt,'all',' ',1,1,novrs,1)
!!$ write(0,*) 'Sent novrs: ',novrs
allocate(ovrs(novrs))
read(*,*) (ovrs(i),i=1,novrs)
call igebs2d(ictxt,'all',' ',novrs,1,ovrs,novrs)
read(*,*) eps
call dgebs2d(ictxt,'all',' ',1,1,eps,1)
read(*,*) nmats
call igebs2d(ictxt,'all',' ',1,1,nmats,1)
else
call igebr2d(ictxt,'a',' ',20,1,inparms,20,0,0)
do i = 1, len(cmethd)
cmethd(i:i) = achar(inparms(i))
end do
call igebr2d(ictxt,'a',' ',20,1,inparms,20,0,0)
do i = 1, len(afmt)
afmt(i:i) = achar(inparms(i))
end do
call igebr2d(ictxt,'all',' ',6,1,inparms,20,0,0)
ipart = inparms(1)
itmax = inparms(2)
itrace = inparms(3)
irenum = inparms(4)
nprecs = inparms(5)
istopc = inparms(6)
!!$ write(0,*) 'Recvd nprecs: ',nprecs
allocate(precs(nprecs))
call igebr2d(ictxt,'all',' ',nprecs,1,precs,nprecs,0,0)
call igebr2d(ictxt,'all',' ',1,1,novrs,1,0,0)
!!$ write(0,*) 'Recvd novrs: ',novrs
allocate(ovrs(novrs))
call igebr2d(ictxt,'all',' ',novrs,1,ovrs,novrs,0,0)
call dgebr2d(ictxt,'all',' ',1,1,eps,1,0,0)
call igebr2d(ictxt,'all',' ',1,1,nmats,1,0,0)
endif
do imat=1,nmats
if (amroot) then
read(*,*) mtrx_file, rhs_file
!!$ write(0,*) 'Read mtx rhs : "',mtrx_file,'" "',rhs_file,'"'
!!$ do i = 1, len(mtrx_file)
!!$ inparms(i) = iachar(mtrx_file(i:i))
!!$ end do
!!$ ! broadcast parameters to all processors
!!$ call igebs2d(ictxt,'all',' ',20,1,inparms,20)
!!$ do i = 1, len(rhs_file)
!!$ inparms(i) = iachar(rhs_file(i:i))
!!$ end do
!!$ ! broadcast parameters to all processors
!!$ call igebs2d(ictxt,'all',' ',20,1,inparms,20)
!!$ write(0,*) 'Sent mtx rhs : "',mtrx_file,'" "',rhs_file,'"'
!!$ else
!!$ call igebr2d(ictxt,'a',' ',20,1,inparms,20,0,0)
!!$ do i = 1, len(mtrx_file)
!!$ mtrx_file(i:i) = achar(inparms(i))
!!$ end do
!!$ call igebr2d(ictxt,'a',' ',20,1,inparms,20,0,0)
!!$ do i = 1, len(rhs_file)
!!$ rhs_file(i:i) = achar(inparms(i))
!!$ end do
!!$ write(0,*) 'Recvd mtx rhs : "',mtrx_file,'" "',rhs_file,'"'
endif
call blacs_barrier(ictxt,'A')
t1 = mpi_wtime()
! read the input matrix to be processed and (possibly) the RHS
nrhs = 1
nproc = nprow
if (amroot) then
nullify(aux_b)
call readmat(mtrx_file, aux_a, ictxt)
!!$ write(0,*) 'from readmat: ',aux_a%fida,aux_a%m,':',&
!!$ &aux_a%ia2(aux_a%m+1)-1,':',aux_a%ia1(1:10)
m_problem = aux_a%m
call igebs2d(ictxt,'a',' ',1,1,m_problem,1)
if (rhs_file /= 'none') then
! reading an rhs
call read_rhs(rhs_file,aux_b,ictxt)
end if
if (associated(aux_b).and.size(aux_b,1)==m_problem) then
! if any rhs were present, broadcast the first one
!!$ write(0,*) 'ok, got an rhs ',aux_b(m_problem,1)
b_col_glob =>aux_b(:,1)
else
!!$ write(0,*) 'inventing an rhs '
allocate(aux_b(m_problem,1), stat=ircode)
if (ircode /= 0) then
write(0,*) 'memory allocation failure in df_sample'
call blacs_abort(ictxt,-1)
stop
endif
b_col_glob =>aux_b(:,1)
do i=1, m_problem
b_col_glob(i) = 1.d0
enddo
endif
call dgebs2d(ictxt,'a',' ',m_problem,1,b_col_glob,m_problem)
else
call igebr2d(ictxt,'a',' ',1,1,m_problem,1,0,0)
!!$ write(0,*) 'Receiving AUX_B'
allocate(aux_b(m_problem,1), stat=ircode)
if (ircode /= 0) then
write(0,*) 'Memory allocation failure in DF_SAMPLE'
call blacs_abort(ictxt,-1)
stop
endif
b_col_glob =>aux_b(:,1)
call dgebr2d(ictxt,'A',' ',m_problem,1,b_col_glob,m_problem,0,0)
end if
! switch over different partition types
if (ipart.eq.0) then
call blacs_barrier(ictxt,'A')
!!$ write(0,*) 'Partition type: BLOCK'
call matdist(aux_a, a, part_block, ictxt, &
& desc_a,b_col_glob,b_col,fmt=afmt)
else if (ipart.eq.1) then
call blacs_barrier(ictxt,'A')
!!$ write(0,*) 'partition type: BLK2'
call matdist(aux_a, a, part_blk2, ictxt, &
& desc_a,b_col_glob,b_col,fmt=afmt)
else if (ipart.eq.2) then
!!$ write(0,*) 'partition type: GRAPH'
if (amroot) then
!!$ write(0,*) 'Call BUILD',size(aux_a%ia1),size(aux_a%ia2),np
!!$ write(0,*) 'Build type: GRAPH ',aux_a%fida,&
!!$ &aux_a%m
call build_grppart(aux_a%m,aux_a%fida,aux_a%ia1,aux_a%ia2,np)
endif
call distr_grppart(0,0,ictxt)
call matdist(aux_a, a, part_graph, ictxt, &
& desc_a,b_col_glob,b_col,fmt=afmt)
else
!!$ write(6,*) 'Partition type: BLOCK'
call matdist(aux_a, a, part_block, ictxt, &
& desc_a,b_col_glob,b_col,fmt=afmt)
end if
call f90_psdsall(m_problem,x_col,ierrv,desc_a)
x_col(:) =0.0
call f90_psdsasb(x_col,ierrv,desc_a)
call f90_psdsall(m_problem,r_col,ierrv,desc_a)
r_col(:) =0.0
call f90_psdsasb(r_col,ierrv,desc_a)
t2 = mpi_wtime() - t1
dim=size(a%aspk)
call dgamx2d(ictxt, 'a', ' ', ione, ione, t2, ione,&
& t1, t1, -1, -1, -1)
if (amroot) then
write(6,*) 'time to Read and Partition Matrix : ',T2
END IF
!
! Prepare the preconditioning matrix. Note the availability
! of optional parameters
!
do ip=1,nprecs
pre%prec=precs(ip)
if (pre%prec>2) then
no=novrs
else
no=1
endif
do io=1, no
if (pre%prec>2) then
pre%n_ovr=ovrs(io)
else
pre%n_ovr=0
endif
pre%irenum = irenum
!!$ if (amroot) write(0,*) 'Preconditioner : ',&
!!$ &PRE%PREC,pre%n_ovr
!!$ do i=1,a%m
!!$ do j=a%ia2(i),a%ia2(i+1)-1
!!$ write(0,*)'a ',i,a%ia1(j),a%aspk(j)
!!$ end do
!!$ end do
!!$
!!$ write(0,*)'halo_index',desc_a%halo_index(:)
!!$ write(0,*)'ovrlap_index',desc_a%ovrlap_index(:)
!!$ write(0,*)'ovrlap_elem',desc_a%ovrlap_elem(:)
! Zero initial guess.
call f90_psaxpby(0.d0,b_col,0.d0,x_col,desc_a)
call blacs_barrier(ictxt,'All')
t1 = mpi_wtime()
call preconditioner(a,pre,desc_a,info)!,'f')
tprec = mpi_wtime()-t1
write(0,*) myprow,' Preconditioner Time :',TPREC,' ',&
&pre%prec
call DGAMX2D(ICTXT,'A',' ',IONE, IONE,TPREC,IONE,T1,T1,-1,-1,-1)
if (amroot) then
write(0,*) 'Preconditioner Time :',TPREC,' ',&
&pre%prec
endif
if (info /= 0) then
write(0,*) 'error in preconditioner :',info
call blacs_abort(ictxt,-1)
stop
end if
if (pre%prec>=ras2lv_) then
write(*,*) myprow, 'Aggregation checks: ',pre%na_f1,pre%nn_f1,pre%na_tot
if (amroot) write(*,*) 'Output local aggregates ',pre%nlaggr(:)
end if
iparm = 0
rparm = 0.d0
call blacs_barrier(ictxt,'all')
t1 = mpi_wtime()
if (cmethd.eq.'BICGSTAB') Then
call f90_bicgstab(a,pre,b_col,x_col,eps,desc_a,&
& itmax,iter,err,ierr,itrace,istop=istopc)
!!$ ELSE IF (CMETHD.EQ.'BICG') Then
!!$ CALL F90_BICG(A,IPREC,L,U,VDIAG,B_COL,X_COL,EPS,DESC_A,&
!!$ & ITMAX,ITER,ERR,IERR,ITRACE)
!!$ ELSE IF (CMETHD.EQ.'CGS') Then
!!$ CALL F90_CGS(A,IPREC,L,U,VDIAG,B_COL,X_COL,EPS,DESC_A,&
!!$ & ITMAX,ITER,ERR,IERR,ITRACE)
!!$ ELSE IF (CMETHD.EQ.'BICGSTABL') Then
!!$ CALL F90_BICGSTABL(A,IPREC,L,U,VDIAG,B_COL,X_COL,EPS,DESC_A,&
!!$ & ITMAX,ITER,ERR,IERR,ITRACE,ML)
endif
call blacs_barrier(ictxt,'all')
t2 = mpi_wtime() - t1
call dgamx2d(ictxt,'a',' ',ione, ione,t2,ione,t1,t1,-1,-1,-1)
call f90_psaxpby(1.d0,b_col,0.d0,r_col,desc_A)
call f90_psspmm(-1.d0,a,x_col,1.d0,r_col,desc_a)
call f90_nrm2(resmx,r_col,desc_a)
call f90_amax(resmxp,r_col,desc_a)
if (amroot) then
write(6,*) 'methd iprec istopc : ',pre%prec, pre%n_ovr, istopc
!!$ write(6,*) 'Number of iterations : ',iter
!!$ WRITE(6,*) 'Error on exit : ',ERR
write(6,*) 'Matrix: ',mtrx_file
write(6,*) 'Computed solution on ',NPROW,' processors.'
write(6,*) 'Iterations to convergence: ',iter
write(6,*) 'Error indicator on exit:',err
write(6,*) 'Time to Buil Prec. : ',TPREC
write(6,*) 'Time to Solve Matrix : ',T2
WRITE(6,*) 'Time per iteration : ',T2/(ITER)
write(6,*) 'Total Time : ',T2+TPREC
write(6,*) 'Residual norm 2 = ',resmx
write(6,*) 'Residual norm inf = ',resmxp
write(6,*)
write(6,*)
write(8,'(a18,3(1x,i2),1x,i5,5(1x,g9.4))') mtrx_file,nprow,pre%prec,pre%n_ovr,&
& iter,tprec,t2,t2+tprec,resmx,resmxp
END IF
!!$ write(0,*) 'Done matrix ',imat,ip,io
call blacs_barrier(ictxt,'all')
call f90_psprecfree(pre,info)
!!$ write(0,*) 'Done precfree'
call blacs_barrier(ictxt,'all')
end do
end do
deallocate(aux_b)
if (amroot) call spfree(aux_a,info)
call f90_psdsfree(b_col, desc_a)
call f90_psdsfree(x_col, desc_a)
call f90_psdsfree(r_col, desc_a)
call f90_psspfree(a, desc_a)
call f90_psdscfree(desc_a,info)
end do
call blacs_gridexit(ictxt)
call blacs_exit(0)
end program df_samplem

@ -1,376 +0,0 @@
PROGRAM TESTMM
USE F90SPARSE
USE MAT_DIST
USE READ_MAT
USE PARTGRAPH
USE GETP
IMPLICIT NONE
! Input parameters
CHARACTER*20 :: CMETHD, PREC, MTRX_FILE, RHS_FILE
CHARACTER*80 :: CHARBUF
DOUBLE PRECISION DDOT
EXTERNAL DDOT
INTERFACE
! .....user passed subroutine.....
SUBROUTINE PART_BLOCK(GLOBAL_INDX,N,NP,PV,NV)
IMPLICIT NONE
INTEGER, INTENT(IN) :: GLOBAL_INDX, N, NP
INTEGER, INTENT(OUT) :: NV
INTEGER, INTENT(OUT) :: PV(*)
END SUBROUTINE PART_BLOCK
END INTERFACE ! Local variables
INTEGER, PARAMETER :: IZERO=0, IONE=1
CHARACTER, PARAMETER :: ORDER='R'
REAL(KIND(1.D0)), POINTER, SAVE :: B_COL(:), X_COL(:), R_COL(:), &
& B_COL_GLOB(:), X_COL_GLOB(:), R_COL_GLOB(:), B_GLOB(:,:), &
&Z(:), Q(:),Z1(:), XM(:,:), YM1(:,:), YMM(:,:)
INTEGER :: IARGC, CHECK_DESCR, CONVERT_DESCR
Real(Kind(1.d0)), Parameter :: Dzero = 0.d0, One = 1.d0
Real(Kind(1.d0)) :: MPI_WTIME, T1, T2, TPREC, R_AMAX, B_AMAX,bb(1,1),&
&lambda,scale,resmx,resmxp, tlpm1, tlpmm, tt, tnc1
integer :: nrhs, nrow, nx1, nx2, n_row, dim,iread, itry
integer, parameter :: ntry=16
logical :: amroot
External IARGC, MPI_WTIME
integer bsze,overlap
common/part/bsze,overlap
INTEGER, POINTER :: WORK(:)
! Sparse Matrices
TYPE(D_SPMAT) :: A, AUX_A, H
TYPE(D_PREC) :: PRE
!!$ TYPE(D_PRECN) :: APRC
! Dense Matrices
REAL(KIND(1.D0)), POINTER :: AUX_B(:,:) , AUX1(:), AUX2(:), VDIAG(:), &
& AUX_G(:,:), AUX_X(:,:), D(:)
! Communications data structure
TYPE(desc_type) :: DESC_A, DESC_A_OUT
! BLACS parameters
INTEGER :: NPROW, NPCOL, ICTXT, IAM, NP, MYPROW, MYPCOL
! Solver paramters
INTEGER :: ITER, ITMAX, IERR, ITRACE, IRCODE, IPART,&
& METHD, ISTOPC, ML, NCOLS, nc
integer, pointer :: ierrv(:)
character(len=5) :: afmt
REAL(KIND(1.D0)) :: ERR, EPS
integer iparm(20)
real(kind(1.d0)) rparm(20)
! Other variables
INTEGER :: I,INFO,J
INTEGER :: INTERNAL, M,II,NNZERO
! common area
INTEGER M_PROBLEM, NPROC
allocate(ierrv(6))
! Initialize BLACS
CALL BLACS_PINFO(IAM, NP)
CALL BLACS_GET(IZERO, IZERO, ICTXT)
! Rectangular Grid, Np x 1
CALL BLACS_GRIDINIT(ICTXT, ORDER, NP, IONE)
CALL BLACS_GRIDINFO(ICTXT, NPROW, NPCOL, MYPROW, MYPCOL)
AMROOT = (MYPROW==0).AND.(MYPCOL==0)
!
! Get parameters
!
CALL GET_PARMS(ICTXT,MTRX_FILE,RHS_FILE,CMETHD,PREC,&
& IPART,AFMT,NCOLS,ITMAX,ITRACE,PRE%N_OVR,PRE%PREC,EPS)
CALL BLACS_BARRIER(ICTXT,'A')
T1 = MPI_WTIME()
! Read the input matrix to be processed and (possibly) the RHS
NRHS = 1
NPROC = NPROW
IF (AMROOT) THEN
NULLIFY(AUX_B)
CALL READMAT(MTRX_FILE, AUX_A, ICTXT)
WRITE(0,*) 'From readmat: ',aux_a%fida,aux_a%m,':',&
&aux_a%ia2(aux_a%m+1)-1,':',aux_a%ia1(1:10)
M_PROBLEM = AUX_A%M
CALL IGEBS2D(ICTXT,'A',' ',1,1,M_PROBLEM,1)
IF(RHS_FILE /= 'NONE') THEN
! Reading an RHS
CALL READ_RHS(RHS_FILE,AUX_B,ICTXT)
END IF
IF (ASSOCIATED(AUX_B).and.SIZE(AUX_B,1)==M_PROBLEM) THEN
! If any RHS were present, broadcast the first one
write(0,*) 'Ok, got an RHS ',aux_b(m_problem,1)
B_COL_GLOB =>AUX_B(:,1)
ELSE
write(0,*) 'Inventing an RHS '
ALLOCATE(AUX_B(M_PROBLEM,1), STAT=IRCODE)
IF (IRCODE /= 0) THEN
WRITE(0,*) 'Memory allocation failure in TESTMM'
CALL BLACS_ABORT(ICTXT,-1)
STOP
ENDIF
B_COL_GLOB =>AUX_B(:,1)
DO I=1, M_PROBLEM
B_COL_GLOB(I) = REAL(I)*2.0/REAL(M_PROBLEM)
ENDDO
ENDIF
CALL DGEBS2D(ICTXT,'A',' ',M_PROBLEM,1,B_COL_GLOB,M_PROBLEM)
ELSE
CALL IGEBR2D(ICTXT,'A',' ',1,1,M_PROBLEM,1,0,0)
WRITE(0,*) 'Receiving AUX_B'
ALLOCATE(AUX_B(M_PROBLEM,1), STAT=IRCODE)
IF (IRCODE /= 0) THEN
WRITE(0,*) 'Memory allocation failure in TESTMM'
CALL BLACS_ABORT(ICTXT,-1)
STOP
ENDIF
B_COL_GLOB =>AUX_B(:,1)
CALL DGEBR2D(ICTXT,'A',' ',M_PROBLEM,1,B_COL_GLOB,M_PROBLEM,0,0)
END IF
! Switch over different partition types
IF (IPART.EQ.0) THEN
CALL BLACS_BARRIER(ICTXT,'A')
WRITE(6,*) 'Partition type: BLOCK'
CALL MATDIST(AUX_A, A, PART_BLOCK, ICTXT, &
& DESC_A,B_COL_GLOB,B_COL,FMT=AFMT)
ELSE IF (IPART.EQ.2) THEN
IF (AMROOT) THEN
!!$ WRITE(0,*) 'Call BUILD',size(aux_a%ia1),size(aux_a%ia2),np
WRITE(0,*) 'Build type: GRAPH ',aux_a%fida,&
&aux_a%m
CALL BUILD_GRPPART(AUX_A%M,AUX_A%FIDA,AUX_A%IA1,AUX_A%IA2,NP)
ENDIF
CALL DISTR_GRPPART(0,0,ICTXT)
CALL MATDIST(AUX_A, A, PART_GRAPH, ICTXT, &
& DESC_A,B_COL_GLOB,B_COL,FMT=AFMT)
ELSE
WRITE(6,*) 'Partition type: BLOCK'
CALL MATDIST(AUX_A, A, PART_BLOCK, ICTXT, &
& DESC_A,B_COL_GLOB,B_COL,FMT=AFMT)
END IF
CALL F90_PSDSALL(M_PROBLEM,X_COL,IERRV,DESC_A)
X_COL(:) =0.0
CALL F90_PSDSASB(X_COL,IERRV,DESC_A)
CALL F90_PSDSALL(M_PROBLEM,R_COL,IERRV,DESC_A)
R_COL(:) =0.0
CALL F90_PSDSASB(R_COL,IERRV,DESC_A)
T2 = MPI_WTIME() - T1
CALL DGAMX2D(ICTXT, 'A', ' ', IONE, IONE, T2, IONE,&
& T1, T1, -1, -1, -1)
IF (AMROOT) THEN
WRITE(6,*) 'Time to Read and Partition Matrix : ',T2
END IF
!
! Prepare the preconditioning matrix. Note the availability
! of optional parameters
!
IF (AMROOT) WRITE(6,*) 'Preconditioner : "',PREC(1:6),'" ',PRE%PREC
!!$ do i=1,a%m
!!$ do j=a%ia2(i),a%ia2(i+1)-1
!!$ write(0,*)'a ',i,a%ia1(j),a%aspk(j)
!!$ end do
!!$ end do
!!$
!!$ write(0,*)'halo_index',desc_a%halo_index(:)
!!$ write(0,*)'ovrlap_index',desc_a%ovrlap_index(:)
!!$ write(0,*)'ovrlap_elem',desc_a%ovrlap_elem(:)
T1 = MPI_WTIME()
CALL PRECONDITIONER(A,PRE,DESC_A,INFO)!,'F')
TPREC = MPI_WTIME()-T1
CALL DGAMX2D(ICTXT,'A',' ',IONE, IONE,TPREC,IONE,T1,T1,-1,-1,-1)
WRITE(0,*) 'Preconditioner Time :',TPREC,' ',&
&prec,pre%prec
IF (INFO /= 0) THEN
WRITE(0,*) 'Error in preconditioner :',INFO
CALL BLACS_ABORT(ICTXT,-1)
STOP
END IF
IPARM = 0
RPARM = 0.D0
CALL BLACS_BARRIER(ICTXT,'All')
T1 = MPI_WTIME()
IF (CMETHD.EQ.'BICGSTAB') Then
CALL F90_BICGSTAB(A,PRE,B_COL,X_COL,EPS,DESC_A,&
& ITMAX,ITER,ERR,IERR,ITRACE)
!!$ ELSE IF (CMETHD.EQ.'BICG') Then
!!$ CALL F90_BICG(A,IPREC,L,U,VDIAG,B_COL,X_COL,EPS,DESC_A,&
!!$ & ITMAX,ITER,ERR,IERR,ITRACE)
!!$ ELSE IF (CMETHD.EQ.'CGS') Then
!!$ CALL F90_CGS(A,IPREC,L,U,VDIAG,B_COL,X_COL,EPS,DESC_A,&
!!$ & ITMAX,ITER,ERR,IERR,ITRACE)
!!$ ELSE IF (CMETHD.EQ.'BICGSTABL') Then
!!$ CALL F90_BICGSTABL(A,IPREC,L,U,VDIAG,B_COL,X_COL,EPS,DESC_A,&
!!$ & ITMAX,ITER,ERR,IERR,ITRACE,ML)
ENDIF
CALL BLACS_BARRIER(ICTXT,'All')
T2 = MPI_WTIME() - T1
CALL DGAMX2D(ICTXT,'A',' ',IONE, IONE,T2,IONE,T1,T1,-1,-1,-1)
call f90_psaxpby(1.d0,b_col,0.d0,r_col,desc_A)
call f90_psspmm(-1.d0,a,x_col,1.d0,r_col,desc_a)
call f90_amax(resmx,r_col,desc_a)
where (b_col /= 0.d0)
r_col = r_col/b_col
end where
call f90_amax(resmxp,r_col,desc_a)
!!$ ITER=IPARM(5)
!!$ ERR = RPARM(2)
if (amroot) then
write(6,*) 'methd iprec : ',pre%prec
write(6,*) 'Number of iterations : ',iter
write(6,*) 'Time to Solve Matrix : ',t2
write(6,*) 'Time per iteration : ',t2/(iter)
write(6,*) 'Error on exit : ',err
end if
do nc=1, ncols
call f90_psdsall(m_problem,nc,xm,ierrv,desc_a)
call f90_psdsall(m_problem,nc,ym1,ierrv,desc_a)
call f90_psdsall(m_problem,nc,ymm,ierrv,desc_a)
ym1(:,:) = 0.d0
ymm(:,:) = 0.d0
do j=1,nc
xm(:,j) = j
end do
call f90_psdsasb(xm,ierrv,desc_a)
call f90_psdsasb(ym1,ierrv,desc_a)
call f90_psdsasb(ymm,ierrv,desc_a)
tlpm1 = 1.d200
do itry=1,ntry
call blacs_barrier(ictxt,'All')
T1 = MPI_WTIME()
do i=1, nc
call f90_psspmm(1.d0,a,xm(:,i),1.d0,ym1(:,i),desc_a)
enddo
t2 = mpi_wtime()-t1
call dgamx2d(ictxt,'a',' ',ione, ione,t2,ione,t1,t1,-1,-1,-1)
tlpm1 = min(tlpm1,t2)
!!$ write(0,*) 'Timing for loop ',nc,itry,t2
enddo
tlpmm = 1.d200
do itry=1,ntry
call blacs_barrier(ictxt,'All')
T1 = MPI_WTIME()
call f90_psspmm(1.d0,a,xm,1.d0,ymm,desc_a)
t2 = mpi_wtime()-t1
call dgamx2d(ictxt,'a',' ',ione, ione,t2,ione,t1,t1,-1,-1,-1)
tlpmm = min(tlpmm,t2)
!!$ write(0,*) 'Timing for mm ',nc,itry,t2
enddo
!!$ ymm = ymm - ym1
if (nc == 1) tnc1 = tlpm1
if (amroot) then
!!$ write(6,*) 'Size : ',ncols,size(xm,2),size(ym1,2)
!!$ write(6,*) 'Loop : ',tlpm1
!!$ write(6,*) 'Single call : ',tlpmm
write(6,997) nc, tlpm1, tlpmm, tlpm1/(nc*tnc1),tlpmm/(nc*tnc1)
997 format(i8,4(2x,g16.10))
end if
!!$ write(6,*) 'maxdiff : ',maxval(ymm)
call f90_psdsfree(xm,desc_a)
call f90_psdsfree(ymm,desc_a)
call f90_psdsfree(ym1,desc_a)
end do
if (.false.) then
allocate(x_col_glob(m_problem),r_col_glob(m_problem),stat=ierr)
if (ierr.ne.0) then
write(0,*) 'Allocation error: no data collection'
else
call f90_psdgatherm(x_col_glob,x_col,desc_a,iroot=0)
call f90_psdgatherm(r_col_glob,r_col,desc_a,iroot=0)
if (amroot) then
write(0,*) 'Saving X on file'
write(20,*) 'Matrix: ',mtrx_file
write(20,*) 'Computed solution on ',NPROW,' processors.'
write(20,*) 'Iterations to convergence: ',iter
write(20,*) 'Error indicator (infinity norm) on exit:', &
& ' ||r||/(||A||||x||+||b||) = ',err
write(20,*) 'Max residual = ',resmx, resmxp
do i=1,m_problem
write(20,998) i,x_col_glob(i),r_col_glob(i),b_col_glob(i)
enddo
end if
end if
998 format(i8,4(2x,g20.14))
993 format(i6,4(1x,e12.6))
!!$ !
!!$ ! Raleygh quotients for first eigenvalue
!!$ !
!!$ CALL F90_PSDSall(M_problem,Q,ierrv,DESC_A)
!!$ CALL F90_PSDSall(M_problem,Z,ierrv,DESC_A)
!!$ CALL F90_PSDSall(M_problem,Z1,ierrv,DESC_A)
!!$ CALL F90_PSDSasb(Q,ierrv,DESC_A)
!!$ CALL F90_PSDSasb(Z,ierrv,DESC_A)
!!$ CALL F90_PSDSasb(Z1,ierrv,DESC_A)
!!$ scale = f90_psnrm2(x_col,desc_a)
!!$ scale = one/scale
!!$ call f90_psaxpby(scale,x_col,dzero,q,desc_A)
!!$ call f90_psspmm(one,a,q,dzero,z,desc_a)
!!$ do i=1, itmax
!!$ scale = f90_psnrm2(z,desc_a)
!!$ scale = one/scale
!!$ call f90_psaxpby(one,z,dzero,z1,desc_a)
!!$ call f90_psaxpby(scale,z,dzero,q,desc_a)
!!$ call f90_psspmm(one,a,q,dzero,z,desc_a)
!!$ lambda = f90_psdot(q,z,desc_A)
!!$ scale = f90_psnrm2(z,desc_A)
!!$ if (amroot) write(0,*) 'Lambda: ',i,lambda, scale
!!$ enddo
!!$ call f90_psaxpby(-one,z,one,z1,desc_a)
!!$ scale = f90_psnrm2(z1,desc_A)
!!$ if (amroot) write(0,*) 'Final check: ',i,lambda, scale
!!$ do i=1, desc_a%matrix_data(n_row_)
!!$ scale=z(i)/q(i)
!!$ write(0,*) 'Vector check: ',i,lambda, scale,abs(scale-lambda)
!!$ enddo
endif
CALL F90_PSDSFREE(B_COL, DESC_A)
CALL F90_PSDSFREE(X_COL, DESC_A)
CALL F90_PSSPFREE(A, DESC_A)
CALL F90_PSPRECFREE(PRE,info)
CALL F90_PSDSCFREE(DESC_A,info)
CALL BLACS_GRIDEXIT(ICTXT)
CALL BLACS_EXIT(0)
END PROGRAM TESTMM

@ -1,154 +0,0 @@
* SUBROUTINE DESYM(NROW,A,JA,IA,AS,JAS,IAS,IAW,NNZERO) *
* *
* Purpose *
* ======= *
* Utility routine to convert from symmetric storage *
* to full format (CSR mode). *
* *
* Parameter *
* ========= *
* INPUT= *
* *
* SYMBOLIC NAME: NROW *
* POSITION: Parameter No.1 *
* ATTRIBUTES: INTEGER *
* VALUES: NROW>0 *
* DESCRIPTION: On entry NROW specifies the number of rows of the *
* input sparse matrix. The number of column of the input *
* sparse matrix mest be the same. *
* Unchanged on exit. *
* *
* SYMBOLIC NAME: A *
* POSITION: Parameter No.2 *
* ATTRIBUTES: DOUBLE PRECISION ARRAY of Dimension (NNZERO) *
* VALUES: *
* DESCRIPTION: A specifies the values of the input sparse matrix. *
* This matrix is stored in CSR mode *
* Unchanged on exit. *
* *
* SYMBOLIC NAME: JA *
* POSITION: Parameter No. 3 *
* ATTRIBUTES: INTEGER ARRAY(IA(NNZERO)) *
* VALUES: > 0 *
* DESCRIPTION: Column indices stored by rows refered to the input *
* sparse matrix. *
* Unchanged on exit. *
* *
* SYMBOLIC NAME: IA *
* POSITION: Parameter No. 4 *
* ATTRIBUTES: INTEGER ARRAY(NROW+1) *
* VALUES: >0; increasing. *
* DESCRIPTION: Row pointer array: it contains the starting *
* position of each row of A in array JA. *
* Unchanged on exit. *
* *
* SYMBOLIC NAME: IAW *
* POSITION: Parameter No. 7 *
* ATTRIBUTES: INTEGER ARRAY of Dimension (NROW+1) *
* VALUES: >0; *
* DESCRIPTION: Work Area. *
* *
* SYMBOLIC NAME: WORK *
* POSITION: Parameter No. 8 *
* ATTRIBUTES: REAL*8 ARRAY of Dimension (NROW+1) *
* VALUES: >0; *
* DESCRIPTION: Work Area. *
* *
* SYMBOLIC NAME: NNZERO *
* POSITION: Parameter No. 9 *
* ATTRIBUTES: INTEGER *
* VALUES: >0; *
* DESCRIPTION: On entry contains: the number of the non zero *
* entry of the input matrix. *
* Unchanged on exit. *
* OUTPUT== *
* *
* *
* SYMBOLIC NAME: AS *
* POSITION: Parameter No.5 *
* ATTRIBUTES: DOUBLE PRECISION ARRAY of Dimension (*) *
* VALUES: *
* DESCRIPTION: On exit A specifies the values of the output sparse *
* matrix. *
* This matrix correspondes to A rapresented in FULL-CSR *
* mode *
* *
* SYMBOLIC NAME: JAS *
* POSITION: Parameter No. 6 *
* ATTRIBUTES: INTEGER ARRAY(IAS(NROW+1)-1) *
* VALUES: > 0 *
* DESCRIPTION: Column indices stored by rows refered to the output *
* sparse matrix. *
* *
* SYMBOLIC NAME: IAS *
* POSITION: Parameter No. S *
* ATTRIBUTES: INTEGER ARRAY(NROW+1) *
* VALUES: >0; increasing. *
* DESCRIPTION: Row pointer array: it contains the starting *
* position of each row of AS in array JAS. *
*****************************************************************************
C
SUBROUTINE ZDESYM(NROW,A,JA,IA,AS,JAS,IAS,AUX,WORK,NNZERO,PTR,
+ NZR, VALUE)
IMPLICIT NONE
C .. Scalar Arguments ..
INTEGER NROW,NNZERO,VALUE,INDEX,PTR,NZR
C .. Array Arguments ..
COMPLEX*16 A(*),AS(*),WORK(*)
INTEGER IA(*),IAS(*),JAS(*),JA(*),AUX(*)
C .. Local Scalars ..
INTEGER I,IAW1,IAW2,IAWT,J,JPT,K,KPT,LDIM,NZL,JS, IRET, NEL,DIAGEL
C REAL*8 BUF
C ..
NEL = 0
DIAGEL=0
DO I=1, NNZERO
IF(JA(I).LE.IA(I)) THEN
NEL = NEL+1
AS(I) = A(I)
JAS(I) = JA(I)
IAS(I) = IA(I)
IF(JA(I).NE.IA(I)) THEN !This control avoids malfunctions in the cases
! where the matrix is declared symmetric but all
!his elements are explicitly stored
! see young1c.mtx from "Matrix Market"
AS(NNZERO+I) = A(I)
JAS(NNZERO+I) = IA(I)
IAS(NNZERO+I) = JA(I)
ELSE
DIAGEL = DIAGEL+1
END IF
END IF
END DO
C .... Order with key IAS ...
CALL MRGSRT(2*NNZERO,IAS,AUX,IRET)
IF (IRET.EQ.0) CALL ZREORDVN(2*NNZERO,AS,IAS,JAS,AUX)
C .... Order with key JAS ...
I = 1
J = I
DO WHILE (I.LE.(2*NNZERO))
DO WHILE ((IAS(J).EQ.IAS(I)).AND.
+ (J.LE.2*NNZERO))
J = J+1
ENDDO
NZL = J - I
CALL MRGSRT(NZL,JAS(I),AUX,IRET)
IF (IRET.EQ.0) CALL ZREORDVN(NZL,AS(I),IAS(I),JAS(I),
+ AUX)
I = J
ENDDO
NZR = NEL*2 - DIAGEL
PTR = 2*NNZERO-NZR+1
RETURN
END

@ -1,304 +0,0 @@
PROGRAM ZF_SAMPLE
USE TYPESP
USE TYPEDESC
USE F90TOOLS
USE F90PSBLAS
USE F90METHD
USE MAT_DIST
USE READ_MAT
USE PARTGRAPH
USE GETP
IMPLICIT NONE
! Input parameters
CHARACTER*20 :: CMETHD, PREC, MTRX_FILE, RHS_FILE
CHARACTER*80 :: CHARBUF
DOUBLE PRECISION DDOT
EXTERNAL DDOT
INTERFACE
! .....user passed subroutine.....
SUBROUTINE PART_BLOCK(GLOBAL_INDX,N,NP,PV,NV)
IMPLICIT NONE
INTEGER, INTENT(IN) :: GLOBAL_INDX, N, NP
INTEGER, INTENT(OUT) :: NV
INTEGER, INTENT(OUT) :: PV(*)
END SUBROUTINE PART_BLOCK
END INTERFACE ! Local variables
INTEGER, PARAMETER :: IZERO=0, IONE=1
CHARACTER, PARAMETER :: ORDER='R'
COMPLEX(KIND(1.D0)), POINTER,SAVE :: B_COL(:), X_COL(:), R_COL(:), &
& B_COL_GLOB(:), X_COL_GLOB(:), R_COL_GLOB(:), B_GLOB(:,:), &
&Z(:), Q(:),Z1(:)
INTEGER :: IARGC
Real(Kind(1.d0)), Parameter :: Dzero = 0.d0, One = 1.d0
Real(Kind(1.d0)) :: MPI_WTIME, T1, T2, TPREC, R_AMAX, B_AMAX,bb(1,1),lambda,scale,resmx,resmxp
integer :: nrhs, nrow, nx1, nx2, n_row
External IARGC, MPI_WTIME
integer bsze,overlap
common/part/bsze,overlap
! Sparse Matrices
TYPE(Z_SPMAT) :: A, AUX_A, L, U
!!$ TYPE(D_PRECN) :: APRC
! Dense Matrices
COMPLEX(KIND(1.D0)), POINTER :: AUX_B(:,:) , AUX1(:), AUX2(:), VDIAG(:),&
& AUX_G(:,:), AUX_X(:,:)
! Communications data structure
TYPE(desc_type) :: DESC_A
! BLACS parameters
INTEGER :: NPROW, NPCOL, ICTXT, IAM, NP, MYPROW, MYPCOL
logical :: amroot
! Solver paramters
INTEGER :: ITER, ITMAX, IERR, ITRACE, IRCODE, IPART,&
& IPREC, METHD, ISTOPC, ML
integer, pointer :: ierrv(:)
REAL(KIND(1.D0)) :: ERR, EPS
integer iparm(20)
real(kind(1.d0)) rparm(20)
! Other variables
INTEGER :: I,INFO,J
INTEGER :: INTERNAL, M,II,NNZERO
! common area
INTEGER M_PROBLEM, NPROC
allocate(ierrv(6))
! Initialize BLACS
CALL BLACS_PINFO(IAM, NP)
CALL BLACS_GET(IZERO, IZERO, ICTXT)
! Rectangular Grid, Np x 1
CALL BLACS_GRIDINIT(ICTXT, ORDER, NP, IONE)
CALL BLACS_GRIDINFO(ICTXT, NPROW, NPCOL, MYPROW, MYPCOL)
amroot = (myprow==0).and.(mypcol==0)
!
! Get parameters
!
CALL GET_PARMS(ICTXT,MTRX_FILE,RHS_FILE,CMETHD,PREC,&
& IPART,ISTOPC,ITMAX,ITRACE,ML,IPREC,EPS)
CALL BLACS_BARRIER(ICTXT,'A')
T1 = MPI_WTIME()
! Read the input matrix to be processed and (possibly) the RHS
NRHS = 1
IF (amroot) THEN
NULLIFY(AUX_B)
CALL ZREADMAT(MTRX_FILE, AUX_A, ICTXT)
M_PROBLEM = AUX_A%M
CALL IGEBS2D(ICTXT,'A',' ',1,1,M_PROBLEM,1)
IF(RHS_FILE.NE.'NONE') THEN
! Reading an RHS
CALL ZREAD_RHS(RHS_FILE,AUX_B,ICTXT)
END IF
IF (ASSOCIATED(AUX_B).and.SIZE(AUX_B,1)==M_PROBLEM) THEN
! If any RHS were present, broadcast the first one
write(0,*) 'Ok, got an RHS ',aux_b(m_problem,1)
B_COL_GLOB =>AUX_B(:,1)
ELSE
write(0,*) 'Inventing an RHS '
ALLOCATE(AUX_B(M_PROBLEM,1), STAT=IRCODE)
IF (IRCODE /= 0) THEN
WRITE(0,*) 'Memory allocation failure in ZF_SAMPLE'
CALL BLACS_ABORT(ICTXT,-1)
STOP
ENDIF
write(0,*) 'Check on AUX_B ',size(aux_b,1),size(aux_b,2),m_problem
B_COL_GLOB => AUX_B(:,1)
DO I=1, M_PROBLEM
B_COL_GLOB(I) = CMPLX(I*2.0/M_PROBLEM,0)
ENDDO
ENDIF
CALL ZGEBS2D(ICTXT,'A',' ',M_PROBLEM,1,B_COL_GLOB,M_PROBLEM)
ELSE
CALL IGEBR2D(ICTXT,'A',' ',1,1,M_PROBLEM,1,0,0)
WRITE(0,*) 'Receiving AUX_B'
ALLOCATE(AUX_B(M_PROBLEM,1), STAT=IRCODE)
IF (IRCODE /= 0) THEN
WRITE(0,*) 'Memory allocation failure in ZF_SAMPLE'
CALL BLACS_ABORT(ICTXT,-1)
STOP
ENDIF
write(0,*) 'Check on AUX_B ',size(aux_b,1),size(aux_b,2),m_problem
B_COL_GLOB =>AUX_B(:,1)
CALL ZGEBR2D(ICTXT,'A',' ',M_PROBLEM,1,B_COL_GLOB,M_PROBLEM,0,0)
END IF
NPROC = NPROW
! Switch over different partition types
IF (IPART.EQ.0) THEN
CALL BLACS_BARRIER(ICTXT,'A')
WRITE(6,*) 'Partition type: BLOCK'
CALL ZMATDIST(AUX_A, A, PART_BLOCK, ICTXT, &
& DESC_A,B_COL_GLOB,B_COL)
ELSE IF (IPART.EQ.2) THEN
WRITE(6,*) amroot,' Partition type: GRAPH'
IF (amroot) THEN
CALL BUILD_GRPPART(AUX_A%M,AUX_A%FIDA,AUX_A%IA1,AUX_A%IA2,NP)
ENDIF
call blacs_barrier(ictxt,'All')
CALL DISTR_GRPPART(0,0,ICTXT)
CALL ZMATDIST(AUX_A, A, PART_GRAPH, ICTXT, &
& DESC_A,B_COL_GLOB,B_COL)
ELSE
WRITE(6,*) 'Partition type: BLOCK'
CALL ZMATDIST(AUX_A, A, PART_BLOCK, ICTXT, &
& DESC_A,B_COL_GLOB,B_COL)
END IF
write(*,*) amroot,' Done matdist'
CALL F90_PSDSALL(M_PROBLEM,X_COL,IERRV,DESC_A)
X_COL(:) =0.0
CALL F90_PSDSASB(X_COL,IERRV,DESC_A)
CALL F90_PSDSALL(M_PROBLEM,R_COL,IERRV,DESC_A)
R_COL(:) =0.0
CALL F90_PSDSASB(R_COL,IERRV,DESC_A)
T2 = MPI_WTIME() - T1
CALL DGAMX2D(ICTXT, 'A', ' ', IONE, IONE, T2, IONE,&
& T1, T1, -1, -1, -1)
IF (amroot) THEN
WRITE(6,*) 'Time to Read and Partition Matrix : ',T2
END IF
!
! Prepare the preconditioning matrix. Note the availability
! of optional parameters
!
IF (amroot) WRITE(6,*) 'Preconditioner : "',PREC(1:6),'" ',iprec
T1 = MPI_WTIME()
CALL PRECONDITIONER(IPREC,A,L,U,VDIAG,DESC_A,INFO)
TPREC = MPI_WTIME()-T1
CALL DGAMX2D(ICTXT,'A',' ',IONE, IONE,TPREC,IONE,T1,T1,-1,-1,-1)
WRITE(0,*) 'Preconditioner Time : ',TPREC,' ',&
&prec,iprec
IF (INFO /= 0) THEN
WRITE(0,*) 'Error in preconditioner :',INFO
CALL BLACS_ABORT(ICTXT,-1)
STOP
END IF
IPARM = 0
RPARM = 0.D0
write(0,*) amroot,'Starting method'
CALL BLACS_BARRIER(ICTXT,'All')
T1 = MPI_WTIME()
IF (CMETHD.EQ.'BICGSTAB') Then
CALL F90_BICGSTAB(A,IPREC,L,U,VDIAG,B_COL,X_COL,EPS,DESC_A,&
& ITMAX,ITER,ERR,IERR,ITRACE)
!!$ ELSE IF (CMETHD.EQ.'BICG') Then
!!$ CALL F90_BICG(A,IPREC,L,U,VDIAG,B_COL,X_COL,EPS,DESC_A,&
!!$ & ITMAX,ITER,ERR,IERR,ITRACE)
!!$ ELSE IF (CMETHD.EQ.'CGS') Then
!!$ CALL F90_CGS(A,IPREC,L,U,VDIAG,B_COL,X_COL,EPS,DESC_A,&
!!$ & ITMAX,ITER,ERR,IERR,ITRACE)
!!$ ELSE IF (CMETHD.EQ.'BICGSTABL') Then
!!$ CALL F90_BICGSTABL(A,IPREC,L,U,VDIAG,B_COL,X_COL,EPS,DESC_A,&
!!$ & ITMAX,ITER,ERR,IERR,ITRACE,ML)
ENDIF
CALL BLACS_BARRIER(ICTXT,'All')
T2 = MPI_WTIME() - T1
CALL DGAMX2D(ICTXT,'A',' ',IONE, IONE,T2,IONE,T1,T1,-1,-1,-1)
call f90_psaxpby((1.d0,0.d0),b_col,(0.d0,0.d0),r_col,desc_A)
call f90_psspmm((-1.d0,0.d0),a,x_col,(1.d0,0.d0),r_col,desc_a)
call f90_amax(resmx,r_col,desc_a)
where (b_col/= 0)
r_col = r_col/b_col
end where
call f90_amax(resmxp,r_col,desc_a)
!!$ ITER=IPARM(5)
!!$ ERR = RPARM(2)
if (amroot) then
write(6,*) 'methd iprec istopc : ',iprec, istopc
write(6,*) 'Number of iterations : ',iter
write(6,*) 'Time to Solve Matrix : ',T2
WRITE(6,*) 'Time per iteration : ',T2/(ITER)
WRITE(6,*) 'Error on exit : ',ERR
END IF
allocate(x_col_glob(m_problem),r_col_glob(m_problem),stat=ierr)
if (ierr.ne.0) then
write(0,*) 'Allocation error: no data collection'
else
call f90_psdgatherm(x_col_glob,x_col,desc_a,iroot=0)
call f90_psdgatherm(r_col_glob,r_col,desc_a,iroot=0)
if (amroot) then
write(0,*) 'Saving X on file'
write(20,*) 'Matrix: ',mtrx_file
write(20,*) 'Computed solution on ',NPROW,' processors.'
write(20,*) 'Iterations to convergence: ',iter
write(20,*) 'Error indicator (infinity norm) on exit:', &
& ' ||r||/(||A||||x||+||b||) = ',err
write(20,*) 'Max residual = ',resmx, resmxp
do i=1,m_problem
write(20,998) i,x_col_glob(i),r_col_glob(i),b_col_glob(i)
enddo
end if
end if
998 format(i8,4(2x,g20.14))
993 format(i6,4(1x,e12.6))
!!$ !
!!$ ! Raleygh quotients for first eigenvalue
!!$ !
!!$ CALL F90_PSDSall(M_problem,Q,ierrv,DESC_A)
!!$ CALL F90_PSDSall(M_problem,Z,ierrv,DESC_A)
!!$ CALL F90_PSDSall(M_problem,Z1,ierrv,DESC_A)
!!$ CALL F90_PSDSasb(Q,ierrv,DESC_A)
!!$ CALL F90_PSDSasb(Z,ierrv,DESC_A)
!!$ CALL F90_PSDSasb(Z1,ierrv,DESC_A)
!!$ scale = f90_psnrm2(x_col,desc_a)
!!$ scale = one/scale
!!$ call f90_psaxpby(scale,x_col,dzero,q,desc_A)
!!$ call f90_psspmm(one,a,q,dzero,z,desc_a)
!!$ do i=1, itmax
!!$ scale = f90_psnrm2(z,desc_a)
!!$ scale = one/scale
!!$ call f90_psaxpby(one,z,dzero,z1,desc_a)
!!$ call f90_psaxpby(scale,z,dzero,q,desc_a)
!!$ call f90_psspmm(one,a,q,dzero,z,desc_a)
!!$ lambda = f90_psdot(q,z,desc_A)
!!$ scale = f90_psnrm2(z,desc_A)
!!$ if (amroot) write(0,*) 'Lambda: ',i,lambda, scale
!!$ enddo
!!$ call f90_psaxpby(-one,z,one,z1,desc_a)
!!$ scale = f90_psnrm2(z1,desc_A)
!!$ if (amroot) write(0,*) 'Final check: ',i,lambda, scale
!!$ do i=1, desc_a%matrix_data(n_row_)
!!$ scale=z(i)/q(i)
!!$ write(0,*) 'Vector check: ',i,lambda, scale,abs(scale-lambda)
!!$ enddo
CALL F90_PSDSFREE(B_COL, DESC_A)
CALL F90_PSDSFREE(X_COL, DESC_A)
CALL F90_PSSPFREE(A, DESC_A)
IF (IPREC.GE.2) THEN
CALL F90_PSSPFREE(L, DESC_A)
CALL F90_PSSPFREE(U, DESC_A)
END IF
CALL F90_PSDSCFREE(DESC_A,info)
CALL BLACS_GRIDEXIT(ICTXT)
CALL BLACS_EXIT(0)
END PROGRAM ZF_SAMPLE

@ -17,41 +17,13 @@ EXEDIR=./RUNS
LINKOPT=$(F90COPT) LINKOPT=$(F90COPT)
ppde90log: ppde90log.o part_block.o
$(F90LINK) $(LINKOPT) ppde90log.o part_block.o -o ppde90log\
$(METHD90LIB) $(TOOLS90LIB) $(BLAS90LIB) \
$(PSBLAS_LIB) $(SPARKER_LIB) $(BLAS)\
$(BLACS) -llmpe -lmpe
/bin/mv ppde90log $(EXEDIR)
ppde90: ppde90.o part_block.o ppde90: ppde90.o part_block.o
$(F90LINK) $(LINKOPT) ppde90.o part_block.o -o ppde90\ $(F90LINK) $(LINKOPT) ppde90.o part_block.o -o ppde90\
$(PSBLAS_LIB) $(BLACS) $(PSBLAS_LIB) $(BLACS)
/bin/mv ppde90 $(EXEDIR) /bin/mv ppde90 $(EXEDIR)
ppde90s: ppde90s.o part_block.o
$(F90LINK) $(LINKOPT) ppde90s.o part_block.o -o ppde90s\
$(METHD90LIB) $(TOOLS90LIB) $(BLAS90LIB) \
$(PSBLAS_LIB) $(SPARKER_LIB) $(BLAS)\
$(BLACS)
/bin/mv ppde90s $(EXEDIR)
ppde90.o: $(MODS) ppde90.o: $(MODS)
pp2d: pp2d.o part_block.o
$(F90LINK) $(LINKOPT) pp2d.o part_block.o -o pp2d\
$(METHD90LIB) $(TOOLS90LIB) $(BLAS90LIB) \
$(METHD_LIB) $(PSBLAS_LIB) $(SPARKER_LIB) $(BLAS)\
$(BLACS)
/bin/mv pp2d $(EXEDIR)
pp2ds: pp2ds.o part_block.o
$(F90LINK) $(LINKOPT) pp2ds.o part_block.o -o pp2ds\
$(METHD90LIB) $(TOOLS90LIB) $(BLAS90LIB) \
$(PSBLAS_LIB) $(SPARKER_LIB) $(BLAS)\
$(BLACS)
/bin/mv pp2ds $(EXEDIR)
.f90.o: .f90.o:
$(MPF90) $(F90COPT) $(INCDIRS) -c $< $(MPF90) $(F90COPT) $(INCDIRS) -c $<

Loading…
Cancel
Save