You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
psblas3/base/modules/comm/comm_schemes/psb_comm_rma_mod.F90

389 lines
16 KiB
Fortran

module psb_comm_rma_mod
use psb_const_mod
use psb_desc_const_mod, only: psb_proc_id_, psb_n_elem_recv_, psb_elem_recv_, &
& psb_n_elem_send_, psb_elem_send_
use psb_error_mod
#ifdef PSB_MPI_MOD
use mpi
#endif
use psb_comm_schemes_mod, only: psb_comm_handle_type, psb_comm_rma_pull_, psb_comm_rma_push_, &
& psb_comm_unknown_
implicit none
integer(psb_mpk_), parameter :: psb_rma_meta_tag = 913
#ifdef PSB_MPI_H
include 'mpif.h'
#endif
type, extends(psb_comm_handle_type) :: psb_comm_rma_handle
integer(psb_mpk_) :: win = mpi_win_null
logical :: window_ready = .false.
logical :: window_open = .false.
logical :: layout_ready = .false.
integer(psb_ipk_) :: layout_nnbr = -1
integer(psb_ipk_) :: layout_send = -1
integer(psb_ipk_) :: layout_recv = -1
integer(psb_ipk_), allocatable :: peer_proc(:)
integer(psb_ipk_), allocatable :: peer_send_counts(:)
integer(psb_ipk_), allocatable :: peer_recv_counts(:)
integer(psb_ipk_), allocatable :: peer_send_displs(:)
integer(psb_ipk_), allocatable :: peer_recv_displs(:)
integer(psb_ipk_), allocatable :: peer_mpi_rank(:)
integer(psb_ipk_), allocatable :: peer_remote_send_displs(:)
integer(psb_ipk_), allocatable :: peer_remote_recv_displs(:)
integer(psb_ipk_), allocatable :: peer_send_indexes(:)
integer(psb_ipk_), allocatable :: peer_recv_indexes(:)
integer(psb_mpk_), allocatable :: notify_buf(:)
integer(psb_mpk_), allocatable :: notify_recv_reqs(:)
integer(psb_mpk_), allocatable :: notify_send_reqs(:)
contains
procedure, pass :: init => psb_comm_rma_init
procedure, pass :: free => psb_comm_rma_free
procedure, pass :: clear_memory_buffer_layout => psb_comm_rma_clear_memory_buffer_layout
procedure, pass :: init_memory_buffer_layout => psb_comm_rma_ini_memory_buffer_layout
procedure, pass :: init_memory_buffer_layout_tran => psb_comm_rma_ini_memory_buffer_layout_tran
procedure, pass :: set_swap_status => psb_comm_rma_set_swap_status
procedure, pass :: get_swap_status => psb_comm_rma_get_swap_status
end type psb_comm_rma_handle
contains
subroutine psb_comm_rma_init(this, info)
class(psb_comm_rma_handle), intent(inout) :: this
integer(psb_ipk_), intent(out) :: info
info = 0
this%comm_type = psb_comm_unknown_
this%id = 0
this%swap_status = 0
this%win = mpi_win_null
this%window_ready = .false.
this%window_open = .false.
this%layout_ready = .false.
this%layout_nnbr = -1
this%layout_send = -1
this%layout_recv = -1
call this%clear_memory_buffer_layout(info)
end subroutine psb_comm_rma_init
subroutine psb_comm_rma_clear_memory_buffer_layout(this, info)
class(psb_comm_rma_handle), intent(inout) :: this
integer(psb_ipk_), intent(out) :: info
info = psb_success_
if (allocated(this%peer_proc)) deallocate(this%peer_proc)
if (allocated(this%peer_send_counts)) deallocate(this%peer_send_counts)
if (allocated(this%peer_recv_counts)) deallocate(this%peer_recv_counts)
if (allocated(this%peer_send_displs)) deallocate(this%peer_send_displs)
if (allocated(this%peer_recv_displs)) deallocate(this%peer_recv_displs)
if (allocated(this%peer_mpi_rank)) deallocate(this%peer_mpi_rank)
if (allocated(this%peer_remote_send_displs)) deallocate(this%peer_remote_send_displs)
if (allocated(this%peer_remote_recv_displs)) deallocate(this%peer_remote_recv_displs)
if (allocated(this%peer_send_indexes)) deallocate(this%peer_send_indexes)
if (allocated(this%peer_recv_indexes)) deallocate(this%peer_recv_indexes)
if (allocated(this%notify_buf)) deallocate(this%notify_buf)
if (allocated(this%notify_recv_reqs)) deallocate(this%notify_recv_reqs)
if (allocated(this%notify_send_reqs)) deallocate(this%notify_send_reqs)
this%layout_ready = .false.
this%layout_nnbr = -1
this%layout_send = -1
this%layout_recv = -1
end subroutine psb_comm_rma_clear_memory_buffer_layout
subroutine psb_comm_rma_ini_memory_buffer_layout(this, info, comm_list, peer_mpi_rank, &
& num_neighbors, total_send, total_recv, my_rank, icomm)
class(psb_comm_rma_handle), intent(inout) :: this
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in) :: comm_list(:)
integer(psb_ipk_), intent(in) :: peer_mpi_rank(:)
integer(psb_ipk_), intent(in) :: num_neighbors, total_send, total_recv
integer(psb_ipk_), intent(in) :: my_rank
integer(psb_mpk_), intent(in) :: icomm
integer(psb_mpk_) :: iret, p2pstat(mpi_status_size), prc_rank
integer(psb_ipk_) :: n_neighbors, send_total, recv_total
integer(psb_ipk_) :: neighbor_idx, item_idx
integer(psb_ipk_) :: list_pos, send_offset, recv_offset
integer(psb_ipk_) :: proc_to_comm, recv_count, send_count
integer(psb_ipk_) :: local_meta(4), remote_meta(4)
call this%clear_memory_buffer_layout(info)
if (info /= psb_success_) return
n_neighbors = num_neighbors
send_total = total_send
recv_total = total_recv
if (n_neighbors > 0) then
allocate(this%peer_proc(n_neighbors), this%peer_send_counts(n_neighbors), &
& this%peer_recv_counts(n_neighbors), this%peer_send_displs(n_neighbors), &
& this%peer_recv_displs(n_neighbors), this%peer_mpi_rank(n_neighbors), &
& this%peer_remote_send_displs(n_neighbors), this%peer_remote_recv_displs(n_neighbors), stat=iret)
if (iret /= 0) then
info = psb_err_alloc_dealloc_
return
end if
allocate(this%notify_buf(n_neighbors), this%notify_recv_reqs(n_neighbors), &
& this%notify_send_reqs(n_neighbors), stat=iret)
if (iret /= 0) then
info = psb_err_alloc_dealloc_
return
end if
this%notify_buf = 0_psb_mpk_
this%notify_recv_reqs = MPI_REQUEST_NULL
this%notify_send_reqs = MPI_REQUEST_NULL
end if
if (send_total > 0) then
allocate(this%peer_send_indexes(send_total), stat=iret)
if (iret /= 0) then
info = psb_err_alloc_dealloc_
return
end if
end if
if (recv_total > 0) then
allocate(this%peer_recv_indexes(recv_total), stat=iret)
if (iret /= 0) then
info = psb_err_alloc_dealloc_
return
end if
end if
list_pos = 1
send_offset = 0
recv_offset = 0
do neighbor_idx = 1, n_neighbors
proc_to_comm = comm_list(list_pos + psb_proc_id_)
recv_count = comm_list(list_pos + psb_n_elem_recv_)
send_count = comm_list(list_pos + recv_count + psb_n_elem_send_)
this%peer_proc(neighbor_idx) = proc_to_comm
this%peer_recv_counts(neighbor_idx) = recv_count
this%peer_send_counts(neighbor_idx) = send_count
this%peer_recv_displs(neighbor_idx) = recv_offset
this%peer_send_displs(neighbor_idx) = send_offset
this%peer_mpi_rank(neighbor_idx) = peer_mpi_rank(neighbor_idx)
if (recv_count > 0) then
do item_idx = 1, recv_count
this%peer_recv_indexes(recv_offset + item_idx) = comm_list(list_pos + psb_elem_recv_ + item_idx - 1)
end do
end if
if (send_count > 0) then
do item_idx = 1, send_count
this%peer_send_indexes(send_offset + item_idx) = comm_list(list_pos + recv_count + psb_elem_send_ + item_idx - 1)
end do
end if
recv_offset = recv_offset + recv_count
send_offset = send_offset + send_count
list_pos = list_pos + recv_count + send_count + 3
end do
do neighbor_idx = 1, n_neighbors
proc_to_comm = this%peer_proc(neighbor_idx)
if (proc_to_comm /= my_rank) then
prc_rank = this%peer_mpi_rank(neighbor_idx)
local_meta = (/ this%peer_send_displs(neighbor_idx)+1, this%peer_send_counts(neighbor_idx), &
& this%peer_recv_displs(neighbor_idx)+1+send_total, this%peer_recv_counts(neighbor_idx) /)
call mpi_sendrecv(local_meta, 4, psb_mpi_mpk_, prc_rank, psb_rma_meta_tag, &
& remote_meta, 4, psb_mpi_mpk_, prc_rank, psb_rma_meta_tag, icomm, p2pstat, iret)
if (iret /= mpi_success) then
info = psb_err_mpi_error_
return
end if
this%peer_remote_send_displs(neighbor_idx) = remote_meta(1)
this%peer_remote_recv_displs(neighbor_idx) = remote_meta(3)
else
this%peer_remote_send_displs(neighbor_idx) = this%peer_send_displs(neighbor_idx)+1
this%peer_remote_recv_displs(neighbor_idx) = this%peer_recv_displs(neighbor_idx)+1+send_total
end if
end do
if ((send_offset /= send_total) .or. (recv_offset /= recv_total)) then
info = psb_err_internal_error_
return
end if
this%layout_nnbr = n_neighbors
this%layout_send = send_total
this%layout_recv = recv_total
this%layout_ready = .true.
end subroutine psb_comm_rma_ini_memory_buffer_layout
subroutine psb_comm_rma_free(this, info)
#ifdef PSB_MPI_MOD
use mpi
#endif
class(psb_comm_rma_handle), intent(inout) :: this
integer(psb_ipk_), intent(out) :: info
integer(psb_mpk_) :: iret
info = 0
if (this%window_open) then
call mpi_win_unlock_all(this%win, iret)
this%window_open = .false.
end if
if (this%win /= mpi_win_null) then
call mpi_win_free(this%win, iret)
this%win = mpi_win_null
end if
this%window_ready = .false.
this%layout_ready = .false.
this%layout_nnbr = -1
this%layout_send = -1
this%layout_recv = -1
call this%clear_memory_buffer_layout(info)
end subroutine psb_comm_rma_free
! Transpose variant: peer_send_* is filled from comm_list RECV area,
! peer_recv_* from comm_list SEND area. Metadata exchange tells peers our
! recv displacement so they can GET/PUT to the correct location in swaptran.
! layout_send = total_recv (effective send), layout_recv = total_send.
subroutine psb_comm_rma_ini_memory_buffer_layout_tran(this, info, comm_list, peer_mpi_rank, &
& num_neighbors, total_send, total_recv, my_rank, icomm)
class(psb_comm_rma_handle), intent(inout) :: this
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in) :: comm_list(:)
integer(psb_ipk_), intent(in) :: peer_mpi_rank(:)
integer(psb_ipk_), intent(in) :: num_neighbors, total_send, total_recv
integer(psb_ipk_), intent(in) :: my_rank
integer(psb_mpk_), intent(in) :: icomm
integer(psb_mpk_) :: iret, p2pstat(mpi_status_size), prc_rank
integer(psb_ipk_) :: n_neighbors, eff_send, eff_recv
integer(psb_ipk_) :: neighbor_idx, item_idx
integer(psb_ipk_) :: list_pos, send_offset, recv_offset
integer(psb_ipk_) :: proc_to_comm, actual_recv_count, actual_send_count
integer(psb_ipk_) :: local_meta(4), remote_meta(4)
call this%clear_memory_buffer_layout(info)
if (info /= psb_success_) return
n_neighbors = num_neighbors
! For transpose: effective send = actual recv, effective recv = actual send
eff_send = total_recv
eff_recv = total_send
if (n_neighbors > 0) then
allocate(this%peer_proc(n_neighbors), this%peer_send_counts(n_neighbors), &
& this%peer_recv_counts(n_neighbors), this%peer_send_displs(n_neighbors), &
& this%peer_recv_displs(n_neighbors), this%peer_mpi_rank(n_neighbors), &
& this%peer_remote_send_displs(n_neighbors), this%peer_remote_recv_displs(n_neighbors), stat=iret)
if (iret /= 0) then
info = psb_err_alloc_dealloc_
return
end if
allocate(this%notify_buf(n_neighbors), this%notify_recv_reqs(n_neighbors), &
& this%notify_send_reqs(n_neighbors), stat=iret)
if (iret /= 0) then
info = psb_err_alloc_dealloc_
return
end if
this%notify_buf = 0_psb_mpk_
this%notify_recv_reqs = MPI_REQUEST_NULL
this%notify_send_reqs = MPI_REQUEST_NULL
end if
if (eff_send > 0) then
allocate(this%peer_send_indexes(eff_send), stat=iret)
if (iret /= 0) then
info = psb_err_alloc_dealloc_
return
end if
end if
if (eff_recv > 0) then
allocate(this%peer_recv_indexes(eff_recv), stat=iret)
if (iret /= 0) then
info = psb_err_alloc_dealloc_
return
end if
end if
list_pos = 1
send_offset = 0
recv_offset = 0
do neighbor_idx = 1, n_neighbors
proc_to_comm = comm_list(list_pos + psb_proc_id_)
actual_recv_count = comm_list(list_pos + psb_n_elem_recv_)
actual_send_count = comm_list(list_pos + actual_recv_count + psb_n_elem_send_)
this%peer_proc(neighbor_idx) = proc_to_comm
! Swap: effective send = actual recv, effective recv = actual send
this%peer_send_counts(neighbor_idx) = actual_recv_count
this%peer_recv_counts(neighbor_idx) = actual_send_count
this%peer_send_displs(neighbor_idx) = send_offset
this%peer_recv_displs(neighbor_idx) = recv_offset
this%peer_mpi_rank(neighbor_idx) = peer_mpi_rank(neighbor_idx)
! peer_send_indexes from RECV area of comm_list (these are indices we gather for "sending")
if (actual_recv_count > 0) then
do item_idx = 1, actual_recv_count
this%peer_send_indexes(send_offset + item_idx) = comm_list(list_pos + psb_elem_recv_ + item_idx - 1)
end do
end if
! peer_recv_indexes from SEND area of comm_list (these are indices we scatter into)
if (actual_send_count > 0) then
do item_idx = 1, actual_send_count
this%peer_recv_indexes(recv_offset + item_idx) = comm_list(list_pos + actual_recv_count + psb_elem_send_ + item_idx - 1)
end do
end if
send_offset = send_offset + actual_recv_count
recv_offset = recv_offset + actual_send_count
list_pos = list_pos + actual_recv_count + actual_send_count + 3
end do
do neighbor_idx = 1, n_neighbors
proc_to_comm = this%peer_proc(neighbor_idx)
if (proc_to_comm /= my_rank) then
prc_rank = this%peer_mpi_rank(neighbor_idx)
! Tell peer: our effective-send (=actual recv) displacement and count,
! our effective-recv (=actual send) displacement (offset past eff_send area).
local_meta = (/ this%peer_send_displs(neighbor_idx)+1, this%peer_send_counts(neighbor_idx), &
& this%peer_recv_displs(neighbor_idx)+1+eff_send, this%peer_recv_counts(neighbor_idx) /)
call mpi_sendrecv(local_meta, 4, psb_mpi_mpk_, prc_rank, psb_rma_meta_tag, &
& remote_meta, 4, psb_mpi_mpk_, prc_rank, psb_rma_meta_tag, icomm, p2pstat, iret)
if (iret /= mpi_success) then
info = psb_err_mpi_error_
return
end if
this%peer_remote_send_displs(neighbor_idx) = remote_meta(1)
this%peer_remote_recv_displs(neighbor_idx) = remote_meta(3)
else
this%peer_remote_send_displs(neighbor_idx) = this%peer_send_displs(neighbor_idx)+1
this%peer_remote_recv_displs(neighbor_idx) = this%peer_recv_displs(neighbor_idx)+1+eff_send
end if
end do
if ((send_offset /= eff_send) .or. (recv_offset /= eff_recv)) then
info = psb_err_internal_error_
return
end if
this%layout_nnbr = n_neighbors
this%layout_send = eff_send
this%layout_recv = eff_recv
this%layout_ready = .true.
end subroutine psb_comm_rma_ini_memory_buffer_layout_tran
subroutine psb_comm_rma_set_swap_status(this, flag, info)
class(psb_comm_rma_handle), intent(inout) :: this
integer(psb_ipk_), intent(in) :: flag
integer(psb_ipk_), intent(out) :: info
info = 0
this%swap_status = flag
end subroutine psb_comm_rma_set_swap_status
subroutine psb_comm_rma_get_swap_status(this, flag, info)
class(psb_comm_rma_handle), intent(in) :: this
integer(psb_ipk_), intent(out) :: flag
integer(psb_ipk_), intent(out) :: info
info = 0
flag = this%swap_status
end subroutine psb_comm_rma_get_swap_status
end module psb_comm_rma_mod