From 10f47d731d87584c96ce71edf5f0786fb7873857 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Sat, 25 Jan 2020 11:10:33 +0000 Subject: [PATCH] Define versions of global transpose for IPK matrices --- base/modules/tools/psb_c_tools_mod.f90 | 26 +- base/modules/tools/psb_d_tools_mod.f90 | 26 +- base/modules/tools/psb_s_tools_mod.f90 | 26 +- base/modules/tools/psb_z_tools_mod.f90 | 26 +- base/tools/psb_c_glob_transpose.F90 | 345 ++++++++++++++----------- base/tools/psb_d_glob_transpose.F90 | 345 ++++++++++++++----------- base/tools/psb_s_glob_transpose.F90 | 345 ++++++++++++++----------- base/tools/psb_z_glob_transpose.F90 | 345 ++++++++++++++----------- 8 files changed, 884 insertions(+), 600 deletions(-) diff --git a/base/modules/tools/psb_c_tools_mod.f90 b/base/modules/tools/psb_c_tools_mod.f90 index 056fa226..c843b683 100644 --- a/base/modules/tools/psb_c_tools_mod.f90 +++ b/base/modules/tools/psb_c_tools_mod.f90 @@ -33,7 +33,7 @@ Module psb_c_tools_mod use psb_desc_mod, only : psb_desc_type, psb_spk_, psb_ipk_, psb_lpk_ use psb_c_vect_mod, only : psb_c_base_vect_type, psb_c_vect_type use psb_c_mat_mod, only : psb_cspmat_type, psb_lcspmat_type, psb_c_base_sparse_mat, & - & psb_lc_csr_sparse_mat, psb_lc_coo_sparse_mat + & psb_lc_csr_sparse_mat, psb_lc_coo_sparse_mat, psb_c_coo_sparse_mat use psb_l_vect_mod, only : psb_l_vect_type use psb_c_multivect_mod, only : psb_c_base_multivect_type, psb_c_multivect_type @@ -347,7 +347,15 @@ Module psb_c_tools_mod end interface psb_par_spspmm interface psb_glob_transpose - + subroutine psb_c_coo_glob_transpose(ain,desc_r,info,atrans,desc_c,desc_rx) + import + type(psb_c_coo_sparse_mat), intent(inout) :: ain + type(psb_desc_type), intent(inout), target :: desc_r + type(psb_c_coo_sparse_mat), intent(out), optional :: atrans + type(psb_desc_type), intent(inout), target, optional :: desc_c + type(psb_desc_type), intent(out), optional :: desc_rx + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_coo_glob_transpose subroutine psb_lc_coo_glob_transpose(ain,desc_r,info,atrans,desc_c,desc_rx) import type(psb_lc_coo_sparse_mat), intent(inout) :: ain @@ -364,13 +372,25 @@ Module psb_c_tools_mod type(psb_desc_type) :: desc_a integer(psb_ipk_), intent(out) :: info end subroutine psb_lc_simple_glob_transpose - subroutine psb_lc_simple_glob_transpose_ip(ain,desc_a,info) import type(psb_lcspmat_type), intent(inout) :: ain type(psb_desc_type) :: desc_a integer(psb_ipk_), intent(out) :: info end subroutine psb_lc_simple_glob_transpose_ip + subroutine psb_c_simple_glob_transpose(ain,aout,desc_a,info) + import + type(psb_cspmat_type), intent(in) :: ain + type(psb_cspmat_type), intent(out) :: aout + type(psb_desc_type) :: desc_a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_simple_glob_transpose + subroutine psb_c_simple_glob_transpose_ip(ain,desc_a,info) + import + type(psb_cspmat_type), intent(inout) :: ain + type(psb_desc_type) :: desc_a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_simple_glob_transpose_ip end interface psb_glob_transpose diff --git a/base/modules/tools/psb_d_tools_mod.f90 b/base/modules/tools/psb_d_tools_mod.f90 index 9d9dd037..ba1057d6 100644 --- a/base/modules/tools/psb_d_tools_mod.f90 +++ b/base/modules/tools/psb_d_tools_mod.f90 @@ -33,7 +33,7 @@ Module psb_d_tools_mod use psb_desc_mod, only : psb_desc_type, psb_dpk_, psb_ipk_, psb_lpk_ use psb_d_vect_mod, only : psb_d_base_vect_type, psb_d_vect_type use psb_d_mat_mod, only : psb_dspmat_type, psb_ldspmat_type, psb_d_base_sparse_mat, & - & psb_ld_csr_sparse_mat, psb_ld_coo_sparse_mat + & psb_ld_csr_sparse_mat, psb_ld_coo_sparse_mat, psb_d_coo_sparse_mat use psb_l_vect_mod, only : psb_l_vect_type use psb_d_multivect_mod, only : psb_d_base_multivect_type, psb_d_multivect_type @@ -347,7 +347,15 @@ Module psb_d_tools_mod end interface psb_par_spspmm interface psb_glob_transpose - + subroutine psb_d_coo_glob_transpose(ain,desc_r,info,atrans,desc_c,desc_rx) + import + type(psb_d_coo_sparse_mat), intent(inout) :: ain + type(psb_desc_type), intent(inout), target :: desc_r + type(psb_d_coo_sparse_mat), intent(out), optional :: atrans + type(psb_desc_type), intent(inout), target, optional :: desc_c + type(psb_desc_type), intent(out), optional :: desc_rx + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_coo_glob_transpose subroutine psb_ld_coo_glob_transpose(ain,desc_r,info,atrans,desc_c,desc_rx) import type(psb_ld_coo_sparse_mat), intent(inout) :: ain @@ -364,13 +372,25 @@ Module psb_d_tools_mod type(psb_desc_type) :: desc_a integer(psb_ipk_), intent(out) :: info end subroutine psb_ld_simple_glob_transpose - subroutine psb_ld_simple_glob_transpose_ip(ain,desc_a,info) import type(psb_ldspmat_type), intent(inout) :: ain type(psb_desc_type) :: desc_a integer(psb_ipk_), intent(out) :: info end subroutine psb_ld_simple_glob_transpose_ip + subroutine psb_d_simple_glob_transpose(ain,aout,desc_a,info) + import + type(psb_dspmat_type), intent(in) :: ain + type(psb_dspmat_type), intent(out) :: aout + type(psb_desc_type) :: desc_a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_simple_glob_transpose + subroutine psb_d_simple_glob_transpose_ip(ain,desc_a,info) + import + type(psb_dspmat_type), intent(inout) :: ain + type(psb_desc_type) :: desc_a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_simple_glob_transpose_ip end interface psb_glob_transpose diff --git a/base/modules/tools/psb_s_tools_mod.f90 b/base/modules/tools/psb_s_tools_mod.f90 index be57e5b3..740a7949 100644 --- a/base/modules/tools/psb_s_tools_mod.f90 +++ b/base/modules/tools/psb_s_tools_mod.f90 @@ -33,7 +33,7 @@ Module psb_s_tools_mod use psb_desc_mod, only : psb_desc_type, psb_spk_, psb_ipk_, psb_lpk_ use psb_s_vect_mod, only : psb_s_base_vect_type, psb_s_vect_type use psb_s_mat_mod, only : psb_sspmat_type, psb_lsspmat_type, psb_s_base_sparse_mat, & - & psb_ls_csr_sparse_mat, psb_ls_coo_sparse_mat + & psb_ls_csr_sparse_mat, psb_ls_coo_sparse_mat, psb_s_coo_sparse_mat use psb_l_vect_mod, only : psb_l_vect_type use psb_s_multivect_mod, only : psb_s_base_multivect_type, psb_s_multivect_type @@ -347,7 +347,15 @@ Module psb_s_tools_mod end interface psb_par_spspmm interface psb_glob_transpose - + subroutine psb_s_coo_glob_transpose(ain,desc_r,info,atrans,desc_c,desc_rx) + import + type(psb_s_coo_sparse_mat), intent(inout) :: ain + type(psb_desc_type), intent(inout), target :: desc_r + type(psb_s_coo_sparse_mat), intent(out), optional :: atrans + type(psb_desc_type), intent(inout), target, optional :: desc_c + type(psb_desc_type), intent(out), optional :: desc_rx + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_coo_glob_transpose subroutine psb_ls_coo_glob_transpose(ain,desc_r,info,atrans,desc_c,desc_rx) import type(psb_ls_coo_sparse_mat), intent(inout) :: ain @@ -364,13 +372,25 @@ Module psb_s_tools_mod type(psb_desc_type) :: desc_a integer(psb_ipk_), intent(out) :: info end subroutine psb_ls_simple_glob_transpose - subroutine psb_ls_simple_glob_transpose_ip(ain,desc_a,info) import type(psb_lsspmat_type), intent(inout) :: ain type(psb_desc_type) :: desc_a integer(psb_ipk_), intent(out) :: info end subroutine psb_ls_simple_glob_transpose_ip + subroutine psb_s_simple_glob_transpose(ain,aout,desc_a,info) + import + type(psb_sspmat_type), intent(in) :: ain + type(psb_sspmat_type), intent(out) :: aout + type(psb_desc_type) :: desc_a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_simple_glob_transpose + subroutine psb_s_simple_glob_transpose_ip(ain,desc_a,info) + import + type(psb_sspmat_type), intent(inout) :: ain + type(psb_desc_type) :: desc_a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_simple_glob_transpose_ip end interface psb_glob_transpose diff --git a/base/modules/tools/psb_z_tools_mod.f90 b/base/modules/tools/psb_z_tools_mod.f90 index 465a3990..c421cb77 100644 --- a/base/modules/tools/psb_z_tools_mod.f90 +++ b/base/modules/tools/psb_z_tools_mod.f90 @@ -33,7 +33,7 @@ Module psb_z_tools_mod use psb_desc_mod, only : psb_desc_type, psb_dpk_, psb_ipk_, psb_lpk_ use psb_z_vect_mod, only : psb_z_base_vect_type, psb_z_vect_type use psb_z_mat_mod, only : psb_zspmat_type, psb_lzspmat_type, psb_z_base_sparse_mat, & - & psb_lz_csr_sparse_mat, psb_lz_coo_sparse_mat + & psb_lz_csr_sparse_mat, psb_lz_coo_sparse_mat, psb_z_coo_sparse_mat use psb_l_vect_mod, only : psb_l_vect_type use psb_z_multivect_mod, only : psb_z_base_multivect_type, psb_z_multivect_type @@ -347,7 +347,15 @@ Module psb_z_tools_mod end interface psb_par_spspmm interface psb_glob_transpose - + subroutine psb_z_coo_glob_transpose(ain,desc_r,info,atrans,desc_c,desc_rx) + import + type(psb_z_coo_sparse_mat), intent(inout) :: ain + type(psb_desc_type), intent(inout), target :: desc_r + type(psb_z_coo_sparse_mat), intent(out), optional :: atrans + type(psb_desc_type), intent(inout), target, optional :: desc_c + type(psb_desc_type), intent(out), optional :: desc_rx + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_coo_glob_transpose subroutine psb_lz_coo_glob_transpose(ain,desc_r,info,atrans,desc_c,desc_rx) import type(psb_lz_coo_sparse_mat), intent(inout) :: ain @@ -364,13 +372,25 @@ Module psb_z_tools_mod type(psb_desc_type) :: desc_a integer(psb_ipk_), intent(out) :: info end subroutine psb_lz_simple_glob_transpose - subroutine psb_lz_simple_glob_transpose_ip(ain,desc_a,info) import type(psb_lzspmat_type), intent(inout) :: ain type(psb_desc_type) :: desc_a integer(psb_ipk_), intent(out) :: info end subroutine psb_lz_simple_glob_transpose_ip + subroutine psb_z_simple_glob_transpose(ain,aout,desc_a,info) + import + type(psb_zspmat_type), intent(in) :: ain + type(psb_zspmat_type), intent(out) :: aout + type(psb_desc_type) :: desc_a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_simple_glob_transpose + subroutine psb_z_simple_glob_transpose_ip(ain,desc_a,info) + import + type(psb_zspmat_type), intent(inout) :: ain + type(psb_desc_type) :: desc_a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_simple_glob_transpose_ip end interface psb_glob_transpose diff --git a/base/tools/psb_c_glob_transpose.F90 b/base/tools/psb_c_glob_transpose.F90 index 0a0d940b..9b1f99b4 100644 --- a/base/tools/psb_c_glob_transpose.F90 +++ b/base/tools/psb_c_glob_transpose.F90 @@ -1,3 +1,45 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: psb_c_glob_transpose.f90 +! +! Subroutine: psb_c_glob_transpose +! Version: complex +! +! This file provides multiple related versions of a parallel +! global transpose +! +! B = A^T +! +! #undef SP_A2AV_MPI #undef SP_A2AV_XI #define SP_A2AV_MAT @@ -282,12 +324,41 @@ subroutine psb_lc_coo_glob_transpose(ain,desc_r,info,atrans,desc_c,desc_rx) return end subroutine psb_lc_coo_glob_transpose +subroutine psb_c_coo_glob_transpose(ain,desc_r,info,atrans,desc_c,desc_rx) +#ifdef MPI_MOD + use mpi +#endif + use psb_base_mod, psb_protect_name => psb_c_coo_glob_transpose + Implicit None +#ifdef MPI_H + include 'mpif.h' +#endif + type(psb_c_coo_sparse_mat), intent(inout) :: ain + type(psb_desc_type), intent(inout), target :: desc_r + type(psb_c_coo_sparse_mat), intent(out), optional :: atrans + type(psb_desc_type), intent(inout), target, optional :: desc_c + type(psb_desc_type), intent(out), optional :: desc_rx + integer(psb_ipk_), intent(out) :: info + + type(psb_lc_coo_sparse_mat) :: atcoo -subroutine psb_lc_simple_glob_transpose(ain,aout,desc_a,info) - use psb_base_mod, psb_protect_name => psb_lc_simple_glob_transpose + if (present(atrans)) then + call ain%cp_to_lcoo(atcoo,info) + else + call ain%mv_to_lcoo(atcoo,info) + end if + if (info == 0) call psb_glob_transpose(atcoo,desc_r,info,desc_c=desc_c,desc_rx=desc_rx) + if (present(atrans)) then + call atrans%mv_from_lcoo(atcoo,info) + else + call ain%mv_from_lcoo(atcoo,info) + end if +end subroutine psb_c_coo_glob_transpose + +subroutine psb_c_simple_glob_transpose_ip(ain,desc_a,info) + use psb_base_mod, psb_protect_name => psb_c_simple_glob_transpose_ip implicit none - type(psb_lcspmat_type), intent(in) :: ain - type(psb_lcspmat_type), intent(out) :: aout + type(psb_cspmat_type), intent(inout) :: ain type(psb_desc_type) :: desc_a integer(psb_ipk_), intent(out) :: info @@ -296,9 +367,7 @@ subroutine psb_lc_simple_glob_transpose(ain,aout,desc_a,info) ! that the same DESC_A works for both A and A^T, which ! essentially means that A has a symmetric pattern. ! - type(psb_lcspmat_type) :: atmp, ahalo, aglb - type(psb_lc_coo_sparse_mat) :: tmpcoo, tmpc1, tmpc2, tmpch - type(psb_lc_csr_sparse_mat) :: tmpcsr + type(psb_lc_coo_sparse_mat) :: tmpc1, tmpc2 integer(psb_ipk_) :: nz1, nz2, nzh, nz integer(psb_ipk_) :: ictxt, me, np integer(psb_lpk_) :: i, j, k, nrow, ncol, nlz @@ -317,85 +386,76 @@ subroutine psb_lc_simple_glob_transpose(ain,aout,desc_a,info) end if - if (.true.) then - call ain%cp_to(tmpc1) - call psb_glob_transpose(tmpc1, desc_a,info,atrans=tmpc2) - call aout%mv_from(tmpc2) - else - - call ain%cscnv(tmpcsr,info) + call ain%mv_to(tmpc1) + call psb_glob_transpose(tmpc1, desc_a,info,atrans=tmpc2) + call ain%mv_from(tmpc2) - if (debug) then - ilv = [(i,i=1,ncol)] - call desc_a%l2gip(ilv,info,owned=.false.) - write(aname,'(a,i3.3,a)') 'atmp-preh-',me,'.mtx' - call ain%print(fname=aname,head='atmp before haloTest ',iv=ilv) - end if - if (dump) then - call ain%cscnv(atmp,info) - call psb_gather(aglb,atmp,desc_a,info) + if (dump) then + block + type(psb_lcspmat_type) :: aglb + write(aname,'(a,i3.3,a)') 'atran-',me,'.mtx' + call ain%print(fname=aname,head='atrans ') + call psb_gather(aglb,ain,desc_a,info) if (me==psb_root_) then - write(aname,'(a,i3.3,a)') 'aglob-prehalo.mtx' + write(aname,'(a,i3.3,a)') 'atran.mtx' call aglb%print(fname=aname,head='Test ') end if - end if + end block + end if - !call psb_loc_to_glob(tmpcsr%ja,desc_a,info) - call atmp%mv_from(tmpcsr) +end subroutine psb_c_simple_glob_transpose_ip - if (debug) then - write(aname,'(a,i3.3,a)') 'tmpcsr-',me,'.mtx' - call atmp%print(fname=aname,head='tmpcsr ',iv=ilv) - !call psb_set_debug_level(9999) - end if +subroutine psb_c_simple_glob_transpose(ain,aout,desc_a,info) + use psb_base_mod, psb_protect_name => psb_c_simple_glob_transpose + implicit none + type(psb_cspmat_type), intent(in) :: ain + type(psb_cspmat_type), intent(out) :: aout + type(psb_desc_type) :: desc_a + integer(psb_ipk_), intent(out) :: info - ! FIXME THIS NEEDS REWORKING - if (debug) write(0,*) me,' Htranspose into sphalo :',atmp%get_nrows(),atmp%get_ncols() - - call psb_sphalo(atmp,desc_a,ahalo,info, outfmt='coo ') - call atmp%mv_to(tmpcoo) - call ahalo%mv_to(tmpch) - nz1 = tmpcoo%get_nzeros() - nzh = tmpch%get_nzeros() - nlz = nz1+nzh - call tmpcoo%reallocate(nlz) - tmpcoo%ia(nz1+1:nz1+nzh) = tmpch%ia(1:nzh) - tmpcoo%ja(nz1+1:nz1+nzh) = tmpch%ja(1:nzh) - tmpcoo%val(nz1+1:nz1+nzh) = tmpch%val(1:nzh) - call psb_loc_to_glob(tmpcoo%ia(1:nlz),desc_a,info,iact='I') - call psb_loc_to_glob(tmpcoo%ja(1:nlz),desc_a,info,iact='I') - call tmpcoo%set_nzeros(nlz) - call tmpcoo%transp() - nz = tmpcoo%get_nzeros() - call psb_glob_to_loc(tmpcoo%ia(1:nz),desc_a,info,iact='I') - call psb_glob_to_loc(tmpcoo%ja(1:nz),desc_a,info,iact='I') - - call tmpcoo%clean_negidx(info) - - call aout%mv_from(tmpcoo) - call aout%csclip(info,imax=nrow) - - - if (debug) write(0,*) 'After clip:',aout%get_nzeros() - - if (debug_sync) then - call psb_barrier(ictxt) - if (me == 0) write(0,*) 'End htranspose ' - end if - !call aout%cscnv(info,type='csr') - endif - if (dump) then - write(aname,'(a,i3.3,a)') 'atran-',me,'.mtx' - call aout%print(fname=aname,head='atrans ',iv=ilv) - call psb_gather(aglb,aout,desc_a,info) - if (me==psb_root_) then - write(aname,'(a,i3.3,a)') 'atran.mtx' - call aglb%print(fname=aname,head='Test ') - end if + ! + ! BEWARE: This routine works under the assumption + ! that the same DESC_A works for both A and A^T, which + ! essentially means that A has a symmetric pattern. + ! + type(psb_lc_coo_sparse_mat) :: tmpc1, tmpc2 + integer(psb_ipk_) :: nz1, nz2, nzh, nz + integer(psb_ipk_) :: ictxt, me, np + integer(psb_lpk_) :: i, j, k, nrow, ncol, nlz + integer(psb_lpk_), allocatable :: ilv(:) + character(len=80) :: aname + logical, parameter :: debug=.false., dump=.false., debug_sync=.false. + + ictxt = desc_a%get_context() + call psb_info(ictxt,me,np) + + nrow = desc_a%get_local_rows() + ncol = desc_a%get_local_cols() + if (debug_sync) then + call psb_barrier(ictxt) + if (me == 0) write(0,*) 'Start htranspose ' end if -end subroutine psb_lc_simple_glob_transpose + call ain%cp_to(tmpc1) + call psb_glob_transpose(tmpc1, desc_a,info,atrans=tmpc2) + call aout%mv_from(tmpc2) + + if (dump) then + block + type(psb_lcspmat_type) :: aglb + + write(aname,'(a,i3.3,a)') 'atran-',me,'.mtx' + call aout%print(fname=aname,head='atrans ') + call psb_gather(aglb,aout,desc_a,info) + if (me==psb_root_) then + write(aname,'(a,i3.3,a)') 'atran.mtx' + call aglb%print(fname=aname,head='Test ') + end if + end block + end if + +end subroutine psb_c_simple_glob_transpose subroutine psb_lc_simple_glob_transpose_ip(ain,desc_a,info) use psb_base_mod, psb_protect_name => psb_lc_simple_glob_transpose_ip @@ -409,9 +469,7 @@ subroutine psb_lc_simple_glob_transpose_ip(ain,desc_a,info) ! that the same DESC_A works for both A and A^T, which ! essentially means that A has a symmetric pattern. ! - type(psb_lcspmat_type) :: atmp, ahalo, aglb - type(psb_lc_coo_sparse_mat) :: tmpcoo, tmpc1, tmpc2, tmpch - type(psb_lc_csr_sparse_mat) :: tmpcsr + type(psb_lc_coo_sparse_mat) :: tmpc1, tmpc2 integer(psb_ipk_) :: nz1, nz2, nzh, nz integer(psb_ipk_) :: ictxt, me, np integer(psb_lpk_) :: i, j, k, nrow, ncol, nlz @@ -430,82 +488,75 @@ subroutine psb_lc_simple_glob_transpose_ip(ain,desc_a,info) end if - if (.true.) then - call ain%mv_to(tmpc1) - call psb_glob_transpose(tmpc1, desc_a,info,atrans=tmpc2) - call ain%mv_from(tmpc2) - else - - call ain%cscnv(tmpcsr,info) + call ain%mv_to(tmpc1) + call psb_glob_transpose(tmpc1, desc_a,info,atrans=tmpc2) + call ain%mv_from(tmpc2) - if (debug) then - ilv = [(i,i=1,ncol)] - call desc_a%l2gip(ilv,info,owned=.false.) - write(aname,'(a,i3.3,a)') 'atmp-preh-',me,'.mtx' - call ain%print(fname=aname,head='atmp before haloTest ',iv=ilv) - end if - if (dump) then - call ain%cscnv(atmp,info) - call psb_gather(aglb,atmp,desc_a,info) + if (dump) then + block + type(psb_lcspmat_type) :: aglb + write(aname,'(a,i3.3,a)') 'atran-',me,'.mtx' + call ain%print(fname=aname,head='atrans ',iv=ilv) + call psb_gather(aglb,ain,desc_a,info) if (me==psb_root_) then - write(aname,'(a,i3.3,a)') 'aglob-prehalo.mtx' + write(aname,'(a,i3.3,a)') 'atran.mtx' call aglb%print(fname=aname,head='Test ') end if - end if + end block + end if - !call psb_loc_to_glob(tmpcsr%ja,desc_a,info) - call atmp%mv_from(tmpcsr) +end subroutine psb_lc_simple_glob_transpose_ip - if (debug) then - write(aname,'(a,i3.3,a)') 'tmpcsr-',me,'.mtx' - call atmp%print(fname=aname,head='tmpcsr ',iv=ilv) - !call psb_set_debug_level(9999) - end if +subroutine psb_lc_simple_glob_transpose(ain,aout,desc_a,info) + use psb_base_mod, psb_protect_name => psb_lc_simple_glob_transpose + implicit none + type(psb_lcspmat_type), intent(in) :: ain + type(psb_lcspmat_type), intent(out) :: aout + type(psb_desc_type) :: desc_a + integer(psb_ipk_), intent(out) :: info + + ! + ! BEWARE: This routine works under the assumption + ! that the same DESC_A works for both A and A^T, which + ! essentially means that A has a symmetric pattern. + ! + type(psb_lc_coo_sparse_mat) :: tmpc1, tmpc2 + integer(psb_ipk_) :: nz1, nz2, nzh, nz + integer(psb_ipk_) :: ictxt, me, np + integer(psb_lpk_) :: i, j, k, nrow, ncol, nlz + integer(psb_lpk_), allocatable :: ilv(:) + character(len=80) :: aname + logical, parameter :: debug=.false., dump=.false., debug_sync=.false. + + ictxt = desc_a%get_context() + call psb_info(ictxt,me,np) + + nrow = desc_a%get_local_rows() + ncol = desc_a%get_local_cols() + if (debug_sync) then + call psb_barrier(ictxt) + if (me == 0) write(0,*) 'Start htranspose ' + end if + + + call ain%cp_to(tmpc1) + call psb_glob_transpose(tmpc1, desc_a,info,atrans=tmpc2) + call aout%mv_from(tmpc2) - ! FIXME THIS NEEDS REWORKING - if (debug) write(0,*) me,' Htranspose into sphalo :',atmp%get_nrows(),atmp%get_ncols() - - call psb_sphalo(atmp,desc_a,ahalo,info, outfmt='coo ') - call atmp%mv_to(tmpcoo) - call ahalo%mv_to(tmpch) - nz1 = tmpcoo%get_nzeros() - nzh = tmpch%get_nzeros() - nlz = nz1+nzh - call tmpcoo%reallocate(nlz) - tmpcoo%ia(nz1+1:nz1+nzh) = tmpch%ia(1:nzh) - tmpcoo%ja(nz1+1:nz1+nzh) = tmpch%ja(1:nzh) - tmpcoo%val(nz1+1:nz1+nzh) = tmpch%val(1:nzh) - call psb_loc_to_glob(tmpcoo%ia(1:nlz),desc_a,info,iact='I') - call psb_loc_to_glob(tmpcoo%ja(1:nlz),desc_a,info,iact='I') - call tmpcoo%set_nzeros(nlz) - call tmpcoo%transp() - nz = tmpcoo%get_nzeros() - call psb_glob_to_loc(tmpcoo%ia(1:nz),desc_a,info,iact='I') - call psb_glob_to_loc(tmpcoo%ja(1:nz),desc_a,info,iact='I') - - call tmpcoo%clean_negidx(info) - - call ain%mv_from(tmpcoo) - call ain%csclip(info,imax=nrow) - - - if (debug) write(0,*) 'After clip:',ain%get_nzeros() - - if (debug_sync) then - call psb_barrier(ictxt) - if (me == 0) write(0,*) 'End htranspose ' - end if - !call aout%cscnv(info,type='csr') - endif if (dump) then - write(aname,'(a,i3.3,a)') 'atran-',me,'.mtx' - call ain%print(fname=aname,head='atrans ',iv=ilv) - call psb_gather(aglb,ain,desc_a,info) - if (me==psb_root_) then - write(aname,'(a,i3.3,a)') 'atran.mtx' - call aglb%print(fname=aname,head='Test ') - end if + block + type(psb_lcspmat_type) :: aglb + + write(aname,'(a,i3.3,a)') 'atran-',me,'.mtx' + call aout%print(fname=aname,head='atrans ',iv=ilv) + call psb_gather(aglb,aout,desc_a,info) + if (me==psb_root_) then + write(aname,'(a,i3.3,a)') 'atran.mtx' + call aglb%print(fname=aname,head='Test ') + end if + end block end if -end subroutine psb_lc_simple_glob_transpose_ip +end subroutine psb_lc_simple_glob_transpose + diff --git a/base/tools/psb_d_glob_transpose.F90 b/base/tools/psb_d_glob_transpose.F90 index 3ec7af92..636742e6 100644 --- a/base/tools/psb_d_glob_transpose.F90 +++ b/base/tools/psb_d_glob_transpose.F90 @@ -1,3 +1,45 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: psb_d_glob_transpose.f90 +! +! Subroutine: psb_d_glob_transpose +! Version: real +! +! This file provides multiple related versions of a parallel +! global transpose +! +! B = A^T +! +! #undef SP_A2AV_MPI #undef SP_A2AV_XI #define SP_A2AV_MAT @@ -282,12 +324,41 @@ subroutine psb_ld_coo_glob_transpose(ain,desc_r,info,atrans,desc_c,desc_rx) return end subroutine psb_ld_coo_glob_transpose +subroutine psb_d_coo_glob_transpose(ain,desc_r,info,atrans,desc_c,desc_rx) +#ifdef MPI_MOD + use mpi +#endif + use psb_base_mod, psb_protect_name => psb_d_coo_glob_transpose + Implicit None +#ifdef MPI_H + include 'mpif.h' +#endif + type(psb_d_coo_sparse_mat), intent(inout) :: ain + type(psb_desc_type), intent(inout), target :: desc_r + type(psb_d_coo_sparse_mat), intent(out), optional :: atrans + type(psb_desc_type), intent(inout), target, optional :: desc_c + type(psb_desc_type), intent(out), optional :: desc_rx + integer(psb_ipk_), intent(out) :: info + + type(psb_ld_coo_sparse_mat) :: atcoo -subroutine psb_ld_simple_glob_transpose(ain,aout,desc_a,info) - use psb_base_mod, psb_protect_name => psb_ld_simple_glob_transpose + if (present(atrans)) then + call ain%cp_to_lcoo(atcoo,info) + else + call ain%mv_to_lcoo(atcoo,info) + end if + if (info == 0) call psb_glob_transpose(atcoo,desc_r,info,desc_c=desc_c,desc_rx=desc_rx) + if (present(atrans)) then + call atrans%mv_from_lcoo(atcoo,info) + else + call ain%mv_from_lcoo(atcoo,info) + end if +end subroutine psb_d_coo_glob_transpose + +subroutine psb_d_simple_glob_transpose_ip(ain,desc_a,info) + use psb_base_mod, psb_protect_name => psb_d_simple_glob_transpose_ip implicit none - type(psb_ldspmat_type), intent(in) :: ain - type(psb_ldspmat_type), intent(out) :: aout + type(psb_dspmat_type), intent(inout) :: ain type(psb_desc_type) :: desc_a integer(psb_ipk_), intent(out) :: info @@ -296,9 +367,7 @@ subroutine psb_ld_simple_glob_transpose(ain,aout,desc_a,info) ! that the same DESC_A works for both A and A^T, which ! essentially means that A has a symmetric pattern. ! - type(psb_ldspmat_type) :: atmp, ahalo, aglb - type(psb_ld_coo_sparse_mat) :: tmpcoo, tmpc1, tmpc2, tmpch - type(psb_ld_csr_sparse_mat) :: tmpcsr + type(psb_ld_coo_sparse_mat) :: tmpc1, tmpc2 integer(psb_ipk_) :: nz1, nz2, nzh, nz integer(psb_ipk_) :: ictxt, me, np integer(psb_lpk_) :: i, j, k, nrow, ncol, nlz @@ -317,85 +386,76 @@ subroutine psb_ld_simple_glob_transpose(ain,aout,desc_a,info) end if - if (.true.) then - call ain%cp_to(tmpc1) - call psb_glob_transpose(tmpc1, desc_a,info,atrans=tmpc2) - call aout%mv_from(tmpc2) - else - - call ain%cscnv(tmpcsr,info) + call ain%mv_to(tmpc1) + call psb_glob_transpose(tmpc1, desc_a,info,atrans=tmpc2) + call ain%mv_from(tmpc2) - if (debug) then - ilv = [(i,i=1,ncol)] - call desc_a%l2gip(ilv,info,owned=.false.) - write(aname,'(a,i3.3,a)') 'atmp-preh-',me,'.mtx' - call ain%print(fname=aname,head='atmp before haloTest ',iv=ilv) - end if - if (dump) then - call ain%cscnv(atmp,info) - call psb_gather(aglb,atmp,desc_a,info) + if (dump) then + block + type(psb_ldspmat_type) :: aglb + write(aname,'(a,i3.3,a)') 'atran-',me,'.mtx' + call ain%print(fname=aname,head='atrans ') + call psb_gather(aglb,ain,desc_a,info) if (me==psb_root_) then - write(aname,'(a,i3.3,a)') 'aglob-prehalo.mtx' + write(aname,'(a,i3.3,a)') 'atran.mtx' call aglb%print(fname=aname,head='Test ') end if - end if + end block + end if - !call psb_loc_to_glob(tmpcsr%ja,desc_a,info) - call atmp%mv_from(tmpcsr) +end subroutine psb_d_simple_glob_transpose_ip - if (debug) then - write(aname,'(a,i3.3,a)') 'tmpcsr-',me,'.mtx' - call atmp%print(fname=aname,head='tmpcsr ',iv=ilv) - !call psb_set_debug_level(9999) - end if +subroutine psb_d_simple_glob_transpose(ain,aout,desc_a,info) + use psb_base_mod, psb_protect_name => psb_d_simple_glob_transpose + implicit none + type(psb_dspmat_type), intent(in) :: ain + type(psb_dspmat_type), intent(out) :: aout + type(psb_desc_type) :: desc_a + integer(psb_ipk_), intent(out) :: info - ! FIXME THIS NEEDS REWORKING - if (debug) write(0,*) me,' Htranspose into sphalo :',atmp%get_nrows(),atmp%get_ncols() - - call psb_sphalo(atmp,desc_a,ahalo,info, outfmt='coo ') - call atmp%mv_to(tmpcoo) - call ahalo%mv_to(tmpch) - nz1 = tmpcoo%get_nzeros() - nzh = tmpch%get_nzeros() - nlz = nz1+nzh - call tmpcoo%reallocate(nlz) - tmpcoo%ia(nz1+1:nz1+nzh) = tmpch%ia(1:nzh) - tmpcoo%ja(nz1+1:nz1+nzh) = tmpch%ja(1:nzh) - tmpcoo%val(nz1+1:nz1+nzh) = tmpch%val(1:nzh) - call psb_loc_to_glob(tmpcoo%ia(1:nlz),desc_a,info,iact='I') - call psb_loc_to_glob(tmpcoo%ja(1:nlz),desc_a,info,iact='I') - call tmpcoo%set_nzeros(nlz) - call tmpcoo%transp() - nz = tmpcoo%get_nzeros() - call psb_glob_to_loc(tmpcoo%ia(1:nz),desc_a,info,iact='I') - call psb_glob_to_loc(tmpcoo%ja(1:nz),desc_a,info,iact='I') - - call tmpcoo%clean_negidx(info) - - call aout%mv_from(tmpcoo) - call aout%csclip(info,imax=nrow) - - - if (debug) write(0,*) 'After clip:',aout%get_nzeros() - - if (debug_sync) then - call psb_barrier(ictxt) - if (me == 0) write(0,*) 'End htranspose ' - end if - !call aout%cscnv(info,type='csr') - endif - if (dump) then - write(aname,'(a,i3.3,a)') 'atran-',me,'.mtx' - call aout%print(fname=aname,head='atrans ',iv=ilv) - call psb_gather(aglb,aout,desc_a,info) - if (me==psb_root_) then - write(aname,'(a,i3.3,a)') 'atran.mtx' - call aglb%print(fname=aname,head='Test ') - end if + ! + ! BEWARE: This routine works under the assumption + ! that the same DESC_A works for both A and A^T, which + ! essentially means that A has a symmetric pattern. + ! + type(psb_ld_coo_sparse_mat) :: tmpc1, tmpc2 + integer(psb_ipk_) :: nz1, nz2, nzh, nz + integer(psb_ipk_) :: ictxt, me, np + integer(psb_lpk_) :: i, j, k, nrow, ncol, nlz + integer(psb_lpk_), allocatable :: ilv(:) + character(len=80) :: aname + logical, parameter :: debug=.false., dump=.false., debug_sync=.false. + + ictxt = desc_a%get_context() + call psb_info(ictxt,me,np) + + nrow = desc_a%get_local_rows() + ncol = desc_a%get_local_cols() + if (debug_sync) then + call psb_barrier(ictxt) + if (me == 0) write(0,*) 'Start htranspose ' end if -end subroutine psb_ld_simple_glob_transpose + call ain%cp_to(tmpc1) + call psb_glob_transpose(tmpc1, desc_a,info,atrans=tmpc2) + call aout%mv_from(tmpc2) + + if (dump) then + block + type(psb_ldspmat_type) :: aglb + + write(aname,'(a,i3.3,a)') 'atran-',me,'.mtx' + call aout%print(fname=aname,head='atrans ') + call psb_gather(aglb,aout,desc_a,info) + if (me==psb_root_) then + write(aname,'(a,i3.3,a)') 'atran.mtx' + call aglb%print(fname=aname,head='Test ') + end if + end block + end if + +end subroutine psb_d_simple_glob_transpose subroutine psb_ld_simple_glob_transpose_ip(ain,desc_a,info) use psb_base_mod, psb_protect_name => psb_ld_simple_glob_transpose_ip @@ -409,9 +469,7 @@ subroutine psb_ld_simple_glob_transpose_ip(ain,desc_a,info) ! that the same DESC_A works for both A and A^T, which ! essentially means that A has a symmetric pattern. ! - type(psb_ldspmat_type) :: atmp, ahalo, aglb - type(psb_ld_coo_sparse_mat) :: tmpcoo, tmpc1, tmpc2, tmpch - type(psb_ld_csr_sparse_mat) :: tmpcsr + type(psb_ld_coo_sparse_mat) :: tmpc1, tmpc2 integer(psb_ipk_) :: nz1, nz2, nzh, nz integer(psb_ipk_) :: ictxt, me, np integer(psb_lpk_) :: i, j, k, nrow, ncol, nlz @@ -430,82 +488,75 @@ subroutine psb_ld_simple_glob_transpose_ip(ain,desc_a,info) end if - if (.true.) then - call ain%mv_to(tmpc1) - call psb_glob_transpose(tmpc1, desc_a,info,atrans=tmpc2) - call ain%mv_from(tmpc2) - else - - call ain%cscnv(tmpcsr,info) + call ain%mv_to(tmpc1) + call psb_glob_transpose(tmpc1, desc_a,info,atrans=tmpc2) + call ain%mv_from(tmpc2) - if (debug) then - ilv = [(i,i=1,ncol)] - call desc_a%l2gip(ilv,info,owned=.false.) - write(aname,'(a,i3.3,a)') 'atmp-preh-',me,'.mtx' - call ain%print(fname=aname,head='atmp before haloTest ',iv=ilv) - end if - if (dump) then - call ain%cscnv(atmp,info) - call psb_gather(aglb,atmp,desc_a,info) + if (dump) then + block + type(psb_ldspmat_type) :: aglb + write(aname,'(a,i3.3,a)') 'atran-',me,'.mtx' + call ain%print(fname=aname,head='atrans ',iv=ilv) + call psb_gather(aglb,ain,desc_a,info) if (me==psb_root_) then - write(aname,'(a,i3.3,a)') 'aglob-prehalo.mtx' + write(aname,'(a,i3.3,a)') 'atran.mtx' call aglb%print(fname=aname,head='Test ') end if - end if + end block + end if - !call psb_loc_to_glob(tmpcsr%ja,desc_a,info) - call atmp%mv_from(tmpcsr) +end subroutine psb_ld_simple_glob_transpose_ip - if (debug) then - write(aname,'(a,i3.3,a)') 'tmpcsr-',me,'.mtx' - call atmp%print(fname=aname,head='tmpcsr ',iv=ilv) - !call psb_set_debug_level(9999) - end if +subroutine psb_ld_simple_glob_transpose(ain,aout,desc_a,info) + use psb_base_mod, psb_protect_name => psb_ld_simple_glob_transpose + implicit none + type(psb_ldspmat_type), intent(in) :: ain + type(psb_ldspmat_type), intent(out) :: aout + type(psb_desc_type) :: desc_a + integer(psb_ipk_), intent(out) :: info + + ! + ! BEWARE: This routine works under the assumption + ! that the same DESC_A works for both A and A^T, which + ! essentially means that A has a symmetric pattern. + ! + type(psb_ld_coo_sparse_mat) :: tmpc1, tmpc2 + integer(psb_ipk_) :: nz1, nz2, nzh, nz + integer(psb_ipk_) :: ictxt, me, np + integer(psb_lpk_) :: i, j, k, nrow, ncol, nlz + integer(psb_lpk_), allocatable :: ilv(:) + character(len=80) :: aname + logical, parameter :: debug=.false., dump=.false., debug_sync=.false. + + ictxt = desc_a%get_context() + call psb_info(ictxt,me,np) + + nrow = desc_a%get_local_rows() + ncol = desc_a%get_local_cols() + if (debug_sync) then + call psb_barrier(ictxt) + if (me == 0) write(0,*) 'Start htranspose ' + end if + + + call ain%cp_to(tmpc1) + call psb_glob_transpose(tmpc1, desc_a,info,atrans=tmpc2) + call aout%mv_from(tmpc2) - ! FIXME THIS NEEDS REWORKING - if (debug) write(0,*) me,' Htranspose into sphalo :',atmp%get_nrows(),atmp%get_ncols() - - call psb_sphalo(atmp,desc_a,ahalo,info, outfmt='coo ') - call atmp%mv_to(tmpcoo) - call ahalo%mv_to(tmpch) - nz1 = tmpcoo%get_nzeros() - nzh = tmpch%get_nzeros() - nlz = nz1+nzh - call tmpcoo%reallocate(nlz) - tmpcoo%ia(nz1+1:nz1+nzh) = tmpch%ia(1:nzh) - tmpcoo%ja(nz1+1:nz1+nzh) = tmpch%ja(1:nzh) - tmpcoo%val(nz1+1:nz1+nzh) = tmpch%val(1:nzh) - call psb_loc_to_glob(tmpcoo%ia(1:nlz),desc_a,info,iact='I') - call psb_loc_to_glob(tmpcoo%ja(1:nlz),desc_a,info,iact='I') - call tmpcoo%set_nzeros(nlz) - call tmpcoo%transp() - nz = tmpcoo%get_nzeros() - call psb_glob_to_loc(tmpcoo%ia(1:nz),desc_a,info,iact='I') - call psb_glob_to_loc(tmpcoo%ja(1:nz),desc_a,info,iact='I') - - call tmpcoo%clean_negidx(info) - - call ain%mv_from(tmpcoo) - call ain%csclip(info,imax=nrow) - - - if (debug) write(0,*) 'After clip:',ain%get_nzeros() - - if (debug_sync) then - call psb_barrier(ictxt) - if (me == 0) write(0,*) 'End htranspose ' - end if - !call aout%cscnv(info,type='csr') - endif if (dump) then - write(aname,'(a,i3.3,a)') 'atran-',me,'.mtx' - call ain%print(fname=aname,head='atrans ',iv=ilv) - call psb_gather(aglb,ain,desc_a,info) - if (me==psb_root_) then - write(aname,'(a,i3.3,a)') 'atran.mtx' - call aglb%print(fname=aname,head='Test ') - end if + block + type(psb_ldspmat_type) :: aglb + + write(aname,'(a,i3.3,a)') 'atran-',me,'.mtx' + call aout%print(fname=aname,head='atrans ',iv=ilv) + call psb_gather(aglb,aout,desc_a,info) + if (me==psb_root_) then + write(aname,'(a,i3.3,a)') 'atran.mtx' + call aglb%print(fname=aname,head='Test ') + end if + end block end if -end subroutine psb_ld_simple_glob_transpose_ip +end subroutine psb_ld_simple_glob_transpose + diff --git a/base/tools/psb_s_glob_transpose.F90 b/base/tools/psb_s_glob_transpose.F90 index 9e3738eb..4e5714bb 100644 --- a/base/tools/psb_s_glob_transpose.F90 +++ b/base/tools/psb_s_glob_transpose.F90 @@ -1,3 +1,45 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: psb_s_glob_transpose.f90 +! +! Subroutine: psb_s_glob_transpose +! Version: real +! +! This file provides multiple related versions of a parallel +! global transpose +! +! B = A^T +! +! #undef SP_A2AV_MPI #undef SP_A2AV_XI #define SP_A2AV_MAT @@ -282,12 +324,41 @@ subroutine psb_ls_coo_glob_transpose(ain,desc_r,info,atrans,desc_c,desc_rx) return end subroutine psb_ls_coo_glob_transpose +subroutine psb_s_coo_glob_transpose(ain,desc_r,info,atrans,desc_c,desc_rx) +#ifdef MPI_MOD + use mpi +#endif + use psb_base_mod, psb_protect_name => psb_s_coo_glob_transpose + Implicit None +#ifdef MPI_H + include 'mpif.h' +#endif + type(psb_s_coo_sparse_mat), intent(inout) :: ain + type(psb_desc_type), intent(inout), target :: desc_r + type(psb_s_coo_sparse_mat), intent(out), optional :: atrans + type(psb_desc_type), intent(inout), target, optional :: desc_c + type(psb_desc_type), intent(out), optional :: desc_rx + integer(psb_ipk_), intent(out) :: info + + type(psb_ls_coo_sparse_mat) :: atcoo -subroutine psb_ls_simple_glob_transpose(ain,aout,desc_a,info) - use psb_base_mod, psb_protect_name => psb_ls_simple_glob_transpose + if (present(atrans)) then + call ain%cp_to_lcoo(atcoo,info) + else + call ain%mv_to_lcoo(atcoo,info) + end if + if (info == 0) call psb_glob_transpose(atcoo,desc_r,info,desc_c=desc_c,desc_rx=desc_rx) + if (present(atrans)) then + call atrans%mv_from_lcoo(atcoo,info) + else + call ain%mv_from_lcoo(atcoo,info) + end if +end subroutine psb_s_coo_glob_transpose + +subroutine psb_s_simple_glob_transpose_ip(ain,desc_a,info) + use psb_base_mod, psb_protect_name => psb_s_simple_glob_transpose_ip implicit none - type(psb_lsspmat_type), intent(in) :: ain - type(psb_lsspmat_type), intent(out) :: aout + type(psb_sspmat_type), intent(inout) :: ain type(psb_desc_type) :: desc_a integer(psb_ipk_), intent(out) :: info @@ -296,9 +367,7 @@ subroutine psb_ls_simple_glob_transpose(ain,aout,desc_a,info) ! that the same DESC_A works for both A and A^T, which ! essentially means that A has a symmetric pattern. ! - type(psb_lsspmat_type) :: atmp, ahalo, aglb - type(psb_ls_coo_sparse_mat) :: tmpcoo, tmpc1, tmpc2, tmpch - type(psb_ls_csr_sparse_mat) :: tmpcsr + type(psb_ls_coo_sparse_mat) :: tmpc1, tmpc2 integer(psb_ipk_) :: nz1, nz2, nzh, nz integer(psb_ipk_) :: ictxt, me, np integer(psb_lpk_) :: i, j, k, nrow, ncol, nlz @@ -317,85 +386,76 @@ subroutine psb_ls_simple_glob_transpose(ain,aout,desc_a,info) end if - if (.true.) then - call ain%cp_to(tmpc1) - call psb_glob_transpose(tmpc1, desc_a,info,atrans=tmpc2) - call aout%mv_from(tmpc2) - else - - call ain%cscnv(tmpcsr,info) + call ain%mv_to(tmpc1) + call psb_glob_transpose(tmpc1, desc_a,info,atrans=tmpc2) + call ain%mv_from(tmpc2) - if (debug) then - ilv = [(i,i=1,ncol)] - call desc_a%l2gip(ilv,info,owned=.false.) - write(aname,'(a,i3.3,a)') 'atmp-preh-',me,'.mtx' - call ain%print(fname=aname,head='atmp before haloTest ',iv=ilv) - end if - if (dump) then - call ain%cscnv(atmp,info) - call psb_gather(aglb,atmp,desc_a,info) + if (dump) then + block + type(psb_lsspmat_type) :: aglb + write(aname,'(a,i3.3,a)') 'atran-',me,'.mtx' + call ain%print(fname=aname,head='atrans ') + call psb_gather(aglb,ain,desc_a,info) if (me==psb_root_) then - write(aname,'(a,i3.3,a)') 'aglob-prehalo.mtx' + write(aname,'(a,i3.3,a)') 'atran.mtx' call aglb%print(fname=aname,head='Test ') end if - end if + end block + end if - !call psb_loc_to_glob(tmpcsr%ja,desc_a,info) - call atmp%mv_from(tmpcsr) +end subroutine psb_s_simple_glob_transpose_ip - if (debug) then - write(aname,'(a,i3.3,a)') 'tmpcsr-',me,'.mtx' - call atmp%print(fname=aname,head='tmpcsr ',iv=ilv) - !call psb_set_debug_level(9999) - end if +subroutine psb_s_simple_glob_transpose(ain,aout,desc_a,info) + use psb_base_mod, psb_protect_name => psb_s_simple_glob_transpose + implicit none + type(psb_sspmat_type), intent(in) :: ain + type(psb_sspmat_type), intent(out) :: aout + type(psb_desc_type) :: desc_a + integer(psb_ipk_), intent(out) :: info - ! FIXME THIS NEEDS REWORKING - if (debug) write(0,*) me,' Htranspose into sphalo :',atmp%get_nrows(),atmp%get_ncols() - - call psb_sphalo(atmp,desc_a,ahalo,info, outfmt='coo ') - call atmp%mv_to(tmpcoo) - call ahalo%mv_to(tmpch) - nz1 = tmpcoo%get_nzeros() - nzh = tmpch%get_nzeros() - nlz = nz1+nzh - call tmpcoo%reallocate(nlz) - tmpcoo%ia(nz1+1:nz1+nzh) = tmpch%ia(1:nzh) - tmpcoo%ja(nz1+1:nz1+nzh) = tmpch%ja(1:nzh) - tmpcoo%val(nz1+1:nz1+nzh) = tmpch%val(1:nzh) - call psb_loc_to_glob(tmpcoo%ia(1:nlz),desc_a,info,iact='I') - call psb_loc_to_glob(tmpcoo%ja(1:nlz),desc_a,info,iact='I') - call tmpcoo%set_nzeros(nlz) - call tmpcoo%transp() - nz = tmpcoo%get_nzeros() - call psb_glob_to_loc(tmpcoo%ia(1:nz),desc_a,info,iact='I') - call psb_glob_to_loc(tmpcoo%ja(1:nz),desc_a,info,iact='I') - - call tmpcoo%clean_negidx(info) - - call aout%mv_from(tmpcoo) - call aout%csclip(info,imax=nrow) - - - if (debug) write(0,*) 'After clip:',aout%get_nzeros() - - if (debug_sync) then - call psb_barrier(ictxt) - if (me == 0) write(0,*) 'End htranspose ' - end if - !call aout%cscnv(info,type='csr') - endif - if (dump) then - write(aname,'(a,i3.3,a)') 'atran-',me,'.mtx' - call aout%print(fname=aname,head='atrans ',iv=ilv) - call psb_gather(aglb,aout,desc_a,info) - if (me==psb_root_) then - write(aname,'(a,i3.3,a)') 'atran.mtx' - call aglb%print(fname=aname,head='Test ') - end if + ! + ! BEWARE: This routine works under the assumption + ! that the same DESC_A works for both A and A^T, which + ! essentially means that A has a symmetric pattern. + ! + type(psb_ls_coo_sparse_mat) :: tmpc1, tmpc2 + integer(psb_ipk_) :: nz1, nz2, nzh, nz + integer(psb_ipk_) :: ictxt, me, np + integer(psb_lpk_) :: i, j, k, nrow, ncol, nlz + integer(psb_lpk_), allocatable :: ilv(:) + character(len=80) :: aname + logical, parameter :: debug=.false., dump=.false., debug_sync=.false. + + ictxt = desc_a%get_context() + call psb_info(ictxt,me,np) + + nrow = desc_a%get_local_rows() + ncol = desc_a%get_local_cols() + if (debug_sync) then + call psb_barrier(ictxt) + if (me == 0) write(0,*) 'Start htranspose ' end if -end subroutine psb_ls_simple_glob_transpose + call ain%cp_to(tmpc1) + call psb_glob_transpose(tmpc1, desc_a,info,atrans=tmpc2) + call aout%mv_from(tmpc2) + + if (dump) then + block + type(psb_lsspmat_type) :: aglb + + write(aname,'(a,i3.3,a)') 'atran-',me,'.mtx' + call aout%print(fname=aname,head='atrans ') + call psb_gather(aglb,aout,desc_a,info) + if (me==psb_root_) then + write(aname,'(a,i3.3,a)') 'atran.mtx' + call aglb%print(fname=aname,head='Test ') + end if + end block + end if + +end subroutine psb_s_simple_glob_transpose subroutine psb_ls_simple_glob_transpose_ip(ain,desc_a,info) use psb_base_mod, psb_protect_name => psb_ls_simple_glob_transpose_ip @@ -409,9 +469,7 @@ subroutine psb_ls_simple_glob_transpose_ip(ain,desc_a,info) ! that the same DESC_A works for both A and A^T, which ! essentially means that A has a symmetric pattern. ! - type(psb_lsspmat_type) :: atmp, ahalo, aglb - type(psb_ls_coo_sparse_mat) :: tmpcoo, tmpc1, tmpc2, tmpch - type(psb_ls_csr_sparse_mat) :: tmpcsr + type(psb_ls_coo_sparse_mat) :: tmpc1, tmpc2 integer(psb_ipk_) :: nz1, nz2, nzh, nz integer(psb_ipk_) :: ictxt, me, np integer(psb_lpk_) :: i, j, k, nrow, ncol, nlz @@ -430,82 +488,75 @@ subroutine psb_ls_simple_glob_transpose_ip(ain,desc_a,info) end if - if (.true.) then - call ain%mv_to(tmpc1) - call psb_glob_transpose(tmpc1, desc_a,info,atrans=tmpc2) - call ain%mv_from(tmpc2) - else - - call ain%cscnv(tmpcsr,info) + call ain%mv_to(tmpc1) + call psb_glob_transpose(tmpc1, desc_a,info,atrans=tmpc2) + call ain%mv_from(tmpc2) - if (debug) then - ilv = [(i,i=1,ncol)] - call desc_a%l2gip(ilv,info,owned=.false.) - write(aname,'(a,i3.3,a)') 'atmp-preh-',me,'.mtx' - call ain%print(fname=aname,head='atmp before haloTest ',iv=ilv) - end if - if (dump) then - call ain%cscnv(atmp,info) - call psb_gather(aglb,atmp,desc_a,info) + if (dump) then + block + type(psb_lsspmat_type) :: aglb + write(aname,'(a,i3.3,a)') 'atran-',me,'.mtx' + call ain%print(fname=aname,head='atrans ',iv=ilv) + call psb_gather(aglb,ain,desc_a,info) if (me==psb_root_) then - write(aname,'(a,i3.3,a)') 'aglob-prehalo.mtx' + write(aname,'(a,i3.3,a)') 'atran.mtx' call aglb%print(fname=aname,head='Test ') end if - end if + end block + end if - !call psb_loc_to_glob(tmpcsr%ja,desc_a,info) - call atmp%mv_from(tmpcsr) +end subroutine psb_ls_simple_glob_transpose_ip - if (debug) then - write(aname,'(a,i3.3,a)') 'tmpcsr-',me,'.mtx' - call atmp%print(fname=aname,head='tmpcsr ',iv=ilv) - !call psb_set_debug_level(9999) - end if +subroutine psb_ls_simple_glob_transpose(ain,aout,desc_a,info) + use psb_base_mod, psb_protect_name => psb_ls_simple_glob_transpose + implicit none + type(psb_lsspmat_type), intent(in) :: ain + type(psb_lsspmat_type), intent(out) :: aout + type(psb_desc_type) :: desc_a + integer(psb_ipk_), intent(out) :: info + + ! + ! BEWARE: This routine works under the assumption + ! that the same DESC_A works for both A and A^T, which + ! essentially means that A has a symmetric pattern. + ! + type(psb_ls_coo_sparse_mat) :: tmpc1, tmpc2 + integer(psb_ipk_) :: nz1, nz2, nzh, nz + integer(psb_ipk_) :: ictxt, me, np + integer(psb_lpk_) :: i, j, k, nrow, ncol, nlz + integer(psb_lpk_), allocatable :: ilv(:) + character(len=80) :: aname + logical, parameter :: debug=.false., dump=.false., debug_sync=.false. + + ictxt = desc_a%get_context() + call psb_info(ictxt,me,np) + + nrow = desc_a%get_local_rows() + ncol = desc_a%get_local_cols() + if (debug_sync) then + call psb_barrier(ictxt) + if (me == 0) write(0,*) 'Start htranspose ' + end if + + + call ain%cp_to(tmpc1) + call psb_glob_transpose(tmpc1, desc_a,info,atrans=tmpc2) + call aout%mv_from(tmpc2) - ! FIXME THIS NEEDS REWORKING - if (debug) write(0,*) me,' Htranspose into sphalo :',atmp%get_nrows(),atmp%get_ncols() - - call psb_sphalo(atmp,desc_a,ahalo,info, outfmt='coo ') - call atmp%mv_to(tmpcoo) - call ahalo%mv_to(tmpch) - nz1 = tmpcoo%get_nzeros() - nzh = tmpch%get_nzeros() - nlz = nz1+nzh - call tmpcoo%reallocate(nlz) - tmpcoo%ia(nz1+1:nz1+nzh) = tmpch%ia(1:nzh) - tmpcoo%ja(nz1+1:nz1+nzh) = tmpch%ja(1:nzh) - tmpcoo%val(nz1+1:nz1+nzh) = tmpch%val(1:nzh) - call psb_loc_to_glob(tmpcoo%ia(1:nlz),desc_a,info,iact='I') - call psb_loc_to_glob(tmpcoo%ja(1:nlz),desc_a,info,iact='I') - call tmpcoo%set_nzeros(nlz) - call tmpcoo%transp() - nz = tmpcoo%get_nzeros() - call psb_glob_to_loc(tmpcoo%ia(1:nz),desc_a,info,iact='I') - call psb_glob_to_loc(tmpcoo%ja(1:nz),desc_a,info,iact='I') - - call tmpcoo%clean_negidx(info) - - call ain%mv_from(tmpcoo) - call ain%csclip(info,imax=nrow) - - - if (debug) write(0,*) 'After clip:',ain%get_nzeros() - - if (debug_sync) then - call psb_barrier(ictxt) - if (me == 0) write(0,*) 'End htranspose ' - end if - !call aout%cscnv(info,type='csr') - endif if (dump) then - write(aname,'(a,i3.3,a)') 'atran-',me,'.mtx' - call ain%print(fname=aname,head='atrans ',iv=ilv) - call psb_gather(aglb,ain,desc_a,info) - if (me==psb_root_) then - write(aname,'(a,i3.3,a)') 'atran.mtx' - call aglb%print(fname=aname,head='Test ') - end if + block + type(psb_lsspmat_type) :: aglb + + write(aname,'(a,i3.3,a)') 'atran-',me,'.mtx' + call aout%print(fname=aname,head='atrans ',iv=ilv) + call psb_gather(aglb,aout,desc_a,info) + if (me==psb_root_) then + write(aname,'(a,i3.3,a)') 'atran.mtx' + call aglb%print(fname=aname,head='Test ') + end if + end block end if -end subroutine psb_ls_simple_glob_transpose_ip +end subroutine psb_ls_simple_glob_transpose + diff --git a/base/tools/psb_z_glob_transpose.F90 b/base/tools/psb_z_glob_transpose.F90 index 4ef7ff17..5e6bef2f 100644 --- a/base/tools/psb_z_glob_transpose.F90 +++ b/base/tools/psb_z_glob_transpose.F90 @@ -1,3 +1,45 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: psb_z_glob_transpose.f90 +! +! Subroutine: psb_z_glob_transpose +! Version: complex +! +! This file provides multiple related versions of a parallel +! global transpose +! +! B = A^T +! +! #undef SP_A2AV_MPI #undef SP_A2AV_XI #define SP_A2AV_MAT @@ -282,12 +324,41 @@ subroutine psb_lz_coo_glob_transpose(ain,desc_r,info,atrans,desc_c,desc_rx) return end subroutine psb_lz_coo_glob_transpose +subroutine psb_z_coo_glob_transpose(ain,desc_r,info,atrans,desc_c,desc_rx) +#ifdef MPI_MOD + use mpi +#endif + use psb_base_mod, psb_protect_name => psb_z_coo_glob_transpose + Implicit None +#ifdef MPI_H + include 'mpif.h' +#endif + type(psb_z_coo_sparse_mat), intent(inout) :: ain + type(psb_desc_type), intent(inout), target :: desc_r + type(psb_z_coo_sparse_mat), intent(out), optional :: atrans + type(psb_desc_type), intent(inout), target, optional :: desc_c + type(psb_desc_type), intent(out), optional :: desc_rx + integer(psb_ipk_), intent(out) :: info + + type(psb_lz_coo_sparse_mat) :: atcoo -subroutine psb_lz_simple_glob_transpose(ain,aout,desc_a,info) - use psb_base_mod, psb_protect_name => psb_lz_simple_glob_transpose + if (present(atrans)) then + call ain%cp_to_lcoo(atcoo,info) + else + call ain%mv_to_lcoo(atcoo,info) + end if + if (info == 0) call psb_glob_transpose(atcoo,desc_r,info,desc_c=desc_c,desc_rx=desc_rx) + if (present(atrans)) then + call atrans%mv_from_lcoo(atcoo,info) + else + call ain%mv_from_lcoo(atcoo,info) + end if +end subroutine psb_z_coo_glob_transpose + +subroutine psb_z_simple_glob_transpose_ip(ain,desc_a,info) + use psb_base_mod, psb_protect_name => psb_z_simple_glob_transpose_ip implicit none - type(psb_lzspmat_type), intent(in) :: ain - type(psb_lzspmat_type), intent(out) :: aout + type(psb_zspmat_type), intent(inout) :: ain type(psb_desc_type) :: desc_a integer(psb_ipk_), intent(out) :: info @@ -296,9 +367,7 @@ subroutine psb_lz_simple_glob_transpose(ain,aout,desc_a,info) ! that the same DESC_A works for both A and A^T, which ! essentially means that A has a symmetric pattern. ! - type(psb_lzspmat_type) :: atmp, ahalo, aglb - type(psb_lz_coo_sparse_mat) :: tmpcoo, tmpc1, tmpc2, tmpch - type(psb_lz_csr_sparse_mat) :: tmpcsr + type(psb_lz_coo_sparse_mat) :: tmpc1, tmpc2 integer(psb_ipk_) :: nz1, nz2, nzh, nz integer(psb_ipk_) :: ictxt, me, np integer(psb_lpk_) :: i, j, k, nrow, ncol, nlz @@ -317,85 +386,76 @@ subroutine psb_lz_simple_glob_transpose(ain,aout,desc_a,info) end if - if (.true.) then - call ain%cp_to(tmpc1) - call psb_glob_transpose(tmpc1, desc_a,info,atrans=tmpc2) - call aout%mv_from(tmpc2) - else - - call ain%cscnv(tmpcsr,info) + call ain%mv_to(tmpc1) + call psb_glob_transpose(tmpc1, desc_a,info,atrans=tmpc2) + call ain%mv_from(tmpc2) - if (debug) then - ilv = [(i,i=1,ncol)] - call desc_a%l2gip(ilv,info,owned=.false.) - write(aname,'(a,i3.3,a)') 'atmp-preh-',me,'.mtx' - call ain%print(fname=aname,head='atmp before haloTest ',iv=ilv) - end if - if (dump) then - call ain%cscnv(atmp,info) - call psb_gather(aglb,atmp,desc_a,info) + if (dump) then + block + type(psb_lzspmat_type) :: aglb + write(aname,'(a,i3.3,a)') 'atran-',me,'.mtx' + call ain%print(fname=aname,head='atrans ') + call psb_gather(aglb,ain,desc_a,info) if (me==psb_root_) then - write(aname,'(a,i3.3,a)') 'aglob-prehalo.mtx' + write(aname,'(a,i3.3,a)') 'atran.mtx' call aglb%print(fname=aname,head='Test ') end if - end if + end block + end if - !call psb_loc_to_glob(tmpcsr%ja,desc_a,info) - call atmp%mv_from(tmpcsr) +end subroutine psb_z_simple_glob_transpose_ip - if (debug) then - write(aname,'(a,i3.3,a)') 'tmpcsr-',me,'.mtx' - call atmp%print(fname=aname,head='tmpcsr ',iv=ilv) - !call psb_set_debug_level(9999) - end if +subroutine psb_z_simple_glob_transpose(ain,aout,desc_a,info) + use psb_base_mod, psb_protect_name => psb_z_simple_glob_transpose + implicit none + type(psb_zspmat_type), intent(in) :: ain + type(psb_zspmat_type), intent(out) :: aout + type(psb_desc_type) :: desc_a + integer(psb_ipk_), intent(out) :: info - ! FIXME THIS NEEDS REWORKING - if (debug) write(0,*) me,' Htranspose into sphalo :',atmp%get_nrows(),atmp%get_ncols() - - call psb_sphalo(atmp,desc_a,ahalo,info, outfmt='coo ') - call atmp%mv_to(tmpcoo) - call ahalo%mv_to(tmpch) - nz1 = tmpcoo%get_nzeros() - nzh = tmpch%get_nzeros() - nlz = nz1+nzh - call tmpcoo%reallocate(nlz) - tmpcoo%ia(nz1+1:nz1+nzh) = tmpch%ia(1:nzh) - tmpcoo%ja(nz1+1:nz1+nzh) = tmpch%ja(1:nzh) - tmpcoo%val(nz1+1:nz1+nzh) = tmpch%val(1:nzh) - call psb_loc_to_glob(tmpcoo%ia(1:nlz),desc_a,info,iact='I') - call psb_loc_to_glob(tmpcoo%ja(1:nlz),desc_a,info,iact='I') - call tmpcoo%set_nzeros(nlz) - call tmpcoo%transp() - nz = tmpcoo%get_nzeros() - call psb_glob_to_loc(tmpcoo%ia(1:nz),desc_a,info,iact='I') - call psb_glob_to_loc(tmpcoo%ja(1:nz),desc_a,info,iact='I') - - call tmpcoo%clean_negidx(info) - - call aout%mv_from(tmpcoo) - call aout%csclip(info,imax=nrow) - - - if (debug) write(0,*) 'After clip:',aout%get_nzeros() - - if (debug_sync) then - call psb_barrier(ictxt) - if (me == 0) write(0,*) 'End htranspose ' - end if - !call aout%cscnv(info,type='csr') - endif - if (dump) then - write(aname,'(a,i3.3,a)') 'atran-',me,'.mtx' - call aout%print(fname=aname,head='atrans ',iv=ilv) - call psb_gather(aglb,aout,desc_a,info) - if (me==psb_root_) then - write(aname,'(a,i3.3,a)') 'atran.mtx' - call aglb%print(fname=aname,head='Test ') - end if + ! + ! BEWARE: This routine works under the assumption + ! that the same DESC_A works for both A and A^T, which + ! essentially means that A has a symmetric pattern. + ! + type(psb_lz_coo_sparse_mat) :: tmpc1, tmpc2 + integer(psb_ipk_) :: nz1, nz2, nzh, nz + integer(psb_ipk_) :: ictxt, me, np + integer(psb_lpk_) :: i, j, k, nrow, ncol, nlz + integer(psb_lpk_), allocatable :: ilv(:) + character(len=80) :: aname + logical, parameter :: debug=.false., dump=.false., debug_sync=.false. + + ictxt = desc_a%get_context() + call psb_info(ictxt,me,np) + + nrow = desc_a%get_local_rows() + ncol = desc_a%get_local_cols() + if (debug_sync) then + call psb_barrier(ictxt) + if (me == 0) write(0,*) 'Start htranspose ' end if -end subroutine psb_lz_simple_glob_transpose + call ain%cp_to(tmpc1) + call psb_glob_transpose(tmpc1, desc_a,info,atrans=tmpc2) + call aout%mv_from(tmpc2) + + if (dump) then + block + type(psb_lzspmat_type) :: aglb + + write(aname,'(a,i3.3,a)') 'atran-',me,'.mtx' + call aout%print(fname=aname,head='atrans ') + call psb_gather(aglb,aout,desc_a,info) + if (me==psb_root_) then + write(aname,'(a,i3.3,a)') 'atran.mtx' + call aglb%print(fname=aname,head='Test ') + end if + end block + end if + +end subroutine psb_z_simple_glob_transpose subroutine psb_lz_simple_glob_transpose_ip(ain,desc_a,info) use psb_base_mod, psb_protect_name => psb_lz_simple_glob_transpose_ip @@ -409,9 +469,7 @@ subroutine psb_lz_simple_glob_transpose_ip(ain,desc_a,info) ! that the same DESC_A works for both A and A^T, which ! essentially means that A has a symmetric pattern. ! - type(psb_lzspmat_type) :: atmp, ahalo, aglb - type(psb_lz_coo_sparse_mat) :: tmpcoo, tmpc1, tmpc2, tmpch - type(psb_lz_csr_sparse_mat) :: tmpcsr + type(psb_lz_coo_sparse_mat) :: tmpc1, tmpc2 integer(psb_ipk_) :: nz1, nz2, nzh, nz integer(psb_ipk_) :: ictxt, me, np integer(psb_lpk_) :: i, j, k, nrow, ncol, nlz @@ -430,82 +488,75 @@ subroutine psb_lz_simple_glob_transpose_ip(ain,desc_a,info) end if - if (.true.) then - call ain%mv_to(tmpc1) - call psb_glob_transpose(tmpc1, desc_a,info,atrans=tmpc2) - call ain%mv_from(tmpc2) - else - - call ain%cscnv(tmpcsr,info) + call ain%mv_to(tmpc1) + call psb_glob_transpose(tmpc1, desc_a,info,atrans=tmpc2) + call ain%mv_from(tmpc2) - if (debug) then - ilv = [(i,i=1,ncol)] - call desc_a%l2gip(ilv,info,owned=.false.) - write(aname,'(a,i3.3,a)') 'atmp-preh-',me,'.mtx' - call ain%print(fname=aname,head='atmp before haloTest ',iv=ilv) - end if - if (dump) then - call ain%cscnv(atmp,info) - call psb_gather(aglb,atmp,desc_a,info) + if (dump) then + block + type(psb_lzspmat_type) :: aglb + write(aname,'(a,i3.3,a)') 'atran-',me,'.mtx' + call ain%print(fname=aname,head='atrans ',iv=ilv) + call psb_gather(aglb,ain,desc_a,info) if (me==psb_root_) then - write(aname,'(a,i3.3,a)') 'aglob-prehalo.mtx' + write(aname,'(a,i3.3,a)') 'atran.mtx' call aglb%print(fname=aname,head='Test ') end if - end if + end block + end if - !call psb_loc_to_glob(tmpcsr%ja,desc_a,info) - call atmp%mv_from(tmpcsr) +end subroutine psb_lz_simple_glob_transpose_ip - if (debug) then - write(aname,'(a,i3.3,a)') 'tmpcsr-',me,'.mtx' - call atmp%print(fname=aname,head='tmpcsr ',iv=ilv) - !call psb_set_debug_level(9999) - end if +subroutine psb_lz_simple_glob_transpose(ain,aout,desc_a,info) + use psb_base_mod, psb_protect_name => psb_lz_simple_glob_transpose + implicit none + type(psb_lzspmat_type), intent(in) :: ain + type(psb_lzspmat_type), intent(out) :: aout + type(psb_desc_type) :: desc_a + integer(psb_ipk_), intent(out) :: info + + ! + ! BEWARE: This routine works under the assumption + ! that the same DESC_A works for both A and A^T, which + ! essentially means that A has a symmetric pattern. + ! + type(psb_lz_coo_sparse_mat) :: tmpc1, tmpc2 + integer(psb_ipk_) :: nz1, nz2, nzh, nz + integer(psb_ipk_) :: ictxt, me, np + integer(psb_lpk_) :: i, j, k, nrow, ncol, nlz + integer(psb_lpk_), allocatable :: ilv(:) + character(len=80) :: aname + logical, parameter :: debug=.false., dump=.false., debug_sync=.false. + + ictxt = desc_a%get_context() + call psb_info(ictxt,me,np) + + nrow = desc_a%get_local_rows() + ncol = desc_a%get_local_cols() + if (debug_sync) then + call psb_barrier(ictxt) + if (me == 0) write(0,*) 'Start htranspose ' + end if + + + call ain%cp_to(tmpc1) + call psb_glob_transpose(tmpc1, desc_a,info,atrans=tmpc2) + call aout%mv_from(tmpc2) - ! FIXME THIS NEEDS REWORKING - if (debug) write(0,*) me,' Htranspose into sphalo :',atmp%get_nrows(),atmp%get_ncols() - - call psb_sphalo(atmp,desc_a,ahalo,info, outfmt='coo ') - call atmp%mv_to(tmpcoo) - call ahalo%mv_to(tmpch) - nz1 = tmpcoo%get_nzeros() - nzh = tmpch%get_nzeros() - nlz = nz1+nzh - call tmpcoo%reallocate(nlz) - tmpcoo%ia(nz1+1:nz1+nzh) = tmpch%ia(1:nzh) - tmpcoo%ja(nz1+1:nz1+nzh) = tmpch%ja(1:nzh) - tmpcoo%val(nz1+1:nz1+nzh) = tmpch%val(1:nzh) - call psb_loc_to_glob(tmpcoo%ia(1:nlz),desc_a,info,iact='I') - call psb_loc_to_glob(tmpcoo%ja(1:nlz),desc_a,info,iact='I') - call tmpcoo%set_nzeros(nlz) - call tmpcoo%transp() - nz = tmpcoo%get_nzeros() - call psb_glob_to_loc(tmpcoo%ia(1:nz),desc_a,info,iact='I') - call psb_glob_to_loc(tmpcoo%ja(1:nz),desc_a,info,iact='I') - - call tmpcoo%clean_negidx(info) - - call ain%mv_from(tmpcoo) - call ain%csclip(info,imax=nrow) - - - if (debug) write(0,*) 'After clip:',ain%get_nzeros() - - if (debug_sync) then - call psb_barrier(ictxt) - if (me == 0) write(0,*) 'End htranspose ' - end if - !call aout%cscnv(info,type='csr') - endif if (dump) then - write(aname,'(a,i3.3,a)') 'atran-',me,'.mtx' - call ain%print(fname=aname,head='atrans ',iv=ilv) - call psb_gather(aglb,ain,desc_a,info) - if (me==psb_root_) then - write(aname,'(a,i3.3,a)') 'atran.mtx' - call aglb%print(fname=aname,head='Test ') - end if + block + type(psb_lzspmat_type) :: aglb + + write(aname,'(a,i3.3,a)') 'atran-',me,'.mtx' + call aout%print(fname=aname,head='atrans ',iv=ilv) + call psb_gather(aglb,aout,desc_a,info) + if (me==psb_root_) then + write(aname,'(a,i3.3,a)') 'atran.mtx' + call aglb%print(fname=aname,head='Test ') + end if + end block end if -end subroutine psb_lz_simple_glob_transpose_ip +end subroutine psb_lz_simple_glob_transpose +