diff --git a/base/internals/Makefile b/base/internals/Makefile index d2993274..f2b1c116 100644 --- a/base/internals/Makefile +++ b/base/internals/Makefile @@ -2,6 +2,7 @@ include ../../Make.inc FOBJS = psi_compute_size.o psi_crea_bnd_elem.o psi_crea_index.o \ psi_crea_ovr_elem.o psi_dl_check.o \ + psi_gthsct_mod.o \ psi_sort_dl.o \ psi_ldsc_pre_halo.o\ psi_sort_dl.o psi_idx_cnv.o psi_idx_ins_cnv.o psi_fnd_owner.o @@ -22,7 +23,7 @@ lib: mpfobjs $(FOBJS) $(FOBJS2) $(COBJS) $(MPFOBJS2) $(RANLIB) $(LIBDIR)/$(LIBNAME) -mpfobjs: +mpfobjs: psi_gthsct_mod.o (make $(MPFOBJS) F90="$(MPF90)" FC="$(MPF90)" FCOPT="$(F90COPT)") (make $(FOBJS2) F90="$(MPF77)" FC="$(MPF77)" FCOPT="$(FCOPT)") clean: diff --git a/base/internals/psi_compute_size.f90 b/base/internals/psi_compute_size.f90 index f9c6dbb6..2fefc9ae 100644 --- a/base/internals/psi_compute_size.f90 +++ b/base/internals/psi_compute_size.f90 @@ -28,6 +28,9 @@ !!$ POSSIBILITY OF SUCH DAMAGE. !!$ !!$ +! +! Compute maximum data exchange size; small utility for assembly of descriptors. +! subroutine psi_compute_size(desc_data, index_in, dl_lda, info) use psi_mod, psb_protect_name => psi_compute_size diff --git a/base/internals/psi_crea_bnd_elem.f90 b/base/internals/psi_crea_bnd_elem.f90 index b7b6e41d..4fdcbfef 100644 --- a/base/internals/psi_crea_bnd_elem.f90 +++ b/base/internals/psi_crea_bnd_elem.f90 @@ -27,7 +27,21 @@ !!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE !!$ POSSIBILITY OF SUCH DAMAGE. !!$ -!!$ +!!$ +! +! File: psi_crea_bnd_elem.f90 +! +! Subroutine: psi_crea_bnd_elem +! Extracts a list of boundary indices. If no boundary is present in +! the distribution the output vector is put in the unallocated state, +! otherwise its size is equal to the number of boundary indices on the +! current (calling) process. +! +! Parameters: +! bndel(:) - integer, allocatable Array containing the output list +! desc_a - type(). The communication descriptor. +! info - integer. return code. +! subroutine psi_crea_bnd_elem(bndel,desc_a,info) use psi_mod, psb_protect_name => psi_crea_bnd_elem use psb_realloc_mod @@ -68,21 +82,7 @@ subroutine psi_crea_bnd_elem(bndel,desc_a,info) j = j + 1 + ns + 1 + nr + 1 enddo - if (i>0) then - call psb_msort(work(1:i)) - j=1 - irv = work(1) - do k=2, i - if (work(k) /= irv) then - irv = work(k) - j = j + 1 - work(j) = work(k) - endif - enddo - else - j = 0 - endif - + call psb_msort_unique(work(1:i),j) if (.true.) then if (j>0) then diff --git a/base/internals/psi_crea_index.f90 b/base/internals/psi_crea_index.f90 index ea88d856..3ea7e8bc 100644 --- a/base/internals/psi_crea_index.f90 +++ b/base/internals/psi_crea_index.f90 @@ -28,6 +28,75 @@ !!$ POSSIBILITY OF SUCH DAMAGE. !!$ !!$ +! +! +! File: psi_crea_index.f90 +! +! Subroutine: psb_crea_index +! Converts a list of data exchanges from build format to assembled format. +! See below for a description of the formats. +! +! Parameters: +! desc_a - type(psb_desc_type) The descriptor; in this context only the index +! mapping parts are used. +! index_in(:) - integer The index list, build format +! index_out(:) - integer, allocatable The index list, assembled format +! glob_idx - logical Whether the input indices are in local or global +! numbering; the global numbering is used when +! converting the overlap exchange lists. +! nxch - integer The number of data exchanges on the calling process +! nsnd - integer Total send buffer size on the calling process +! nrcv - integer Total receive buffer size on the calling process +! +! +! The format of the index lists. Copied from base/modules/psb_desc_type +! +! 7. The data exchange is based on lists of local indices to be exchanged; all the +! lists have the same format, as follows: +! the list is composed of variable dimension blocks, one for each process to +! communicate with; each block contains indices of local elements to be +! exchanged. We do choose the order of communications: do not change +! the sequence of blocks unless you know what you're doing, or you'll +! risk a deadlock. NOTE: This is the format when the state is PSB_ASB_. +! See below for BLD. The end-of-list is marked with a -1. +! +! notation stored in explanation +! --------------- --------------------------- ----------------------------------- +! process_id index_v(p+proc_id_) identifier of process with which +! data is exchanged. +! n_elements_recv index_v(p+n_elem_recv_) number of elements to receive. +! elements_recv index_v(p+elem_recv_+i) indexes of local elements to +! receive. these are stored in the +! array from location p+elem_recv_ to +! location p+elem_recv_+ +! index_v(p+n_elem_recv_)-1. +! n_elements_send index_v(p+n_elem_send_) number of elements to send. +! elements_send index_v(p+elem_send_+i) indexes of local elements to +! send. these are stored in the +! array from location p+elem_send_ to +! location p+elem_send_+ +! index_v(p+n_elem_send_)-1. +! +! This organization is valid for both halo and overlap indices; overlap entries +! need to be updated to ensure that a variable at a given global index +! (assigned to multiple processes) has the same value. The way to resolve the +! issue is to exchange the data and then sum (or average) the values. See +! psb_ovrl subroutine. +! +! 8. When the descriptor is in the BLD state the INDEX vectors contains only +! the indices to be received, organized as a sequence +! of entries of the form (proc,N,(lx1,lx2,...,lxn)) with owning process, +! number of indices (most often N=1), list of local indices. +! This is because we only know the list of halo indices to be received +! as we go about building the sparse matrix pattern, and we want the build +! phase to be loosely synchronized. Thus we record the indices we have to ask +! for, and at the time we call PSB_CDASB we match all the requests to figure +! out who should be sending what to whom. +! However this implies that we know who owns the indices; if we are in the +! LARGE case (as described above) this is actually only true for the OVERLAP list +! that is filled in at CDALL time, and not for the HALO; thus the HALO list +! is rebuilt during the CDASB process (in the psi_ldsc_pre_halo subroutine). +! subroutine psi_crea_index(desc_a,index_in,index_out,glob_idx,nxch,nsnd,nrcv,info) use psb_realloc_mod @@ -76,10 +145,6 @@ subroutine psi_crea_index(desc_a,index_in,index_out,glob_idx,nxch,nsnd,nrcv,info ! ...extract dependence list (ordered list of identifer process ! which every process must communcate with... -!!$ write(0,*) me,name,' Size of desc_in ',size(index_in) -!!$ if (size(index_in)>0) then -!!$ write(0,*) me,name,'first item ',index_in(1) -!!$ end if if (debug) write(*,*) 'crea_halo: calling extract_dep_list' mode = 1 @@ -95,19 +160,18 @@ subroutine psi_crea_index(desc_a,index_in,index_out,glob_idx,nxch,nsnd,nrcv,info ! ...now process root contains dependence list of all processes... if (debug) write(0,*) 'crea_index: root sorting dep list' - ! ....i must order communication in in halo call psi_dl_check(dep_list,max(1,dl_lda),np,length_dl) - ! ....now i can sort dependence list...... + ! ....now i can sort dependency lists. call psi_sort_dl(dep_list,length_dl,np,info) if(info /= 0) then call psb_errpush(4010,name,a_err='psi_sort_dl') goto 9999 end if - ! ...create desc_halo array..... if(debug) write(0,*)'in psi_crea_index calling psi_desc_index',& & size(index_out) + ! Do the actual format conversion. call psi_desc_index(desc_a,index_in,dep_list(1:,me),& & length_dl(me),nsnd,nrcv, index_out,glob_idx,info) if(debug) write(0,*)'out of psi_desc_index',& diff --git a/base/internals/psi_crea_ovr_elem.f90 b/base/internals/psi_crea_ovr_elem.f90 index 17a685d9..24989c0b 100644 --- a/base/internals/psi_crea_ovr_elem.f90 +++ b/base/internals/psi_crea_ovr_elem.f90 @@ -28,6 +28,19 @@ !!$ POSSIBILITY OF SUCH DAMAGE. !!$ !!$ +! +! File: psi_crea_ovr_elem.f90 +! +! Subroutine: psi_crea_ovr_elem +! Creates the overlap_elem list: for each overlap index, store the index and +! the number of processes sharing it (minimum: 2). List is ended by -1. +! See also description in base/modules/psb_desc_type.f90 +! +! Parameters: +! ovr_elem(:) - integer, allocatable Array containing the output list +! desc_a - type(). The communication descriptor. +! info - integer. return code. +! subroutine psi_crea_ovr_elem(desc_overlap,ovr_elem,info) use psi_mod, psb_protect_name => psi_crea_ovr_elem @@ -133,7 +146,8 @@ subroutine psi_crea_ovr_elem(desc_overlap,ovr_elem,info) ovr_elem(pnt_new_elem)=-1 call freepairsearchtree(pairtree) - else + else if (.not.usetree) then + insize = size(desc_overlap) insize = max(1,(insize+1)/2) diff --git a/base/internals/psi_desc_index.F90 b/base/internals/psi_desc_index.F90 index 25442aeb..e9fc57c4 100644 --- a/base/internals/psi_desc_index.F90 +++ b/base/internals/psi_desc_index.F90 @@ -230,17 +230,11 @@ subroutine psi_desc_index(desc,index_in,dep_list,& desc_index(i) = nerv call psi_idx_cnv(nerv,sndbuf(bsdindx(proc+1)+1:bsdindx(proc+1)+nerv),& & desc_index(i+1:i+nerv),desc,info) -!!$ do j=1, nerv -!!$ desc_index(i+j) = glob_to_loc(sndbuf(bsdindx(proc+1)+j)) -!!$ end do i = i + nerv + 1 nesd = rvsz(proc+1) desc_index(i) = nesd call psi_idx_cnv(nesd,rcvbuf(brvindx(proc+1)+1:brvindx(proc+1)+nesd),& & desc_index(i+1:i+nesd),desc,info) -!!$ do j=1, nesd -!!$ desc_index(i+j) = glob_to_loc(rcvbuf(brvindx(proc+1)+j)) -!!$ end do i = i + nesd + 1 end do desc_index(i) = - 1 diff --git a/base/internals/psi_dl_check.f90 b/base/internals/psi_dl_check.f90 index 95c6ac87..b336b05d 100644 --- a/base/internals/psi_dl_check.f90 +++ b/base/internals/psi_dl_check.f90 @@ -28,6 +28,21 @@ !!$ POSSIBILITY OF SUCH DAMAGE. !!$ !!$ +! +! File: psi_dl_check.f90 +! +! Subroutine: psi_dl_check +! Make sure a dependency list is symmetric, i.e. if process i depends on j +! then process j should depend on i (even if the data to be sent in one of the +! directions happens to be empty) +! +! Parameters: +! dep_list(:,:) - integer Initial dependency lists +! dl_lda - integer Allocated size of dep_list +! np - integer Total number of processes. +! length_dl(:) - integer Items in dependency lists; updated on +! exit +! subroutine psi_dl_check(dep_list,dl_lda,np,length_dl) use psi_mod, psb_protect_name => psi_dl_check @@ -40,11 +55,10 @@ subroutine psi_dl_check(dep_list,dl_lda,np,length_dl) ! locals integer :: proc, proc2, i, j - ! ...i must order communication in in halo - ! ...if in dep_list of process i there is j - ! and in dep_list of process j there isn't i, - ! add to it process i... + ! ...if j is in dep_list of process i + ! and i is not in dep_list of process j + ! fix it. do proc=0,np-1 i=1 diff --git a/base/modules/psi_gthsct_mod.f90 b/base/internals/psi_gthsct_mod.f90 similarity index 88% rename from base/modules/psi_gthsct_mod.f90 rename to base/internals/psi_gthsct_mod.f90 index d52cce9a..24a4f20f 100644 --- a/base/modules/psi_gthsct_mod.f90 +++ b/base/internals/psi_gthsct_mod.f90 @@ -28,6 +28,14 @@ !!$ POSSIBILITY OF SUCH DAMAGE. !!$ !!$ +! File: psi_gthsct_mod.f90 +! +! Module: psi_gth_scr_mod +! Provides pack/unpack routines for usage in the data exchange. +! The unpack routines take a BETA argument to have a unified treatment of +! simple receives with overwriting, and receives with sum (for overlap) +! +! module psi_gthsct_mod interface psi_gth @@ -46,6 +54,7 @@ contains subroutine psi_dgthm(n,k,idx,x,y) + use psb_const_mod implicit none integer :: n, k, idx(:) @@ -66,6 +75,7 @@ contains subroutine psi_dgthv(n,idx,x,y) + use psb_const_mod implicit none integer :: n, idx(:) @@ -83,6 +93,7 @@ contains subroutine psi_dsctm(n,k,idx,x,beta,y) + use psb_const_mod implicit none integer :: n, k, idx(:) @@ -91,7 +102,7 @@ contains ! Locals integer :: i, j, pt - if (beta.eq.0.d0) then + if (beta == dzero) then pt=0 do j=1,k do i=1,n @@ -99,7 +110,7 @@ contains y(idx(i),j) = x(pt) end do end do - else if (beta.eq.1.d0) then + else if (beta == done) then pt=0 do j=1,k do i=1,n @@ -120,6 +131,7 @@ contains subroutine psi_dsctv(n,idx,x,beta,y) + use psb_const_mod implicit none integer :: n, k, idx(:) @@ -128,11 +140,11 @@ contains ! Locals integer :: i, j, pt - if (beta.eq.0.d0) then + if (beta == dzero) then do i=1,n y(idx(i)) = x(i) end do - else if (beta.eq.1.d0) then + else if (beta == done) then do i=1,n y(idx(i)) = y(idx(i))+x(i) end do @@ -146,6 +158,7 @@ contains subroutine psi_igthm(n,k,idx,x,y) + use psb_const_mod implicit none integer :: n, k, idx(:) @@ -167,6 +180,7 @@ contains subroutine psi_igthv(n,idx,x,y) + use psb_const_mod implicit none integer :: n, idx(:) @@ -185,6 +199,7 @@ contains subroutine psi_isctm(n,k,idx,x,beta,y) + use psb_const_mod implicit none integer :: n, k, idx(:) @@ -193,7 +208,7 @@ contains ! Locals integer :: i, j, pt - if (beta.eq.0.d0) then + if (beta == izero) then pt=0 do j=1,k do i=1,n @@ -201,7 +216,7 @@ contains y(idx(i),j) = x(pt) end do end do - else if (beta.eq.1.d0) then + else if (beta == ione) then pt=0 do j=1,k do i=1,n @@ -222,6 +237,7 @@ contains subroutine psi_isctv(n,idx,x,beta,y) + use psb_const_mod implicit none integer :: n, k, idx(:) @@ -230,11 +246,11 @@ contains ! Locals integer :: i, j, pt - if (beta.eq.0.d0) then + if (beta == izero) then do i=1,n y(idx(i)) = x(i) end do - else if (beta.eq.1.d0) then + else if (beta == ione) then do i=1,n y(idx(i)) = y(idx(i))+x(i) end do @@ -248,6 +264,7 @@ contains subroutine psi_zgthm(n,k,idx,x,y) + use psb_const_mod implicit none integer :: n, k, idx(:) @@ -269,6 +286,7 @@ contains subroutine psi_zgthv(n,idx,x,y) + use psb_const_mod implicit none integer :: n, idx(:) @@ -285,6 +303,7 @@ contains subroutine psi_zsctm(n,k,idx,x,beta,y) + use psb_const_mod implicit none integer :: n, k, idx(:) @@ -293,7 +312,7 @@ contains ! Locals integer :: i, j, pt - if (beta.eq.0.d0) then + if (beta == zzero) then pt=0 do j=1,k do i=1,n @@ -301,7 +320,7 @@ contains y(idx(i),j) = x(pt) end do end do - else if (beta.eq.1.d0) then + else if (beta == zone) then pt=0 do j=1,k do i=1,n @@ -323,6 +342,7 @@ contains subroutine psi_zsctv(n,idx,x,beta,y) + use psb_const_mod implicit none integer :: n, k, idx(:) @@ -331,11 +351,11 @@ contains ! Locals integer :: i, j, pt - if (beta.eq.0.d0) then + if (beta == zzero) then do i=1,n y(idx(i)) = x(i) end do - else if (beta.eq.1.d0) then + else if (beta == zone) then do i=1,n y(idx(i)) = y(idx(i))+x(i) end do diff --git a/base/modules/Makefile b/base/modules/Makefile index 53f89ae3..1120c526 100644 --- a/base/modules/Makefile +++ b/base/modules/Makefile @@ -3,7 +3,6 @@ include ../../Make.inc MODULES = psb_realloc_mod.o psb_string_mod.o psb_spmat_type.o \ psb_desc_type.o psb_spsb_mod.o \ psb_serial_mod.o psb_tools_mod.o \ - psi_gthsct_mod.o \ psb_error_mod.o \ psb_const_mod.o \ psb_comm_mod.o psb_psblas_mod.o psi_mod.o \ @@ -23,7 +22,7 @@ psb_realloc_mod.o : psb_error_mod.o psb_spmat_type.o : psb_realloc_mod.o psb_error_mod.o psb_const_mod.o psb_string_mod.o psb_error_mod.o: psb_const_mod.o psb_penv_mod.o: psb_const_mod.o psb_error_mod.o psb_realloc_mod.o -psi_mod.o: psb_penv_mod.o psb_error_mod.o psb_desc_type.o psi_gthsct_mod.o +psi_mod.o: psb_penv_mod.o psb_error_mod.o psb_desc_type.o psb_desc_type.o: psb_const_mod.o psb_error_mod.o psb_penv_mod.o psb_check_mod.o: psb_desc_type.o psb_methd_mod.o: psb_serial_mod.o psb_desc_type.o psb_prec_type.o diff --git a/base/modules/psb_const_mod.f90 b/base/modules/psb_const_mod.f90 index de584e7e..066854ad 100644 --- a/base/modules/psb_const_mod.f90 +++ b/base/modules/psb_const_mod.f90 @@ -33,13 +33,13 @@ module psb_const_mod ! ! Handy & miscellaneous constants ! - integer, parameter :: ione=1, izero=0 - integer, parameter :: itwo=2, ithree=3,mone=-1, psb_root_=0 - real(kind(1.d0)), parameter :: psb_colrow_=0.33, psb_percent_=0.7 - real(kind(1.d0)), parameter :: dzero=0.d0, done=1.d0 + integer, parameter :: izero=0, ione=1 + integer, parameter :: itwo=2, ithree=3,mone=-1, psb_root_=0 + real(kind(1.d0)), parameter :: psb_colrow_=0.33, psb_percent_=0.7 + real(kind(1.d0)), parameter :: dzero=0.d0, done=1.d0 complex(kind(1.d0)), parameter :: zzero=(0.d0,0.0d0) complex(kind(1.d0)), parameter :: zone=(1.d0,0.0d0) - real(kind(1.d0)), parameter :: epstol=1.d-32 + real(kind(1.d0)), parameter :: epstol=1.d-32 character, parameter :: psb_all_='A', psb_topdef_=' ' diff --git a/base/serial/aux/dasr.f90 b/base/serial/aux/dasr.f90 index f0581efd..36ff9081 100644 --- a/base/serial/aux/dasr.f90 +++ b/base/serial/aux/dasr.f90 @@ -32,7 +32,7 @@ subroutine dasr(n,x,dir) use psb_serial_mod implicit none - ! + ! ! Quicksort on absolute value. ! Adapted from a number of sources, including Don Knuth's TAOCP. ! diff --git a/test/fileread/runs/dfs.inp b/test/fileread/runs/dfs.inp index 95ddb635..7b6cea3b 100644 --- a/test/fileread/runs/dfs.inp +++ b/test/fileread/runs/dfs.inp @@ -1,6 +1,6 @@ 11 Number of inputs -young1r.mtx This (and others) from: http://math.nist.gov/MatrixMarket/ or -NONE http://www.cise.ufl.edu/research/sparse/matrices/index.html +a.mtx thm1000x600.mtx young1r.mtx This (and others) from: http://math.nist.gov/MatrixMarket/ or +rhs.mtx NONE http://www.cise.ufl.edu/research/sparse/matrices/index.html BICGSTAB Iterative method: BiCGSTAB CGS RGMRES BiCGSTABL BJAC Preconditioner NONE DIAG BJAC CSR Storage format CSR COO JAD diff --git a/test/pargen/runs/ppde.inp b/test/pargen/runs/ppde.inp index 9b87ac11..770d7c59 100644 --- a/test/pargen/runs/ppde.inp +++ b/test/pargen/runs/ppde.inp @@ -1,11 +1,11 @@ 7 Number of entries below this -BICGSTAB Iterative method BICGSTAB CGS BICG BICGSTABL RGMRES +BICGSTAB Iterative method BICGSTAB CGS BICG BICGSTABL RGMRES BJAC Preconditioner NONE DIAG BJAC -CSR A Storage format CSR COO JAD -20 Domain size (acutal system is this**3) +JAD CSR A Storage format CSR COO JAD +60 Domain size (acutal system is this**3) 1 Stopping criterion 80 MAXIT -00 ITRACE -02 IRST restart for RGMRES and BiCGSTABL +01 ITRACE +20 IRST restart for RGMRES and BiCGSTABL