Branch to test split diag/nondiag matrix.

psblas-caf-csr
Salvatore Filippone 8 years ago
parent e3085a5f79
commit 5295c552ff

@ -101,6 +101,15 @@ module psb_d_csr_mat_mod
end type psb_d_csr_sparse_mat
type, extends(psb_d_csr_sparse_mat) :: psb_d_csre_sparse_mat
contains
procedure, pass(a) :: csmv => psb_d_csre_csmv
procedure, nopass :: get_fmt => d_csre_get_fmt
end type psb_d_csre_sparse_mat
private :: d_csr_get_nzeros, d_csr_free, d_csr_get_fmt, &
& d_csr_get_size, d_csr_sizeof, d_csr_get_nz_row, &
& d_csr_is_by_rows
@ -494,6 +503,19 @@ module psb_d_csr_mat_mod
end subroutine psb_d_csr_scals
end interface
!> \memberof psb_d_csre_sparse_mat
!! \see psb_d_base_mat_mod::psb_d_base_csmv
interface
subroutine psb_d_csre_csmv(alpha,a,x,beta,y,info,trans)
import :: psb_ipk_, psb_d_csre_sparse_mat, psb_dpk_
class(psb_d_csre_sparse_mat), intent(in) :: a
real(psb_dpk_), intent(in) :: alpha, beta, x(:)
real(psb_dpk_), intent(inout) :: y(:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
end subroutine psb_d_csre_csmv
end interface
contains
@ -511,16 +533,16 @@ contains
! == ===================================
function d_csr_is_by_rows(a) result(res)
implicit none
class(psb_d_csr_sparse_mat), intent(in) :: a
logical :: res
res = .true.
end function d_csr_is_by_rows
function d_csr_sizeof(a) result(res)
implicit none
class(psb_d_csr_sparse_mat), intent(in) :: a
@ -529,7 +551,7 @@ contains
res = res + psb_sizeof_dp * psb_size(a%val)
res = res + psb_sizeof_int * psb_size(a%irp)
res = res + psb_sizeof_int * psb_size(a%ja)
end function d_csr_sizeof
function d_csr_get_fmt() result(res)
@ -537,7 +559,13 @@ contains
character(len=5) :: res
res = 'CSR'
end function d_csr_get_fmt
function d_csre_get_fmt() result(res)
implicit none
character(len=5) :: res
res = 'CSRe'
end function d_csre_get_fmt
function d_csr_get_nzeros(a) result(res)
implicit none
class(psb_d_csr_sparse_mat), intent(in) :: a
@ -551,7 +579,7 @@ contains
integer(psb_ipk_) :: res
res = -1
if (allocated(a%ja)) then
res = size(a%ja)
end if
@ -570,17 +598,17 @@ contains
function d_csr_get_nz_row(idx,a) result(res)
implicit none
class(psb_d_csr_sparse_mat), intent(in) :: a
integer(psb_ipk_), intent(in) :: idx
integer(psb_ipk_) :: res
res = 0
if ((1<=idx).and.(idx<=a%get_nrows())) then
res = a%irp(idx+1)-a%irp(idx)
end if
end function d_csr_get_nz_row
@ -608,7 +636,7 @@ contains
call a%set_null()
call a%set_nrows(izero)
call a%set_ncols(izero)
return
end subroutine d_csr_free

@ -72,7 +72,7 @@
module psb_d_mat_mod
use psb_d_base_mat_mod
use psb_d_csr_mat_mod, only : psb_d_csr_sparse_mat
use psb_d_csr_mat_mod, only : psb_d_csr_sparse_mat, psb_d_csre_sparse_mat
use psb_d_csc_mat_mod, only : psb_d_csc_sparse_mat
type :: psb_dspmat_type

@ -3180,3 +3180,365 @@ contains
end subroutine csr_spspmm
end subroutine psb_dcsrspspmm
! == ===================================
!
!
!
! Computational routines
!
!
!
!
!
!
! == ===================================
subroutine psb_d_csre_csmv(alpha,a,x,beta,y,info,trans)
use psb_error_mod
use psb_string_mod
use psb_d_csr_mat_mod, psb_protect_name => psb_d_csre_csmv
implicit none
class(psb_d_csre_sparse_mat), intent(in) :: a
real(psb_dpk_), intent(in) :: alpha, beta, x(:)
real(psb_dpk_), intent(inout) :: y(:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
character :: trans_
integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc
real(psb_dpk_) :: acc
logical :: tra, ctra
integer(psb_ipk_) :: err_act
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name='d_csr_csmv'
logical, parameter :: debug=.false.
call psb_erractionsave(err_act)
info = psb_success_
if (a%is_dev()) call a%sync()
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info,name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T')
ctra = (psb_toupper(trans_) == 'C')
if (tra.or.ctra) then
m = a%get_ncols()
n = a%get_nrows()
else
n = a%get_ncols()
m = a%get_nrows()
end if
if (size(x,1)<n) then
info = psb_err_input_asize_small_i_
ierr(1) = 3; ierr(2) = n;
call psb_errpush(info,name,i_err=ierr)
goto 9999
end if
if (size(y,1)<m) then
info = psb_err_input_asize_small_i_
ierr(1) = 5; ierr(2) = m;
call psb_errpush(info,name,i_err=ierr)
goto 9999
end if
call psb_d_csre_csmv_inner(m,n,alpha,a%irp,a%ja,a%val,&
& a%is_triangle(),a%is_unit(),&
& x,beta,y,tra,ctra)
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
contains
subroutine psb_d_csre_csmv_inner(m,n,alpha,irp,ja,val,is_triangle,is_unit,&
& x,beta,y,tra,ctra)
integer(psb_ipk_), intent(in) :: m,n,irp(*),ja(*)
real(psb_dpk_), intent(in) :: alpha, beta, x(*),val(*)
real(psb_dpk_), intent(inout) :: y(*)
logical, intent(in) :: is_triangle,is_unit,tra, ctra
integer(psb_ipk_) :: i,j,k, ir, jc
real(psb_dpk_) :: acc
if (alpha == dzero) then
if (beta == dzero) then
do i = 1, m
y(i) = dzero
enddo
else
do i = 1, m
y(i) = beta*y(i)
end do
endif
return
end if
if ((.not.tra).and.(.not.ctra)) then
if (beta == dzero) then
if (alpha == done) then
do i=1,m
acc = dzero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = acc
end do
else if (alpha == -done) then
do i=1,m
acc = dzero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = -acc
end do
else
do i=1,m
acc = dzero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = alpha*acc
end do
end if
else if (beta == done) then
if (alpha == done) then
do i=1,m
acc = dzero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
if (acc /= dzero) y(i) = y(i) + acc
end do
else if (alpha == -done) then
do i=1,m
acc = dzero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
if (acc /= dzero) y(i) = y(i) -acc
end do
else
do i=1,m
acc = dzero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
if (acc /= dzero) y(i) = y(i) + alpha*acc
end do
end if
else if (beta == -done) then
if (alpha == done) then
do i=1,m
acc = dzero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = -y(i) + acc
end do
else if (alpha == -done) then
do i=1,m
acc = dzero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = -y(i) -acc
end do
else
do i=1,m
acc = dzero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = -y(i) + alpha*acc
end do
end if
else
if (alpha == done) then
do i=1,m
acc = dzero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = beta*y(i) + acc
end do
else if (alpha == -done) then
do i=1,m
acc = dzero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = beta*y(i) - acc
end do
else
do i=1,m
acc = dzero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = beta*y(i) + alpha*acc
end do
end if
end if
else if (tra) then
if (beta == dzero) then
do i=1, m
y(i) = dzero
end do
else if (beta == done) then
! Do nothing
else if (beta == -done) then
do i=1, m
y(i) = -y(i)
end do
else
do i=1, m
y(i) = beta*y(i)
end do
end if
if (alpha == done) then
do i=1,n
do j=irp(i), irp(i+1)-1
ir = ja(j)
y(ir) = y(ir) + val(j)*x(i)
end do
enddo
else if (alpha == -done) then
do i=1,n
do j=irp(i), irp(i+1)-1
ir = ja(j)
y(ir) = y(ir) - val(j)*x(i)
end do
enddo
else
do i=1,n
do j=irp(i), irp(i+1)-1
ir = ja(j)
y(ir) = y(ir) + alpha*val(j)*x(i)
end do
enddo
end if
else if (ctra) then
if (beta == dzero) then
do i=1, m
y(i) = dzero
end do
else if (beta == done) then
! Do nothing
else if (beta == -done) then
do i=1, m
y(i) = -y(i)
end do
else
do i=1, m
y(i) = beta*y(i)
end do
end if
if (alpha == done) then
do i=1,n
do j=irp(i), irp(i+1)-1
ir = ja(j)
y(ir) = y(ir) + (val(j))*x(i)
end do
enddo
else if (alpha == -done) then
do i=1,n
do j=irp(i), irp(i+1)-1
ir = ja(j)
y(ir) = y(ir) - (val(j))*x(i)
end do
enddo
else
do i=1,n
do j=irp(i), irp(i+1)-1
ir = ja(j)
y(ir) = y(ir) + alpha*(val(j))*x(i)
end do
enddo
end if
endif
if (is_unit) then
do i=1, min(m,n)
y(i) = y(i) + alpha*x(i)
end do
end if
end subroutine psb_d_csre_csmv_inner
end subroutine psb_d_csre_csmv

@ -11,45 +11,41 @@
# The following ones are the variables used by the PSBLAS make scripts.
F90=caf
FC= caf
CC=mpicc
F90COPT=-O3
FCOPT=-O3
CCOPT=-O3
FC=caf
CC=mpicc
FCOPT=-g -O3
CCOPT=-g -O3
FMFLAG=-I
FIFLAG=-I
EXTRA_OPT=
# These three should be always set!
MPF90=caf
MPF77=caf
MPCC=mpicc
MPFC=caf
MPCC=mpicc
F90LINK=$(MPF90)
FLINK=$(MPF77)
FLINK=$(MPFC)
LIBS= -L/home/users/pasqua/NUMERICAL/LIB/atlas/gnu491
LIBS=
# BLAS, BLACS and METIS libraries.
BLAS=-lcblas -lf77blas -latlas
METIS_LIB=
BLAS=-lcblas -lf77blas -latlas -L/opt/atlas/3.8.4/gnu/7.1.0/lib
METIS_LIB=-lmetis -L/opt/parmetis/4.0.3/mpich/3.2.0/gnu/7.1.0/Lib -L/opt/parmetis/4.0.3/mpich/3.2.0/gnu/7.1.0/lib
AMD_LIB=
LAPACK=-llapack
EXTRA_COBJS=
PSBFDEFINES= -DHAVE_MOLD -DHAVE_EXTENDS_TYPE_OF -DHAVE_SAME_TYPE_AS -DHAVE_FINAL -DHAVE_ISO_FORTRAN_ENV -DHAVE_FLUSH_STMT -DHAVE_VOLATILE -DHAVE_ISO_C_BINDING -DHAVE_MOVE_ALLOC -DMPI_MOD
PSBCDEFINES=-DLowerUnderscore -DPtr64Bits
PSBFDEFINES=-DHAVE_METIS -DHAVE_LAPACK -DHAVE_MOLD -DHAVE_EXTENDS_TYPE_OF -DHAVE_SAME_TYPE_AS -DHAVE_FINAL -DHAVE_ISO_FORTRAN_ENV -DHAVE_FLUSH_STMT -DHAVE_VOLATILE -DMPI_MOD
PSBCDEFINES=-DHAVE_METIS_ -I/opt/parmetis/4.0.3/mpich/3.2.0/gnu/7.1.0/include -I/opt/parmetis/4.0.3/mpich/3.2.0/gnu/7.1.0/Include -DLowerUnderscore -DPtr64Bits
AR=ar -cur
RANLIB=ranlib
INSTALL=/usr/bin/install -c
INSTALL_DATA=${INSTALL} -m 644
INSTALL_DIR=/home/users/pasqua/Ambra/LIB/PSBLAS
INSTALL_LIBDIR=/home/users/pasqua/Ambra/LIB/PSBLAS/lib
INSTALL_INCLUDEDIR=/home/users/pasqua/Ambra/LIB/PSBLAS/include
INSTALL_DOCSDIR=/home/users/pasqua/Ambra/LIB/PSBLAS/docs
INSTALL_SAMPLESDIR=/home/users/pasqua/Ambra/LIB/PSBLAS/samples
INSTALL_DIR=/opt/psblas/CAF/7.1.0-csr
INSTALL_LIBDIR=/opt/psblas/CAF/7.1.0-csr/lib
INSTALL_INCLUDEDIR=/opt/psblas/CAF/7.1.0-csr/include
INSTALL_DOCSDIR=/opt/psblas/CAF/7.1.0-csr/docs
INSTALL_SAMPLESDIR=/opt/psblas/CAF/7.1.0-csr/samples
# the following is the flag for /bin/cp which shall copy the file only for updating (timestamp based)--on GNU Linux, '-u'
CPUPDFLAG=
@ -75,12 +71,8 @@ FDEFINES=$(PSBFDEFINES)
# These should be portable rules, arent they?
.c.o:
$(CC) $(CCOPT) $(CINCLUDES) $(CDEFINES) -c $< -o $@
.f.o:
$(FC) $(FCOPT) $(FINCLUDES) -c $< -o $@
.f90.o:
$(F90) $(F90COPT) $(FINCLUDES) -c $< -o $@
.F.o:
$(FC) $(FCOPT) $(FINCLUDES) $(FDEFINES) -c $< -o $@
$(FC) $(FCOPT) $(FINCLUDES) -c $< -o $@
.F90.o:
$(F90) $(F90COPT) $(FINCLUDES) $(FDEFINES) -c $< -o $@
$(FC) $(FCOPT) $(FINCLUDES) $(FDEFINES) -c $< -o $@

@ -43,10 +43,10 @@ program pdgenspmv
! miscellaneous
real(psb_dpk_), parameter :: one = 1.d0
real(psb_dpk_) :: t1, t2, tprec, flops, tflops, tt1, tt2, bdwdth
real(psb_dpk_) :: t1, t2, tprec, flops, tflops, tflops1, tt1, tt2, bdwdth, th, th1
! sparse matrix and preconditioner
type(psb_dspmat_type) :: a
type(psb_dspmat_type) :: a, ad, ah, ah1
! descriptor
type(psb_desc_type) :: desc_a
! dense matrices
@ -54,12 +54,13 @@ program pdgenspmv
real(psb_dpk_), allocatable :: tst(:)
! blacs parameters
integer(psb_ipk_) :: ictxt, iam, np
type(psb_d_csre_sparse_mat) :: acsre
! solver parameters
integer(psb_ipk_) :: iter, itmax,itrace, istopc, irst, nr
integer(psb_long_int_k_) :: amatsize, precsize, descsize, d2size, annz, nbytes
integer(psb_ipk_) :: iter, itmax,itrace, istopc, irst, nr,nrl,ncl
integer(psb_long_int_k_) :: amatsize, precsize, descsize, d2size, annz, nbytes, ahnnz
real(psb_dpk_) :: err, eps
integer(psb_ipk_), parameter :: times=10
integer(psb_ipk_), parameter :: times=50
! other variables
integer(psb_ipk_) :: info, i
@ -109,7 +110,9 @@ program pdgenspmv
end if
if (iam == psb_root_) write(psb_out_unit,'("Overall matrix creation time : ",es12.5)')t2
if (iam == psb_root_) write(psb_out_unit,'(" ")')
!!$ write(fname,'(a,i3.3,a,i3.3,a,i3.3,a)') 'testmat-',idim,'-',np,'-',iam,'.mtx'
!!$ call a%print(fname,head="psb-testing")
call xv%set(done)
call psb_barrier(ictxt)
@ -121,43 +124,67 @@ program pdgenspmv
t2 = psb_wtime() - t1
call psb_amx(ictxt,t2)
nrl = desc_a%get_local_rows()
ncl = desc_a%get_local_cols()
call a%csclip(ad,info,jmax=nrl)
call a%csclip(ah,info,jmin=nrl+1,jmax=ncl,cscale=.true.)
call a%csclip(ah1,info,jmin=nrl+1,jmax=ncl,cscale=.true.)
call ah%cscnv(info,mold=acsre)
call ah1%cscnv(info,mold=acsre)
! FIXME: cache flush needed here
call psb_barrier(ictxt)
tt1 = psb_wtime()
do i=1,times
call psb_spmm(done,a,xv,dzero,bv,desc_a,info,'t')
call psb_csmm(done,ah,xv,done,bv,info)
end do
call psb_barrier(ictxt)
th = psb_wtime() - tt1
call psb_barrier(ictxt)
tt1 = psb_wtime()
do i=1,times
call psb_csmm(done,ah1,xv,done,bv,info)
end do
call psb_barrier(ictxt)
tt2 = psb_wtime() - tt1
call psb_amx(ictxt,tt2)
th1 = psb_wtime() - tt1
call psb_amx(ictxt,th1)
call psb_amx(ictxt,t2)
nr = desc_a%get_global_rows()
annz = a%get_nzeros()
ahnnz = ah%get_nzeros()
amatsize = a%sizeof()
descsize = psb_sizeof(desc_a)
call psb_sum(ictxt,annz)
call psb_sum(ictxt,ahnnz)
call psb_sum(ictxt,amatsize)
call psb_sum(ictxt,descsize)
if (iam == psb_root_) then
flops = 2.d0*times*annz
tflops=flops
flops = 2.d0*times*annz
tflops = 2.d0*times*ahnnz
tflops1 = 2.d0*times*ahnnz
write(psb_out_unit,'("Matrix: ell1 ",i0)') idim
write(psb_out_unit,'("Test on : ",i20," processors")') np
write(psb_out_unit,'("Size of matrix : ",i20," ")') nr
write(psb_out_unit,'("Number of nonzeros : ",i20," ")') annz
write(psb_out_unit,'("Number of nonzeros : ",i20," ")') ahnnz
write(psb_out_unit,'("Memory occupation : ",i20," ")') amatsize
write(psb_out_unit,'("Number of flops (",i0," prod) : ",F20.0," ")') times,flops
flops = flops / (t2)
tflops = tflops / (tt2)
flops = flops / (t2)
tflops = tflops / (th)
tflops1 = tflops1 / (th1)
write(psb_out_unit,'("Time for ",i0," products (s) : ",F20.3)')times, t2
write(psb_out_unit,'("Time per product (ms) : ",F20.3)') t2*1.d3/(1.d0*times)
write(psb_out_unit,'("MFLOPS : ",F20.3)') flops/1.d6
write(psb_out_unit,'("Time for ",i0," products (s) (trans.): ",F20.3)') times,tt2
write(psb_out_unit,'("Time per product (ms) (trans.): ",F20.3)') tt2*1.d3/(1.d0*times)
write(psb_out_unit,'("Time for ",i0," products (s) (trans.): ",F20.3)') times,th
write(psb_out_unit,'("Time per product (ms) (trans.): ",F20.3)') th*1.d3/(1.d0*times)
write(psb_out_unit,'("MFLOPS (trans.): ",F20.3)') tflops/1.d6
write(psb_out_unit,'("Time for ",i0," products (s) (trans.): ",F20.3)') times,th1
write(psb_out_unit,'("Time per product (ms) (trans.): ",F20.3)') th1*1.d3/(1.d0*times)
write(psb_out_unit,'("MFLOPS (trans.): ",F20.3)') tflops1/1.d6
!
! This computation is valid for CSR

Loading…
Cancel
Save