From 07ab8f0632f87a52677d1dfefd9fcb7ad20ef616 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Tue, 11 Jan 2011 17:27:07 +0000 Subject: [PATCH] psblas3-dev: opt/Makefile opt/elldev.c opt/elldev.h opt/elldev_mod.F90 opt/psb_d_elg_impl.F90 opt/psb_d_elg_mat_mod.F90 Started work on GPU interfacing. --- opt/Makefile | 12 +- opt/elldev.c | 114 ++ opt/elldev.h | 88 ++ opt/elldev_mod.F90 | 156 +++ opt/psb_d_elg_impl.F90 | 2789 +++++++++++++++++++++++++++++++++++++ opt/psb_d_elg_mat_mod.F90 | 289 ++++ 6 files changed, 3444 insertions(+), 4 deletions(-) create mode 100644 opt/elldev.c create mode 100644 opt/elldev.h create mode 100644 opt/elldev_mod.F90 create mode 100644 opt/psb_d_elg_impl.F90 create mode 100644 opt/psb_d_elg_mat_mod.F90 diff --git a/opt/Makefile b/opt/Makefile index 9b3b6218..fe56f3c9 100644 --- a/opt/Makefile +++ b/opt/Makefile @@ -15,20 +15,24 @@ FINCLUDES=$(FMFLAG)$(LIBDIR) $(FMFLAG). EXEDIR=./runs -OBJS=psb_d_ell_impl.o psb_d_ell_mat_mod.o rsb_mod.o psb_d_rsb_mat_mod.o -all: libopt.a +OBJS=psb_d_ell_impl.o psb_d_ell_mat_mod.o rsb_mod.o psb_d_rsb_mat_mod.o \ + psb_d_elg_mat_mod.o psb_d_elg_impl.o elldev.o elldev_mod.o +lib: libopt.a libopt.a: $(OBJS) ar cur libopt.a $(OBJS) psb_d_ell_impl.o: psb_d_ell_mat_mod.o psb_d_rsb_mat_mod.o: rsb_mod.o +psb_d_elg_impl.o: psb_d_elg_mat_mod.o +psb_d_elg_mat_mod.o: elldev_mod.o +elldev.o: elldev.c +elldev.c: elldev.h + clean: /bin/rm -f $(OBJS) *$(.mod) verycleanlib: (cd ../..; make veryclean) -lib: - (cd ../../; make library) diff --git a/opt/elldev.c b/opt/elldev.c new file mode 100644 index 00000000..3f687f2c --- /dev/null +++ b/opt/elldev.c @@ -0,0 +1,114 @@ +#include "elldev.h" + +// sparse Ell matrix-vector product +int spmvEllDeviceFloat(void *deviceMat, float* alpha, void* deviceX, float* beta, void* deviceY); +int spmvEllDeviceDouble(void *deviceMat, double* alpha, void* deviceX, double* beta, void* deviceY); + + +int writeVecDeviceFloat(void* deviceVec, float* hostVec); +int writeVecDeviceDouble(void* deviceVec, double* hostVec); + +int readVecDeviceFloat(void* deviceVec, float* hostVec); +int readVecDeviceDouble(void* deviceVec, double* hostVec); + +int dotVecDeviceFloat(float* y_res, void* devVecA, void* devVecB); +int dotVecDeviceDouble(double* y_res, void* devVecA, void* devVecB); + +int axpbyVecDeviceFloat(float* alpha, void* devVecX, float* beta, void* devVecY); +int axpbyVecDeviceDouble(double* alpha, void* devVecX, double* beta, void* devVecY); + + + + +int spmvEllDeviceFloat(void *deviceMat, float* alpha, void* deviceX, float* beta, void* deviceY) +{ +#ifdef HAVE_ELL_GPU + return spmvEllDevice(deviceMat, (void *) alpha, deviceX, (void *) beta, deviceY); +#else + return CINTRF_UNSUPPORTED; +#endif +} + +int spmvEllDeviceDouble(void *deviceMat, double* alpha, void* deviceX, double* beta, void* deviceY) +{ +#ifdef HAVE_ELL_GPU + return spmvEllDevice(deviceMat, (void *) alpha, deviceX, (void *) beta, deviceY); +#else + return CINTRF_UNSUPPORTED; +#endif +} + + +int writeVecDeviceFloat(void* deviceVec, float* hostVec) +{ +#ifdef HAVE_ELL_GPU + return writeVecDevice(deviceVec, (void *) hostVec); +#else + return CINTRF_UNSUPPORTED; +#endif +} + +int writeVecDeviceDouble(void* deviceVec, double* hostVec) +{ +#ifdef HAVE_ELL_GPU + return writeVecDevice(deviceVec, (void *) hostVec); +#else + return CINTRF_UNSUPPORTED; +#endif +} + +int readVecDeviceFloat(void* deviceVec, float* hostVec) +{ +#ifdef HAVE_ELL_GPU + return readVecDevice(deviceVec, (void *) hostVec); +#else + return CINTRF_UNSUPPORTED; +#endif +} + +int readVecDeviceDouble(void* deviceVec, double* hostVec) +{ +#ifdef HAVE_ELL_GPU + return readVecDevice(deviceVec, (void *) hostVec); +#else + return CINTRF_UNSUPPORTED; +#endif + +} + +int dotVecDeviceFloat(float* y_res, void* devVecA, void* devVecB) +{ +#ifdef HAVE_ELL_GPU + return dotVecDevice((void *) y_res, devVecA, devVecB); +#else + return CINTRF_UNSUPPORTED; +#endif +} + +int dotVecDeviceDouble(double* y_res, void* devVecA, void* devVecB) +{ +#ifdef HAVE_ELL_GPU + return dotVecDevice((void *) y_res, devVecA, devVecB); +#else + return CINTRF_UNSUPPORTED; +#endif +} + +int axpbyVecDeviceFloat(float* alpha, void* devVecX, float* beta, void* devVecY) +{ +#ifdef HAVE_ELL_GPU + return axpbyVecDevice((void *) alpha, devVecX, (void *) beta, devVecY); +#else + return CINTRF_UNSUPPORTED; +#endif +} + +int axpbyVecDeviceDouble(double* alpha, void* devVecX, double* beta, void* devVecY) +{ +#ifdef HAVE_ELL_GPU + return axpbyVecDevice((void *) alpha, devVecX, (void *) beta, devVecY); +#else + return CINTRF_UNSUPPORTED; +#endif +} + diff --git a/opt/elldev.h b/opt/elldev.h new file mode 100644 index 00000000..c008fc47 --- /dev/null +++ b/opt/elldev.h @@ -0,0 +1,88 @@ +#ifndef SPGPU_INTERFACE_H +#define SPGPU_INTERFACE_H + +////////////// +// legenda: +// cM : compressed Matrix +// rP : row pointers +// rS : row size +///////////// + +// element types +#define TYPE_FLOAT 0 +#define TYPE_DOUBLE 1 +// TYPE_COMPLEX? +// TYPE_INT? +// TYPE_BOOLEAN? + +// return codes +#define CINTRF_SUCCESS 0 +#define CINTRF_NOMEMORY -1 +#define CINTRF_UNSUPPORTED -2 + +typedef struct EllDeviceParams +{ + // The resulting allocation for cM and rP will be pitch*maxRowSize*elementSize + unsigned int elementType; + + // Pitch (in number of elements) + unsigned int pitch; + + // Number of rows. + // Used to allocate rS array + unsigned int rows; + + // Largest row size + unsigned int maxRowSize; + + // First index (e.g 0 or 1) + unsigned int firstIndex; +} EllDeviceParams; + +#ifdef HAVE_ELL_GPU +// Generate ELLPACK format matrix parameters +EllDeviceParams getEllDeviceParams(unsigned int rows, unsigned int maxRowSize, unsigned int elementType, unsigned int firstIndex); + +// Allocate/Free matrix on device +// remote pointer returned in *remoteMatrix +// (device struct type is hidden, use device pointer as id) + +// can return CINTRF_SUCCESS or CINTRF_NOMEMORY or CINTRF_UNSUPPORTED +// return the matrix pointer (use the pointer just as an id) in *deviceMat +int allocEllDevice(void** deviceMat, EllDeviceParams* params); +void freeEllDevice(void* deviceMat); + +// Update device copy with host copy +int writeEllDevice(void *deviceMat, void* cM, int* rP, int* rS); + +// Update host copy with device copy +int readEllDevice(void *deviceMat, void* cM, int* rP, int* rS); + +// sparse Ell matrix-vector product +int spmvEllDevice(void *deviceMat, void* alpha, void* deviceX, void* beta, void* deviceY); + +//// Vector management +// can return CINTRF_SUCCESS or CINTRF_NOMEMORY or CINTRF_UNSUPPORTED +// return the vector pointer (use the pointer just as an id) in *deviceVec +int allocVecDevice(void** deviceVec, int size, unsigned int elementType); +void freeVecDevice(void* deviceVec); + +// Host Vector -> Device Vector +// if deviceVec is a float device vector, hostVec will be a float array +// if deviceVec is a double device vector, hostVec will be a double array +int writeVecDevice(void* deviceVec, void* hostVec); +// Device Vector -> Host Vector +int readVecDevice(void* deviceVec, void* hostVec); + +// dot product (y_res = a * b) +// y_res: pointer to result (e.g. float*/double*) +// devVecA, devVecB: device vectors +// if devVecA and devVecB are float prec. device vectors, y_res should be a pointer to a float value +// if devVecA and devVecB are double prec. device vectors, y_res should be a pointer to a double value +int dotVecDevice(void* y_res, void* devVecA, void* devVecB); + +// if devVecX and devVecY are float prec. device vectors, alpha and beta should be pointers to a float value +// if devVecX and devVecY are double prec. device vectors, alpha and beta should be pointers to a double value +int axpbyVecDevice(void* alpha, void* devVecX, void* beta, void* devVecY); +#endif +#endif diff --git a/opt/elldev_mod.F90 b/opt/elldev_mod.F90 new file mode 100644 index 00000000..d998f775 --- /dev/null +++ b/opt/elldev_mod.F90 @@ -0,0 +1,156 @@ +module elldev_mod + use iso_c_binding + + integer(c_int), parameter :: elldev_float = 0 + integer(c_int), parameter :: elldev_double = 1 + integer(c_int), parameter :: elldev_success = 0 + integer(c_int), parameter :: elldev_nomem = -1 + integer(c_int), parameter :: elldev_unsupported = -2 + + type, bind(c) :: elldev_parms + integer(c_int) :: element_type + integer(c_int) :: pitch + integer(c_int) :: rows + integer(c_int) :: maxRowSize + integer(c_int) :: firstIndex + end type elldev_parms + +#ifdef HAVE_ELL_GPU + interface + function getEllDeviceParams(rows,maxRowSize,elementType,firstIndex) & + & result(val) bind(c,name='getEllDeviceParams') + use iso_c_binding + import :: elldev_parms + type(elldev_parms) :: val + integer(c_int), value :: rows,maxRowSize,elementType,firstIndex + end function getEllDeviceParams + end interface + + interface + function allocEllDevice(deviceMat,parms) & + & result(val) bind(c,name='allocEllDevice') + use iso_c_binding + import :: elldev_parms + integer(c_int) :: val + type(elldev_parms), value :: parms + type(c_ptr) :: deviceMat + end function allocEllDevice + end interface + + interface + subroutine freeEllDevice(deviceMat) & + & bind(c,name='freeEllDevice') + use iso_c_binding + type(c_ptr), value :: deviceMat + end subroutine freeEllDevice + end interface + + interface + function allocVecDevice(deviceVec, size, datatype) & + & result(val) bind(c,name='allocVecDevice') + use iso_c_binding + import :: elldev_parms + integer(c_int) :: val + integer(c_int), value :: size, datatype + type(c_ptr) :: deviceVec + end function allocVecDevice + end interface + + interface + subroutine freeVecDevice(deviceVec) & + & bind(c,name='freeVecDevice') + use iso_c_binding + type(c_ptr), value :: deviceVec + end subroutine freeVecDevice + end interface + + interface writeVecDevice + function writeVecDeviceFloat(deviceVec,hostVec) & + & result(val) bind(c,name='writeVecDeviceFloat') + use iso_c_binding + integer(c_int) :: val + type(c_ptr), value :: deviceVec + real(c_float) :: hostVec(*) + end function writeVecDeviceFloat + function writeVecDeviceDouble(deviceVec,hostVec) & + & result(val) bind(c,name='writeVecDeviceDouble') + use iso_c_binding + integer(c_int) :: val + type(c_ptr), value :: deviceVec + real(c_double) :: hostVec(*) + end function writeVecDeviceDouble + end interface writeVecDevice + + interface readVecDevice + function readVecDeviceFloat(deviceVec,hostVec) & + & result(val) bind(c,name='readVecDeviceFloat') + use iso_c_binding + integer(c_int) :: val + type(c_ptr), value :: deviceVec + real(c_float) :: hostVec(*) + end function readVecDeviceFloat + function readVecDeviceDouble(deviceVec,hostVec) & + & result(val) bind(c,name='readVecDeviceDouble') + use iso_c_binding + integer(c_int) :: val + type(c_ptr), value :: deviceVec + real(c_double) :: hostVec(*) + end function readVecDeviceDouble + end interface readVecDevice + + interface spmvEllDevice + function spmvEllDeviceFloat(deviceMat,alpha,x,beta,y) & + & result(val) bind(c,name='spmvEllDeviceFloat') + use iso_c_binding + integer(c_int) :: val + type(c_ptr), value :: deviceMat, x, y + real(c_float) :: alpha, beta + end function spmvEllDeviceFloat + function spmvEllDeviceDouble(deviceMat,alpha,x,beta,y) & + & result(val) bind(c,name='spmvEllDeviceDouble') + use iso_c_binding + integer(c_int) :: val + type(c_ptr), value :: deviceMat, x, y + real(c_double) :: alpha, beta + end function spmvEllDeviceDouble + end interface spmvEllDevice + + interface dotVecDevice + function dotVecDeviceFloat(res,deviceVecA,deviceVecB) & + & result(val) bind(c,name='dotVecDeviceFloat') + use iso_c_binding + integer(c_int) :: val + real(c_float) :: res + type(c_ptr), value :: deviceVecA, deviceVecB + end function dotVecDeviceFloat + function dotVecDeviceDouble(res,deviceVecA,deviceVecB) & + & result(val) bind(c,name='dotVecDeviceDouble') + use iso_c_binding + integer(c_int) :: val + real(c_double) :: res + type(c_ptr), value :: deviceVecA, deviceVecB + end function dotVecDeviceDouble + end interface dotVecDevice + + interface axpbyVecDevice + function axpbyVecDeviceFloat(alpha,deviceVecA,beta,deviceVecB) & + & result(val) bind(c,name='axpbyVecDeviceFloat') + use iso_c_binding + integer(c_int) :: val + real(c_float) :: alpha, beta + type(c_ptr), value :: deviceVecA, deviceVecB + end function axpbyVecDeviceFloat + function axpbyVecDeviceDouble(alpha,deviceVecA,beta,deviceVecB) & + & result(val) bind(c,name='axpbyVecDeviceDouble') + use iso_c_binding + integer(c_int) :: val + real(c_double) :: alpha,beta + type(c_ptr), value :: deviceVecA, deviceVecB + end function axpbyVecDeviceDouble + end interface axpbyVecDevice + + +#endif + + +end module elldev_mod diff --git a/opt/psb_d_elg_impl.F90 b/opt/psb_d_elg_impl.F90 new file mode 100644 index 00000000..e70322b9 --- /dev/null +++ b/opt/psb_d_elg_impl.F90 @@ -0,0 +1,2789 @@ + +! == =================================== +! +! +! +! Computational routines +! +! +! +! +! +! +! == =================================== + +subroutine psb_d_elg_csmv(alpha,a,x,beta,y,info,trans) + use psb_sparse_mod +#ifdef HAVE_ELL_GPU + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_csmv + use elldev_mod +#else + use psb_d_elg_mat_mod +#endif + use iso_c_binding + implicit none + class(psb_d_elg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + real(psb_dpk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='d_elg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_elg_csmm + implicit none + class(psb_d_elg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc, nxy + real(psb_dpk_), allocatable :: acc(:) + logical :: tra + Integer :: err_act + character(len=20) :: name='d_elg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_elg_cssv + implicit none + class(psb_d_elg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + real(psb_dpk_) :: acc + real(psb_dpk_), allocatable :: tmp(:) + logical :: tra + Integer :: err_act + character(len=20) :: name='d_elg_cssv' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + m = a%get_nrows() + + if (.not. (a%is_triangle())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x) psb_d_elg_cssm + implicit none + class(psb_d_elg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc, nxy + real(psb_dpk_), allocatable :: tmp(:,:), acc(:) + logical :: tra + Integer :: err_act + character(len=20) :: name='d_elg_cssm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + m = a%get_nrows() + + if (.not. (a%is_triangle())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x,1) psb_d_elg_csnmi + implicit none + class(psb_d_elg_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + integer :: i,j,k,m,n, nr, ir, jc, nc + real(psb_dpk_) :: acc + logical :: tra + Integer :: err_act + character(len=20) :: name='d_csnmi' + logical, parameter :: debug=.false. + + + res = dzero + + do i = 1, a%get_nrows() + acc = sum(abs(a%val(i,:))) + res = max(res,acc) + end do + +end function psb_d_elg_csnmi + + +function psb_d_elg_csnm1(a) result(res) + use psb_sparse_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_csnm1 + + implicit none + class(psb_d_elg_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + integer :: i,j,k,m,n, nnz, ir, jc, nc, info + real(psb_dpk_) :: acc + real(psb_dpk_), allocatable :: vt(:) + logical :: tra + Integer :: err_act + character(len=20) :: name='d_elg_csnm1' + logical, parameter :: debug=.false. + + + res = -done + nnz = a%get_nzeros() + m = a%get_nrows() + n = a%get_ncols() + allocate(vt(n),stat=info) + if (info /= 0) return + vt(:) = dzero + do i=1, m + do j=1,a%irn(i) + k = a%ja(i,j) + vt(k) = vt(k) + abs(a%val(i,j)) + end do + end do + res = maxval(vt(1:n)) + deallocate(vt,stat=info) + + return + +end function psb_d_elg_csnm1 + + +subroutine psb_d_elg_rowsum(d,a) + use psb_sparse_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_rowsum + class(psb_d_elg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer :: i,j,k,m,n, nnz, ir, jc, nc + real(psb_dpk_) :: acc + real(psb_dpk_), allocatable :: vt(:) + logical :: tra + Integer :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + do i = 1, a%get_nrows() + d(i) = dzero + do j=1,a%irn(i) + d(i) = d(i) + (a%val(i,j)) + end do + end do + + return + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act == psb_act_abort_) then + call psb_error() + return + end if + return + +end subroutine psb_d_elg_rowsum + +subroutine psb_d_elg_arwsum(d,a) + use psb_sparse_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_arwsum + class(psb_d_elg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer :: i,j,k,m,n, nnz, ir, jc, nc + real(psb_dpk_) :: acc + real(psb_dpk_), allocatable :: vt(:) + logical :: tra + Integer :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + + do i = 1, a%get_nrows() + d(i) = dzero + do j=1,a%irn(i) + d(i) = d(i) + abs(a%val(i,j)) + end do + end do + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act == psb_act_abort_) then + call psb_error() + return + end if + return + +end subroutine psb_d_elg_arwsum + +subroutine psb_d_elg_colsum(d,a) + use psb_sparse_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_colsum + class(psb_d_elg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer :: i,j,k,m,n, nnz, ir, jc, nc + real(psb_dpk_) :: acc + real(psb_dpk_), allocatable :: vt(:) + logical :: tra + Integer :: err_act, info, int_err(5) + character(len=20) :: name='colsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + d = dzero + + do i=1, m + do j=1,a%irn(i) + k = a%ja(i,j) + d(k) = d(k) + (a%val(i,j)) + end do + end do + + return + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act == psb_act_abort_) then + call psb_error() + return + end if + return + +end subroutine psb_d_elg_colsum + +subroutine psb_d_elg_aclsum(d,a) + use psb_sparse_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_aclsum + class(psb_d_elg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer :: i,j,k,m,n, nnz, ir, jc, nc + real(psb_dpk_) :: acc + real(psb_dpk_), allocatable :: vt(:) + logical :: tra + Integer :: err_act, info, int_err(5) + character(len=20) :: name='aclsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + d = dzero + + do i=1, m + do j=1,a%irn(i) + k = a%ja(i,j) + d(k) = d(k) + abs(a%val(i,j)) + end do + end do + + return + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act == psb_act_abort_) then + call psb_error() + return + end if + return + +end subroutine psb_d_elg_aclsum + + +subroutine psb_d_elg_get_diag(a,d,info) + use psb_sparse_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_get_diag + implicit none + class(psb_d_elg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + integer, intent(out) :: info + + Integer :: err_act, mnm, i, j, k + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2,size(d),0,0,0/)) + goto 9999 + end if + + + if (a%is_triangle().and.a%is_unit()) then + d(1:mnm) = done + else + do i=1, mnm + if (1<=a%idiag(i).and.(a%idiag(i)<=size(a%ja,2))) & + & d(i) = a%val(i,a%idiag(i)) + end do + end if + do i=mnm+1,size(d) + d(i) = dzero + end do + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act == psb_act_abort_) then + call psb_error() + return + end if + return + +end subroutine psb_d_elg_get_diag + + +subroutine psb_d_elg_scal(d,a,info) + use psb_sparse_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_scal + implicit none + class(psb_d_elg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer, intent(out) :: info + + Integer :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2,size(d),0,0,0/)) + goto 9999 + end if + + do i=1, m + a%val(i,:) = a%val(i,:) * d(i) + enddo + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act == psb_act_abort_) then + call psb_error() + return + end if + return + +end subroutine psb_d_elg_scal + + +subroutine psb_d_elg_scals(d,a,info) + use psb_sparse_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_scals + implicit none + class(psb_d_elg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer, intent(out) :: info + + Integer :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + + a%val(:,:) = a%val(:,:) * d + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act == psb_act_abort_) then + call psb_error() + return + end if + return + +end subroutine psb_d_elg_scals + + + + +! == =================================== +! +! +! +! Data management +! +! +! +! +! +! == =================================== + + +subroutine psb_d_elg_reallocate_nz(nz,a) + use psb_sparse_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_reallocate_nz + implicit none + integer, intent(in) :: nz + class(psb_d_elg_sparse_mat), intent(inout) :: a + integer :: m, nzrm + Integer :: err_act, info + character(len=20) :: name='d_elg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! + m = a%get_nrows() + nzrm = (nz+m-1)/m + call psb_realloc(m,nzrm,a%ja,info) + if (info == psb_success_) call psb_realloc(m,nzrm,a%val,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act == psb_act_abort_) then + call psb_error() + return + end if + return + +end subroutine psb_d_elg_reallocate_nz + +subroutine psb_d_elg_mold(a,b,info) + use psb_sparse_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_mold + implicit none + class(psb_d_elg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(out), allocatable :: b + integer, intent(out) :: info + Integer :: err_act + character(len=20) :: name='reallocate_nz' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + allocate(psb_d_elg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return +9999 continue + if (err_act /= psb_act_ret_) then + call psb_error() + end if + return + +end subroutine psb_d_elg_mold + +subroutine psb_d_elg_allocate_mnnz(m,n,a,nz) + use psb_sparse_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_allocate_mnnz + implicit none + integer, intent(in) :: m,n + class(psb_d_elg_sparse_mat), intent(inout) :: a + integer, intent(in), optional :: nz + Integer :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/1,0,0,0,0/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2,0,0,0,0/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (nz + m -1 )/m + else + nz_ = (max(7*m,7*n,1)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3,0,0,0,0/)) + goto 9999 + endif + + if (info == psb_success_) call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(m,nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(m,nz_,a%val,info) + if (info == psb_success_) then + a%irn = 0 + a%idiag = 0 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act == psb_act_abort_) then + call psb_error() + return + end if + return + +end subroutine psb_d_elg_allocate_mnnz + + +subroutine psb_d_elg_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_sparse_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_csgetptn + implicit none + + class(psb_d_elg_sparse_mat), intent(in) :: a + integer, intent(in) :: imin,imax + integer, intent(out) :: nz + integer, allocatable, intent(inout) :: ia(:), ja(:) + integer,intent(out) :: info + logical, intent(in), optional :: append + integer, intent(in), optional :: iren(:) + integer, intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='csget' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_d_elg_csgetrow + implicit none + + class(psb_d_elg_sparse_mat), intent(in) :: a + integer, intent(in) :: imin,imax + integer, intent(out) :: nz + integer, allocatable, intent(inout) :: ia(:), ja(:) + real(psb_dpk_), allocatable, intent(inout) :: val(:) + integer,intent(out) :: info + logical, intent(in), optional :: append + integer, intent(in), optional :: iren(:) + integer, intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='csget' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_d_elg_csgetblk + implicit none + + class(psb_d_elg_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer, intent(in) :: imin,imax + integer,intent(out) :: info + logical, intent(in), optional :: append + integer, intent(in), optional :: iren(:) + integer, intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + Integer :: err_act, nzin, nzout + character(len=20) :: name='csget' + logical :: append_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(append)) then + append_ = append + else + append_ = .false. + endif + if (append_) then + nzin = a%get_nzeros() + else + nzin = 0 + endif + + call a%csget(imin,imax,nzout,b%ia,b%ja,b%val,info,& + & jmin=jmin, jmax=jmax, iren=iren, append=append_, & + & nzin=nzin, rscale=rscale, cscale=cscale) + + if (info /= psb_success_) goto 9999 + + call b%set_nzeros(nzin+nzout) + call b%fix(info) + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act == psb_act_abort_) then + call psb_error() + return + end if + return + +end subroutine psb_d_elg_csgetblk + + + +subroutine psb_d_elg_csput(nz,ia,ja,val,a,imin,imax,jmin,jmax,info,gtl) + use psb_sparse_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_csput + implicit none + + class(psb_d_elg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: val(:) + integer, intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax + integer, intent(out) :: info + integer, intent(in), optional :: gtl(:) + + + Integer :: err_act + character(len=20) :: name='d_elg_csput' + logical, parameter :: debug=.false. + integer :: nza, i,j,k, nzl, isza, int_err(5) + + + call psb_erractionsave(err_act) + info = psb_success_ + + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(ia) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (size(ja) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(val) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + nza = a%get_nzeros() + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then + call psb_d_elg_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info,gtl) + + if (info /= psb_success_) then + + info = psb_err_invalid_mat_state_ + end if + + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act == psb_act_abort_) then + call psb_error() + return + end if + return + + +contains + + subroutine psb_d_elg_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info,gtl) + + implicit none + + class(psb_d_elg_sparse_mat), intent(inout) :: a + integer, intent(in) :: nz, imin,imax,jmin,jmax + integer, intent(in) :: ia(:),ja(:) + real(psb_dpk_), intent(in) :: val(:) + integer, intent(out) :: info + integer, intent(in), optional :: gtl(:) + integer :: i,ir,ic, ilr, ilc, ip, & + & i1,i2,nr,nc,nnz,dupl,ng + integer :: debug_level, debug_unit + character(len=20) :: name='d_elg_srch_upd' + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + dupl = a%get_dupl() + + if (.not.a%is_sorted()) then + info = -4 + return + end if + + ilr = -1 + ilc = -1 + nnz = a%get_nzeros() + nr = a%get_nrows() + nc = a%get_ncols() + + if (present(gtl)) then + ng = size(gtl) + + select case(dupl) + case(psb_dupl_ovwrt_,psb_dupl_err_) + ! Overwrite. + ! Cannot test for error, should have been caught earlier. + + ilr = -1 + ilc = -1 + do i=1, nz + ir = ia(i) + ic = ja(i) + if ((ir >=1).and.(ir<=ng).and.(ic>=1).and.(ic<=ng)) then + ir = gtl(ir) + ic = gtl(ic) + if ((ir > 0).and.(ir <= nr)) then + nc = a%irn(ir) + ip = psb_ibsrch(ic,nc,a%ja(i,1:nc)) + if (ip>0) then + a%val(i,ip) = val(i) + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Was searching ',ic,' in: ',nc,& + & ' : ',a%ja(i,1:nc) + info = i + return + end if + + else + + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Discarding row that does not belong to us.' + end if + end if + end do + + case(psb_dupl_add_) + ! Add + ilr = -1 + ilc = -1 + do i=1, nz + ir = ia(i) + ic = ja(i) + if ((ir >=1).and.(ir<=ng).and.(ic>=1).and.(ic<=ng)) then + ir = gtl(ir) + ic = gtl(ic) + if ((ir > 0).and.(ir <= nr)) then + nc = a%irn(ir) + ip = psb_ibsrch(ic,nc,a%ja(i,1:nc)) + if (ip>0) then + a%val(i,ip) = a%val(i,ip) + val(i) + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Was searching ',ic,' in: ',nc,& + & ' : ',a%ja(i,1:nc) + info = i + return + end if + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Discarding row that does not belong to us.' + end if + + end if + end do + + case default + info = -3 + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Duplicate handling: ',dupl + end select + + else + + select case(dupl) + case(psb_dupl_ovwrt_,psb_dupl_err_) + ! Overwrite. + ! Cannot test for error, should have been caught earlier. + + ilr = -1 + ilc = -1 + do i=1, nz + ir = ia(i) + ic = ja(i) + + if ((ir > 0).and.(ir <= nr)) then + + nc = a%irn(ir) + ip = psb_ibsrch(ic,nc,a%ja(i,1:nc)) + if (ip>0) then + a%val(i,ip) = val(i) + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Was searching ',ic,' in: ',nc,& + & ' : ',a%ja(i,1:nc) + info = i + return + end if + + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Discarding row that does not belong to us.' + end if + + end do + + case(psb_dupl_add_) + ! Add + ilr = -1 + ilc = -1 + do i=1, nz + ir = ia(i) + ic = ja(i) + if ((ir > 0).and.(ir <= nr)) then + nc = a%irn(ir) + ip = psb_ibsrch(ic,nc,a%ja(i,1:nc)) + if (ip>0) then + a%val(i,ip) = a%val(i,ip) + val(i) + else + info = i + return + end if + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Discarding row that does not belong to us.' + end if + end do + + case default + info = -3 + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Duplicate handling: ',dupl + end select + + end if + + end subroutine psb_d_elg_srch_upd + +end subroutine psb_d_elg_csput + + + +subroutine psb_d_elg_reinit(a,clear) + use psb_sparse_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_reinit + implicit none + + class(psb_d_elg_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + + Integer :: err_act, info + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (clear_) a%val(:,:) = dzero + call a%set_upd() + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act == psb_act_abort_) then + call psb_error() + return + end if + return + +end subroutine psb_d_elg_reinit + +subroutine psb_d_elg_trim(a) + use psb_sparse_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_trim + implicit none + class(psb_d_elg_sparse_mat), intent(inout) :: a + Integer :: err_act, info, nz, m, nzm + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + m = a%get_nrows() + nzm = maxval(a%irn(1:m)) + + call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(m,nzm,a%ja,info) + if (info == psb_success_) call psb_realloc(m,nzm,a%val,info) + + if (info /= psb_success_) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act == psb_act_abort_) then + call psb_error() + return + end if + return + +end subroutine psb_d_elg_trim + +subroutine psb_d_elg_print(iout,a,iv,eirs,eics,head,ivr,ivc) + use psb_sparse_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_print + implicit none + + integer, intent(in) :: iout + class(psb_d_elg_sparse_mat), intent(in) :: a + integer, intent(in), optional :: iv(:) + integer, intent(in), optional :: eirs,eics + character(len=*), optional :: head + integer, intent(in), optional :: ivr(:), ivc(:) + + Integer :: err_act + character(len=20) :: name='d_elg_print' + logical, parameter :: debug=.false. + + character(len=80) :: frmtv + integer :: irs,ics,i,j, nmx, ni, nr, nc, nz + + if (present(eirs)) then + irs = eirs + else + irs = 0 + endif + if (present(eics)) then + ics = eics + else + ics = 0 + endif + + if (present(head)) then + write(iout,'(a)') '%%MatrixMarket matrix coordinate real general' + write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% COO' + endif + + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + nmx = max(nr,nc,1) + ni = floor(log10(1.0*nmx)) + 1 + + write(frmtv,'(a,i3.3,a,i3.3,a)') '(2(i',ni,',1x),es26.18,1x,2(i',ni,',1x))' + write(iout,*) nr, nc, nz + if(present(iv)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmtv) iv(i),iv(a%ja(i,j)),a%val(i,j) + end do + enddo + else + if (present(ivr).and..not.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmtv) ivr(i),(a%ja(i,j)),a%val(i,j) + end do + enddo + else if (present(ivr).and.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmtv) ivr(i),ivc(a%ja(i,j)),a%val(i,j) + end do + enddo + else if (.not.present(ivr).and.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmtv) (i),ivc(a%ja(i,j)),a%val(i,j) + end do + enddo + else if (.not.present(ivr).and..not.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmtv) (i),(a%ja(i,j)),a%val(i,j) + end do + enddo + endif + endif + +end subroutine psb_d_elg_print + + +subroutine psb_d_cp_elg_from_coo(a,b,info) + use psb_sparse_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_cp_elg_from_coo + implicit none + + class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer, intent(out) :: info + + type(psb_d_coo_sparse_mat) :: tmp + integer, allocatable :: itemp(:) + !locals + logical :: rwshr_ + Integer :: nza, nr, i,j,irw, idl,err_act, nc + Integer, Parameter :: maxtry=8 + integer :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + ! This is to have fix_coo called behind the scenes + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + +end subroutine psb_d_cp_elg_from_coo + + + +subroutine psb_d_cp_elg_to_coo(a,b,info) + use psb_sparse_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_cp_elg_to_coo + implicit none + + class(psb_d_elg_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer, intent(out) :: info + + integer, allocatable :: itemp(:) + !locals + logical :: rwshr_ + Integer :: nza, nr, nc,i,j,k,irw, idl,err_act + Integer, Parameter :: maxtry=8 + integer :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + call b%psb_d_base_sparse_mat%cp_from(a%psb_d_base_sparse_mat) + + k=0 + do i=1, nr + do j=1,a%irn(i) + k = k + 1 + b%ia(k) = i + b%ja(k) = a%ja(i,j) + b%val(k) = a%val(i,j) + end do + end do + call b%set_nzeros(a%get_nzeros()) + call b%fix(info) + +end subroutine psb_d_cp_elg_to_coo + +subroutine psb_d_mv_elg_to_coo(a,b,info) + use psb_sparse_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_mv_elg_to_coo + implicit none + + class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer, intent(out) :: info + + integer, allocatable :: itemp(:) + !locals + logical :: rwshr_ + Integer :: nza, nr, nc,i,j,k,irw, idl,err_act + Integer, Parameter :: maxtry=8 + integer :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + ! Taking a path slightly slower but with less memory footprint + deallocate(a%idiag) + call b%psb_d_base_sparse_mat%cp_from(a%psb_d_base_sparse_mat) + + call psb_realloc(nza,b%ia,info) + if (info == 0) call psb_realloc(nza,b%ja,info) + if (info /= 0) goto 9999 + k=0 + do i=1, nr + do j=1,a%irn(i) + k = k + 1 + b%ia(k) = i + b%ja(k) = a%ja(i,j) + end do + end do + deallocate(a%ja, stat=info) + + if (info == 0) call psb_realloc(nza,b%val,info) + if (info /= 0) goto 9999 + + k=0 + do i=1, nr + do j=1,a%irn(i) + k = k + 1 + b%val(k) = a%val(i,j) + end do + end do + call a%free() + call b%set_nzeros(nza) + call b%fix(info) + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_d_mv_elg_to_coo + + +subroutine psb_d_mv_elg_from_coo(a,b,info) + use psb_sparse_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_mv_elg_from_coo + implicit none + + class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer, intent(out) :: info + + integer, allocatable :: itemp(:) + !locals + logical :: rwshr_ + Integer :: nza, nr, i,j,k, idl,err_act, nc, nzm, ir, ic + Integer, Parameter :: maxtry=8 + integer :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + + call b%fix(info) + if (info /= psb_success_) return + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + if (b%is_sorted()) then + ! If it is sorted then we can lessen memory impact + call a%psb_d_base_sparse_mat%mv_from(b%psb_d_base_sparse_mat) + + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info /= 0) goto 9999 + a%irn = 0 + do i=1, nza + a%irn(b%ia(i)) = a%irn(b%ia(i)) + 1 + end do + nzm = 0 + do i=1, nr + nzm = max(nzm,a%irn(i)) + a%irn(i) = 0 + end do + ! Second: copy the column indices. + call psb_realloc(nr,a%idiag,info) + if (info == 0) call psb_realloc(nr,nzm,a%ja,info) + if (info /= 0) goto 9999 + do i=1, nza + ir = b%ia(i) + ic = b%ja(i) + j = a%irn(ir) + 1 + a%ja(ir,j) = ic + a%irn(ir) = j + end do + ! Third copy the other stuff + deallocate(b%ia,b%ja,stat=info) + if (info == 0) call psb_realloc(nr,a%idiag,info) + if (info == 0) call psb_realloc(nr,nzm,a%val,info) + if (info /= 0) goto 9999 + k = 0 + do i=1, nr + a%idiag(i) = 0 + do j=1, a%irn(i) + k = k + 1 + a%val(i,j) = b%val(k) + if (i==a%ja(i,j)) a%idiag(i)=j + end do + do j=a%irn(i)+1, nzm + a%ja(i,j) = i + a%val(i,j) = dzero + end do + end do + + else + ! If b is not sorted, the only way is to copy. + call a%cp_from_coo(b,info) + if (info /= 0) goto 9999 + end if + + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + +end subroutine psb_d_mv_elg_from_coo + + +subroutine psb_d_mv_elg_to_fmt(a,b,info) + use psb_sparse_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_mv_elg_to_fmt + implicit none + + class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer, intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + logical :: rwshr_ + Integer :: nza, nr, i,j,irw, idl,err_act, nc + Integer, Parameter :: maxtry=8 + integer :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + + select type (b) + type is (psb_d_coo_sparse_mat) + call a%mv_to_coo(b,info) + ! Need to fix trivial copies! + type is (psb_d_elg_sparse_mat) + call b%psb_d_base_sparse_mat%mv_from(a%psb_d_base_sparse_mat) + call move_alloc(a%irn, b%irn) + call move_alloc(a%idiag, b%idiag) + call move_alloc(a%ja, b%ja) + call move_alloc(a%val, b%val) + call a%free() + + class default + call a%mv_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_d_mv_elg_to_fmt + + +subroutine psb_d_cp_elg_to_fmt(a,b,info) + use psb_sparse_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_cp_elg_to_fmt + implicit none + + class(psb_d_elg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer, intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + logical :: rwshr_ + Integer :: nza, nr, i,j,irw, idl,err_act, nc + Integer, Parameter :: maxtry=8 + integer :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + + + select type (b) + type is (psb_d_coo_sparse_mat) + call a%cp_to_coo(b,info) + + type is (psb_d_elg_sparse_mat) + call b%psb_d_base_sparse_mat%cp_from(a%psb_d_base_sparse_mat) + call psb_safe_cpy( a%idiag, b%idiag , info) + call psb_safe_cpy( a%irn, b%irn , info) + call psb_safe_cpy( a%ja , b%ja , info) + call psb_safe_cpy( a%val, b%val , info) + + class default + call a%cp_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_d_cp_elg_to_fmt + + +subroutine psb_d_mv_elg_from_fmt(a,b,info) + use psb_sparse_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_mv_elg_from_fmt + implicit none + + class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer, intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + logical :: rwshr_ + Integer :: nza, nr, i,j,irw, idl,err_act, nc + Integer, Parameter :: maxtry=8 + integer :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + + select type (b) + type is (psb_d_coo_sparse_mat) + call a%mv_from_coo(b,info) + + type is (psb_d_elg_sparse_mat) + call a%psb_d_base_sparse_mat%mv_from(b%psb_d_base_sparse_mat) + call move_alloc(b%irn, a%irn) + call move_alloc(b%idiag, a%idiag) + call move_alloc(b%ja, a%ja) + call move_alloc(b%val, a%val) + call b%free() + + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_d_mv_elg_from_fmt + + + +subroutine psb_d_cp_elg_from_fmt(a,b,info) + use psb_sparse_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_cp_elg_from_fmt + implicit none + + class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer, intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + logical :: rwshr_ + Integer :: nz, nr, i,j,irw, idl,err_act, nc + Integer, Parameter :: maxtry=8 + integer :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + + select type (b) + type is (psb_d_coo_sparse_mat) + call a%cp_from_coo(b,info) + + type is (psb_d_elg_sparse_mat) + call a%psb_d_base_sparse_mat%cp_from(b%psb_d_base_sparse_mat) + call psb_safe_cpy( b%irn, a%irn , info) + call psb_safe_cpy( b%idiag, a%idiag, info) + call psb_safe_cpy( b%ja , a%ja , info) + call psb_safe_cpy( b%val, a%val , info) + + class default + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select +end subroutine psb_d_cp_elg_from_fmt + + +subroutine psb_d_elg_cp_from(a,b) + use psb_sparse_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_cp_from + implicit none + + class(psb_d_elg_sparse_mat), intent(inout) :: a + type(psb_d_elg_sparse_mat), intent(in) :: b + + + Integer :: err_act, info + character(len=20) :: name='cp_from' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + info = psb_success_ + + call a%allocate(b%get_nrows(),b%get_ncols(),b%get_nzeros()) + call a%psb_d_base_sparse_mat%cp_from(b%psb_d_base_sparse_mat) + call psb_safe_cpy( b%irn, a%irn , info) + call psb_safe_cpy( b%idiag, a%idiag, info) + call psb_safe_cpy( b%ja , a%ja , info) + call psb_safe_cpy( b%val, a%val , info) + + if (info /= psb_success_) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + call psb_errpush(info,name) + + if (err_act /= psb_act_ret_) then + call psb_error() + end if + return + +end subroutine psb_d_elg_cp_from + +subroutine psb_d_elg_mv_from(a,b) + use psb_sparse_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_mv_from + implicit none + + class(psb_d_elg_sparse_mat), intent(inout) :: a + type(psb_d_elg_sparse_mat), intent(inout) :: b + + + Integer :: err_act, info + character(len=20) :: name='mv_from' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + call a%psb_d_base_sparse_mat%mv_from(b%psb_d_base_sparse_mat) + call move_alloc(b%idiag, a%idiag) + call move_alloc(b%irn, a%irn) + call move_alloc(b%ja, a%ja) + call move_alloc(b%val, a%val) + call b%free() + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + call psb_errpush(info,name) + + if (err_act /= psb_act_ret_) then + call psb_error() + end if + return + +end subroutine psb_d_elg_mv_from + + +#endif diff --git a/opt/psb_d_elg_mat_mod.F90 b/opt/psb_d_elg_mat_mod.F90 new file mode 100644 index 00000000..b19bf467 --- /dev/null +++ b/opt/psb_d_elg_mat_mod.F90 @@ -0,0 +1,289 @@ +module psb_d_elg_mat_mod + + use iso_c_binding + use psb_d_base_mat_mod + use psb_d_ell_mat_mod + + type, extends(psb_d_ell_sparse_mat) :: psb_d_elg_sparse_mat + ! + ! ITPACK/ELL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_ELL_GPU is undefined this is just + ! a copy of ELL, indistinguishable. + ! +#ifdef HAVE_ELL_GPU + type(c_ptr) :: deviceMat = c_null_ptr + + contains + procedure, pass(a) :: get_fmt => d_elg_get_fmt + procedure, pass(a) :: sizeof => d_elg_sizeof + procedure, pass(a) :: d_csmm => psb_d_elg_csmm + procedure, pass(a) :: d_csmv => psb_d_elg_csmv + procedure, pass(a) :: d_scals => psb_d_elg_scals + procedure, pass(a) :: d_scal => psb_d_elg_scal + procedure, pass(a) :: reallocate_nz => psb_d_elg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_elg_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_d_cp_elg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_cp_elg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_mv_elg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_mv_elg_from_fmt + procedure, pass(a) :: free => d_elg_free + procedure, pass(a) :: mold => psb_d_elg_mold + procedure, pass(a) :: psb_d_elg_cp_from + generic, public :: cp_from => psb_d_elg_cp_from + procedure, pass(a) :: psb_d_elg_mv_from + generic, public :: mv_from => psb_d_elg_mv_from +#endif + end type psb_d_elg_sparse_mat + +#ifdef HAVE_ELL_GPU + private :: d_elg_get_nzeros, d_elg_free, d_elg_get_fmt, & + & d_elg_get_size, d_elg_sizeof, d_elg_get_nz_row + + interface + subroutine psb_d_elg_reallocate_nz(nz,a) + import :: psb_d_elg_sparse_mat + integer, intent(in) :: nz + class(psb_d_elg_sparse_mat), intent(inout) :: a + end subroutine psb_d_elg_reallocate_nz + end interface + + interface + subroutine psb_d_elg_allocate_mnnz(m,n,a,nz) + import :: psb_d_elg_sparse_mat + integer, intent(in) :: m,n + class(psb_d_elg_sparse_mat), intent(inout) :: a + integer, intent(in), optional :: nz + end subroutine psb_d_elg_allocate_mnnz + end interface + + interface + subroutine psb_d_elg_mold(a,b,info) + import :: psb_d_elg_sparse_mat, psb_d_base_sparse_mat, psb_long_int_k_ + class(psb_d_elg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(out), allocatable :: b + integer, intent(out) :: info + end subroutine psb_d_elg_mold + end interface + + interface + subroutine psb_d_cp_elg_from_coo(a,b,info) + import :: psb_d_elg_sparse_mat, psb_d_coo_sparse_mat + class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer, intent(out) :: info + end subroutine psb_d_cp_elg_from_coo + end interface + + interface + subroutine psb_d_cp_elg_from_fmt(a,b,info) + import :: psb_d_elg_sparse_mat, psb_d_base_sparse_mat + class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer, intent(out) :: info + end subroutine psb_d_cp_elg_from_fmt + end interface + + interface + subroutine psb_d_mv_elg_from_coo(a,b,info) + import :: psb_d_elg_sparse_mat, psb_d_coo_sparse_mat + class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer, intent(out) :: info + end subroutine psb_d_mv_elg_from_coo + end interface + + + interface + subroutine psb_d_mv_elg_from_fmt(a,b,info) + import :: psb_d_elg_sparse_mat, psb_d_base_sparse_mat + class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer, intent(out) :: info + end subroutine psb_d_mv_elg_from_fmt + end interface + + interface + subroutine psb_d_elg_cp_from(a,b) + import :: psb_d_elg_sparse_mat, psb_dpk_ + class(psb_d_elg_sparse_mat), intent(inout) :: a + type(psb_d_elg_sparse_mat), intent(in) :: b + end subroutine psb_d_elg_cp_from + end interface + + interface + subroutine psb_d_elg_mv_from(a,b) + import :: psb_d_elg_sparse_mat, psb_dpk_ + class(psb_d_elg_sparse_mat), intent(inout) :: a + type(psb_d_elg_sparse_mat), intent(inout) :: b + end subroutine psb_d_elg_mv_from + end interface + + interface + subroutine psb_d_elg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_elg_sparse_mat, psb_dpk_ + class(psb_d_elg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_elg_csmv + end interface + interface + subroutine psb_d_elg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_elg_sparse_mat, psb_dpk_ + class(psb_d_elg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_elg_csmm + end interface + + interface + subroutine psb_d_elg_scal(d,a,info) + import :: psb_d_elg_sparse_mat, psb_dpk_ + class(psb_d_elg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer, intent(out) :: info + end subroutine psb_d_elg_scal + end interface + + interface + subroutine psb_d_elg_scals(d,a,info) + import :: psb_d_elg_sparse_mat, psb_dpk_ + class(psb_d_elg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer, intent(out) :: info + end subroutine psb_d_elg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function d_elg_sizeof(a) result(res) + implicit none + class(psb_d_elg_sparse_mat), intent(in) :: a + integer(psb_long_int_k_) :: res + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_int * size(a%irn) + res = res + psb_sizeof_int * size(a%idiag) + res = res + psb_sizeof_int * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function d_elg_sizeof + + function d_elg_get_fmt(a) result(res) + implicit none + class(psb_d_elg_sparse_mat), intent(in) :: a + character(len=5) :: res + res = 'ELG' + end function d_elg_get_fmt + +!!$ function d_elg_get_nzeros(a) result(res) +!!$ implicit none +!!$ class(psb_d_elg_sparse_mat), intent(in) :: a +!!$ integer :: res +!!$ res = sum(a%irn(1:a%get_nrows())) +!!$ end function d_elg_get_nzeros +!!$ +!!$ function d_elg_get_size(a) result(res) +!!$ implicit none +!!$ class(psb_d_elg_sparse_mat), intent(in) :: a +!!$ integer :: res +!!$ +!!$ res = -1 +!!$ +!!$ if (allocated(a%ja)) then +!!$ if (res >= 0) then +!!$ res = min(res,size(a%ja)) +!!$ else +!!$ res = size(a%ja) +!!$ end if +!!$ end if +!!$ if (allocated(a%val)) then +!!$ if (res >= 0) then +!!$ res = min(res,size(a%val)) +!!$ else +!!$ res = size(a%val) +!!$ end if +!!$ end if +!!$ +!!$ end function d_elg_get_size +!!$ +!!$ +!!$ +!!$ function d_elg_get_nz_row(idx,a) result(res) +!!$ +!!$ implicit none +!!$ +!!$ class(psb_d_elg_sparse_mat), intent(in) :: a +!!$ integer, intent(in) :: idx +!!$ integer :: res +!!$ +!!$ res = 0 +!!$ +!!$ if ((1<=idx).and.(idx<=a%get_nrows())) then +!!$ res = a%irn(idx) +!!$ end if +!!$ +!!$ end function d_elg_get_nz_row +!!$ + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine d_elg_free(a) +#ifdef HAVE_ELL_GPU + use elldev_mod +#endif + implicit none + integer :: info + + class(psb_d_elg_sparse_mat), intent(inout) :: a + + if (allocated(a%idiag)) deallocate(a%idiag) + if (allocated(a%irn)) deallocate(a%irn) + if (allocated(a%ja)) deallocate(a%ja) + if (allocated(a%val)) deallocate(a%val) + call a%set_null() + call a%set_nrows(0) + call a%set_ncols(0) +#ifdef HAVE_ELL_GPU + call freeEllDevice(a%deviceMat) +#endif + + return + + end subroutine d_elg_free + +#endif + +end module psb_d_elg_mat_mod