From f7d2c856ece7dedfbb8495d1ce7f68bf2cd699f6 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 7 Nov 2023 21:44:22 +0100 Subject: [PATCH 01/12] Fix gather with SERIAL --- base/modules/penv/psi_c_collective_mod.F90 | 8 ++++---- base/modules/penv/psi_d_collective_mod.F90 | 8 ++++---- base/modules/penv/psi_e_collective_mod.F90 | 8 ++++---- base/modules/penv/psi_i2_collective_mod.F90 | 8 ++++---- base/modules/penv/psi_m_collective_mod.F90 | 8 ++++---- base/modules/penv/psi_s_collective_mod.F90 | 8 ++++---- base/modules/penv/psi_z_collective_mod.F90 | 8 ++++---- 7 files changed, 28 insertions(+), 28 deletions(-) diff --git a/base/modules/penv/psi_c_collective_mod.F90 b/base/modules/penv/psi_c_collective_mod.F90 index 8da302d0..dd6f88ed 100644 --- a/base/modules/penv/psi_c_collective_mod.F90 +++ b/base/modules/penv/psi_c_collective_mod.F90 @@ -107,7 +107,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(1) = dat #else call psb_info(ctxt,iam,np) @@ -160,7 +160,7 @@ contains use mpi #endif implicit none -#ifdef MPI_H +#ifdef MP.I_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -175,7 +175,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(:) = dat(:) #else call psb_info(ctxt,iam,np) @@ -245,7 +245,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(:) = dat(:) #else call psb_info(ctxt,iam,np) diff --git a/base/modules/penv/psi_d_collective_mod.F90 b/base/modules/penv/psi_d_collective_mod.F90 index 9639d650..5376710b 100644 --- a/base/modules/penv/psi_d_collective_mod.F90 +++ b/base/modules/penv/psi_d_collective_mod.F90 @@ -747,7 +747,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(1) = dat #else call psb_info(ctxt,iam,np) @@ -800,7 +800,7 @@ contains use mpi #endif implicit none -#ifdef MPI_H +#ifdef MP.I_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -815,7 +815,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(:) = dat(:) #else call psb_info(ctxt,iam,np) @@ -885,7 +885,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(:) = dat(:) #else call psb_info(ctxt,iam,np) diff --git a/base/modules/penv/psi_e_collective_mod.F90 b/base/modules/penv/psi_e_collective_mod.F90 index b9ab089b..7e134f79 100644 --- a/base/modules/penv/psi_e_collective_mod.F90 +++ b/base/modules/penv/psi_e_collective_mod.F90 @@ -585,7 +585,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(1) = dat #else call psb_info(ctxt,iam,np) @@ -638,7 +638,7 @@ contains use mpi #endif implicit none -#ifdef MPI_H +#ifdef MP.I_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -653,7 +653,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(:) = dat(:) #else call psb_info(ctxt,iam,np) @@ -723,7 +723,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(:) = dat(:) #else call psb_info(ctxt,iam,np) diff --git a/base/modules/penv/psi_i2_collective_mod.F90 b/base/modules/penv/psi_i2_collective_mod.F90 index 339e4281..23e3cad6 100644 --- a/base/modules/penv/psi_i2_collective_mod.F90 +++ b/base/modules/penv/psi_i2_collective_mod.F90 @@ -585,7 +585,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(1) = dat #else call psb_info(ctxt,iam,np) @@ -638,7 +638,7 @@ contains use mpi #endif implicit none -#ifdef MPI_H +#ifdef MP.I_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -653,7 +653,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(:) = dat(:) #else call psb_info(ctxt,iam,np) @@ -723,7 +723,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(:) = dat(:) #else call psb_info(ctxt,iam,np) diff --git a/base/modules/penv/psi_m_collective_mod.F90 b/base/modules/penv/psi_m_collective_mod.F90 index 8f45d398..b6021d33 100644 --- a/base/modules/penv/psi_m_collective_mod.F90 +++ b/base/modules/penv/psi_m_collective_mod.F90 @@ -585,7 +585,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(1) = dat #else call psb_info(ctxt,iam,np) @@ -638,7 +638,7 @@ contains use mpi #endif implicit none -#ifdef MPI_H +#ifdef MP.I_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -653,7 +653,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(:) = dat(:) #else call psb_info(ctxt,iam,np) @@ -723,7 +723,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(:) = dat(:) #else call psb_info(ctxt,iam,np) diff --git a/base/modules/penv/psi_s_collective_mod.F90 b/base/modules/penv/psi_s_collective_mod.F90 index 6ffaae05..44fdc8df 100644 --- a/base/modules/penv/psi_s_collective_mod.F90 +++ b/base/modules/penv/psi_s_collective_mod.F90 @@ -747,7 +747,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(1) = dat #else call psb_info(ctxt,iam,np) @@ -800,7 +800,7 @@ contains use mpi #endif implicit none -#ifdef MPI_H +#ifdef MP.I_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -815,7 +815,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(:) = dat(:) #else call psb_info(ctxt,iam,np) @@ -885,7 +885,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(:) = dat(:) #else call psb_info(ctxt,iam,np) diff --git a/base/modules/penv/psi_z_collective_mod.F90 b/base/modules/penv/psi_z_collective_mod.F90 index 8b3ec277..6600edc4 100644 --- a/base/modules/penv/psi_z_collective_mod.F90 +++ b/base/modules/penv/psi_z_collective_mod.F90 @@ -107,7 +107,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(1) = dat #else call psb_info(ctxt,iam,np) @@ -160,7 +160,7 @@ contains use mpi #endif implicit none -#ifdef MPI_H +#ifdef MP.I_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -175,7 +175,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(:) = dat(:) #else call psb_info(ctxt,iam,np) @@ -245,7 +245,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(:) = dat(:) #else call psb_info(ctxt,iam,np) From b850c0ef6aaf33d11e06c027eb0cc9cfd6f13b48 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Thu, 9 Nov 2023 08:41:43 +0100 Subject: [PATCH 02/12] Fix typo --- base/modules/penv/psi_c_collective_mod.F90 | 2 +- base/modules/penv/psi_d_collective_mod.F90 | 2 +- base/modules/penv/psi_e_collective_mod.F90 | 2 +- base/modules/penv/psi_i2_collective_mod.F90 | 2 +- base/modules/penv/psi_m_collective_mod.F90 | 2 +- base/modules/penv/psi_s_collective_mod.F90 | 2 +- base/modules/penv/psi_z_collective_mod.F90 | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/base/modules/penv/psi_c_collective_mod.F90 b/base/modules/penv/psi_c_collective_mod.F90 index dd6f88ed..80a4b6a1 100644 --- a/base/modules/penv/psi_c_collective_mod.F90 +++ b/base/modules/penv/psi_c_collective_mod.F90 @@ -160,7 +160,7 @@ contains use mpi #endif implicit none -#ifdef MP.I_H +#ifdef MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt diff --git a/base/modules/penv/psi_d_collective_mod.F90 b/base/modules/penv/psi_d_collective_mod.F90 index 5376710b..67f95f55 100644 --- a/base/modules/penv/psi_d_collective_mod.F90 +++ b/base/modules/penv/psi_d_collective_mod.F90 @@ -800,7 +800,7 @@ contains use mpi #endif implicit none -#ifdef MP.I_H +#ifdef MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt diff --git a/base/modules/penv/psi_e_collective_mod.F90 b/base/modules/penv/psi_e_collective_mod.F90 index 7e134f79..5d66eed6 100644 --- a/base/modules/penv/psi_e_collective_mod.F90 +++ b/base/modules/penv/psi_e_collective_mod.F90 @@ -638,7 +638,7 @@ contains use mpi #endif implicit none -#ifdef MP.I_H +#ifdef MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt diff --git a/base/modules/penv/psi_i2_collective_mod.F90 b/base/modules/penv/psi_i2_collective_mod.F90 index 23e3cad6..88d40b66 100644 --- a/base/modules/penv/psi_i2_collective_mod.F90 +++ b/base/modules/penv/psi_i2_collective_mod.F90 @@ -638,7 +638,7 @@ contains use mpi #endif implicit none -#ifdef MP.I_H +#ifdef MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt diff --git a/base/modules/penv/psi_m_collective_mod.F90 b/base/modules/penv/psi_m_collective_mod.F90 index b6021d33..c97ac5a3 100644 --- a/base/modules/penv/psi_m_collective_mod.F90 +++ b/base/modules/penv/psi_m_collective_mod.F90 @@ -638,7 +638,7 @@ contains use mpi #endif implicit none -#ifdef MP.I_H +#ifdef MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt diff --git a/base/modules/penv/psi_s_collective_mod.F90 b/base/modules/penv/psi_s_collective_mod.F90 index 44fdc8df..6dcc5253 100644 --- a/base/modules/penv/psi_s_collective_mod.F90 +++ b/base/modules/penv/psi_s_collective_mod.F90 @@ -800,7 +800,7 @@ contains use mpi #endif implicit none -#ifdef MP.I_H +#ifdef MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt diff --git a/base/modules/penv/psi_z_collective_mod.F90 b/base/modules/penv/psi_z_collective_mod.F90 index 6600edc4..ff5e6a2d 100644 --- a/base/modules/penv/psi_z_collective_mod.F90 +++ b/base/modules/penv/psi_z_collective_mod.F90 @@ -160,7 +160,7 @@ contains use mpi #endif implicit none -#ifdef MP.I_H +#ifdef MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt From a082cdb1b648fa62de48781815e7090ae9c777a9 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Fri, 10 Nov 2023 13:38:19 +0100 Subject: [PATCH 03/12] Deactivate OpenMP in hash_g2lv_ins for the time being. --- base/modules/desc/psb_hash_map_mod.F90 | 35 +++++++++++++++----------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/base/modules/desc/psb_hash_map_mod.F90 b/base/modules/desc/psb_hash_map_mod.F90 index 058dbb8d..216c7aa4 100644 --- a/base/modules/desc/psb_hash_map_mod.F90 +++ b/base/modules/desc/psb_hash_map_mod.F90 @@ -683,7 +683,7 @@ contains mglob = idxmap%get_gr() nrow = idxmap%get_lr() !write(0,*) me,name,' before loop ',psb_errstatus_fatal() -#ifdef OPENMP +#if 0 && defined(OPENMP) !call OMP_init_lock(ins_lck) if (idxmap%is_bld()) then @@ -714,9 +714,9 @@ contains idx(i) = -1 cycle endif - !call OMP_set_lock(ins_lck) + call OMP_set_lock(ins_lck) ncol = idxmap%get_lc() - !call OMP_unset_lock(ins_lck) + call OMP_unset_lock(ins_lck) ! At first, we check the index presence in 'idxmap'. Usually ! the index is found. If it is not found, we repeat the checking, @@ -724,7 +724,7 @@ contains call hash_inner_cnv(ip,lip,idxmap%hashvmask,& & idxmap%hashv,idxmap%glb_lc,ncol) if (lip < 0) then - !call OMP_set_lock(ins_lck) + call OMP_set_lock(ins_lck) ! We check again if the index is already in 'idxmap', this ! time inside a critical region (we assume that the index @@ -738,9 +738,9 @@ contains idx(i) = lip else if (lip < 0) then ! Index not found - call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) + call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) lip = tlip - + if (info /= 0) write(0,*) ' inskey 1 info:',info if (info >= 0) then ! 'nxt' is not equal to 'tlip' when the key is already inside @@ -766,10 +766,11 @@ contains call idxmap%set_lc(ncol) end if end if + if (isLoopValid) info = 0 else idx(i) = -1 end if - !call OMP_unset_lock(ins_lck) + call OMP_unset_lock(ins_lck) end if else idx(i) = lip @@ -800,9 +801,9 @@ contains idx(i) = -1 cycle endif - !call OMP_set_lock(ins_lck) + call OMP_set_lock(ins_lck) ncol = idxmap%get_lc() - !call OMP_unset_lock(ins_lck) + call OMP_unset_lock(ins_lck) ! At first, we check the index presence in 'idxmap'. Usually ! the index is found. If it is not found, we repeat the checking, @@ -810,7 +811,7 @@ contains call hash_inner_cnv(ip,lip,idxmap%hashvmask,& & idxmap%hashv,idxmap%glb_lc,ncol) if (lip < 0) then - !call OMP_set_lock(ins_lck) + call OMP_set_lock(ins_lck) ! We check again if the index is already in 'idxmap', this ! time inside a critical region (we assume that the index ! is often already existing). @@ -824,7 +825,7 @@ contains else if (lip < 0) then call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) lip = tlip - +!!$ if (info /= 0) write(0,*) ' inskey 2 info:',info if (info >= 0) then ! 'nxt' is not equal to 'tlip' when the key is already inside ! the hash map. In that case 'tlip' is the value corresponding @@ -849,10 +850,11 @@ contains call idxmap%set_lc(ncol) end if end if + if (isLoopValid) info = 0 else idx(i) = -1 end if - !call OMP_unset_lock(ins_lck) + call OMP_unset_lock(ins_lck) end if else idx(i) = lip @@ -911,6 +913,7 @@ contains ! Index not found !write(0,*) me,name,' b hsik ',psb_errstatus_fatal() call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) +!!$ if (info /= 0) write(0,*) ' inskey 3 info:',info if (psb_errstatus_fatal()) write(0,*) me,name,' a hsik ',info,omp_get_thread_num() !write(0,*) me,name,' a hsik ',psb_errstatus_fatal() lip = tlip @@ -927,7 +930,7 @@ contains & pad=-1_psb_lpk_,addsz=laddsz) if (psb_errstatus_fatal()) write(0,*) me,name,' a esz ',info,omp_get_thread_num() if (info /= psb_success_) then - !write(0,*) 'Error spot 3', info + write(0,*) 'Error spot 3', info call psb_errpush(psb_err_from_subroutine_ai_,name,& &a_err='psb_ensure_size',i_err=(/info/)) @@ -939,9 +942,11 @@ contains call idxmap%set_lc(ncol) end if end if + if (isLoopValid) info = 0 else idx(i) = -1 end if +!!$ if (info /= 0) write(0,*) ' inskey 3.5 info:',info, isLoopValid !call OMP_unset_lock(ins_lck) end if else @@ -997,7 +1002,8 @@ contains ! Index not found call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) lip = tlip - +!!$ if (info /= 0) write(0,*) ' inskey 4 info:',info + if (info >= 0) then ! 'nxt' is not equal to 'tlip' when the key is already inside ! the hash map. In that case 'tlip' is the value corresponding @@ -1022,6 +1028,7 @@ contains call idxmap%set_lc(ncol) end if end if + if (isLoopValid) info = 0 else idx(i) = -1 end if From cce3103bb49832dd9d2549aa9afe3eb8a82896af Mon Sep 17 00:00:00 2001 From: sfilippone Date: Sat, 25 Nov 2023 20:06:26 +0100 Subject: [PATCH 04/12] Fix CXXDEFINES --- configure | 2 ++ configure.ac | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/configure b/configure index 752fe192..504dbf4c 100755 --- a/configure +++ b/configure @@ -7532,6 +7532,8 @@ fi # Custom test : do we have a module or include for MPI Fortran interface? if test x"$pac_cv_serial_mpi" == x"yes" ; then FDEFINES="$psblas_cv_define_prepend-DSERIAL_MPI $psblas_cv_define_prepend-DMPI_MOD $FDEFINES"; + CDEFINES="-DSERIAL_MPI $CDEFINES" + CXXDEFINES="-DSERIAL_MPI $CXXDEFINES" else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking MPI Fortran 2008 interface" >&5 printf %s "checking MPI Fortran 2008 interface... " >&6; } diff --git a/configure.ac b/configure.ac index d8a02a50..47b94e7a 100755 --- a/configure.ac +++ b/configure.ac @@ -513,6 +513,7 @@ fi # Custom test : do we have a module or include for MPI Fortran interface? if test x"$pac_cv_serial_mpi" == x"yes" ; then FDEFINES="$psblas_cv_define_prepend-DSERIAL_MPI $psblas_cv_define_prepend-DMPI_MOD $FDEFINES"; + CDEFINES="-DSERIAL_MPI $CDEFINES" else PAC_FORTRAN_CHECK_HAVE_MPI_MOD_F08() if test x"$pac_cv_mpi_f08" == x"yes" ; then @@ -847,9 +848,9 @@ AC_SUBST(FINCLUDES) PSBLASRULES=' PSBLDLIBS=$(LAPACK) $(BLAS) $(METIS_LIB) $(AMD_LIB) $(LIBS) -CXXDEFINES=$(PSBCXXDEFINES) CDEFINES=$(PSBCDEFINES) FDEFINES=$(PSBFDEFINES) +CXXDEFINES=$(PSBCXXDEFINES) # These should be portable rules, arent they? From 492b28f3429f00fd0cf2ff72de2f4dd9c92a32bb Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 29 Nov 2023 15:33:32 +0100 Subject: [PATCH 05/12] Fix wrong insert without OpenMP --- base/serial/impl/psb_c_coo_impl.F90 | 6 ++++++ base/serial/impl/psb_d_coo_impl.F90 | 6 ++++++ base/serial/impl/psb_s_coo_impl.F90 | 6 ++++++ base/serial/impl/psb_z_coo_impl.F90 | 6 ++++++ 4 files changed, 24 insertions(+) diff --git a/base/serial/impl/psb_c_coo_impl.F90 b/base/serial/impl/psb_c_coo_impl.F90 index 46391dee..b1d71321 100644 --- a/base/serial/impl/psb_c_coo_impl.F90 +++ b/base/serial/impl/psb_c_coo_impl.F90 @@ -2878,14 +2878,20 @@ subroutine psb_c_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) if (isza < (nza+nz)) then info = psb_err_alloc_dealloc_; call psb_errpush(info,name) else +#if defined(OPENMP) nzaold = nza nza = nza + nz +#endif call a%set_nzeros(nza) end if !$omp end critical if (info /= 0) goto 9999 call psb_inner_ins(nz,ia,ja,val,nzaold,a%ia,a%ja,a%val,isza,& & imin,imax,jmin,jmax,info) +#if !defined(OPENMP) + nza = nzaold + call a%set_nzeros(nza) +#endif call a%set_sorted(.false.) else if (a%is_upd()) then diff --git a/base/serial/impl/psb_d_coo_impl.F90 b/base/serial/impl/psb_d_coo_impl.F90 index c2babf8e..350085bb 100644 --- a/base/serial/impl/psb_d_coo_impl.F90 +++ b/base/serial/impl/psb_d_coo_impl.F90 @@ -2878,14 +2878,20 @@ subroutine psb_d_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) if (isza < (nza+nz)) then info = psb_err_alloc_dealloc_; call psb_errpush(info,name) else +#if defined(OPENMP) nzaold = nza nza = nza + nz +#endif call a%set_nzeros(nza) end if !$omp end critical if (info /= 0) goto 9999 call psb_inner_ins(nz,ia,ja,val,nzaold,a%ia,a%ja,a%val,isza,& & imin,imax,jmin,jmax,info) +#if !defined(OPENMP) + nza = nzaold + call a%set_nzeros(nza) +#endif call a%set_sorted(.false.) else if (a%is_upd()) then diff --git a/base/serial/impl/psb_s_coo_impl.F90 b/base/serial/impl/psb_s_coo_impl.F90 index 402c608a..51858efd 100644 --- a/base/serial/impl/psb_s_coo_impl.F90 +++ b/base/serial/impl/psb_s_coo_impl.F90 @@ -2878,14 +2878,20 @@ subroutine psb_s_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) if (isza < (nza+nz)) then info = psb_err_alloc_dealloc_; call psb_errpush(info,name) else +#if defined(OPENMP) nzaold = nza nza = nza + nz +#endif call a%set_nzeros(nza) end if !$omp end critical if (info /= 0) goto 9999 call psb_inner_ins(nz,ia,ja,val,nzaold,a%ia,a%ja,a%val,isza,& & imin,imax,jmin,jmax,info) +#if !defined(OPENMP) + nza = nzaold + call a%set_nzeros(nza) +#endif call a%set_sorted(.false.) else if (a%is_upd()) then diff --git a/base/serial/impl/psb_z_coo_impl.F90 b/base/serial/impl/psb_z_coo_impl.F90 index 542f842e..0624dd21 100644 --- a/base/serial/impl/psb_z_coo_impl.F90 +++ b/base/serial/impl/psb_z_coo_impl.F90 @@ -2878,14 +2878,20 @@ subroutine psb_z_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) if (isza < (nza+nz)) then info = psb_err_alloc_dealloc_; call psb_errpush(info,name) else +#if defined(OPENMP) nzaold = nza nza = nza + nz +#endif call a%set_nzeros(nza) end if !$omp end critical if (info /= 0) goto 9999 call psb_inner_ins(nz,ia,ja,val,nzaold,a%ia,a%ja,a%val,isza,& & imin,imax,jmin,jmax,info) +#if !defined(OPENMP) + nza = nzaold + call a%set_nzeros(nza) +#endif call a%set_sorted(.false.) else if (a%is_upd()) then From 8633e76cb0a7c5a7e8090de1496f1515b83df319 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 29 Nov 2023 16:11:11 +0100 Subject: [PATCH 06/12] Silly bug in coo insert --- base/serial/impl/psb_c_coo_impl.F90 | 2 +- base/serial/impl/psb_d_coo_impl.F90 | 2 +- base/serial/impl/psb_s_coo_impl.F90 | 2 +- base/serial/impl/psb_z_coo_impl.F90 | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/base/serial/impl/psb_c_coo_impl.F90 b/base/serial/impl/psb_c_coo_impl.F90 index b1d71321..5c90e287 100644 --- a/base/serial/impl/psb_c_coo_impl.F90 +++ b/base/serial/impl/psb_c_coo_impl.F90 @@ -2869,6 +2869,7 @@ subroutine psb_c_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) ! Hence the call to set_nzeros done here. !$omp critical nza = a%get_nzeros() + nzaold = nza isza = a%get_size() ! Build phase. Must handle reallocations in a sensible way. if (isza < (nza+nz)) then @@ -2879,7 +2880,6 @@ subroutine psb_c_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) info = psb_err_alloc_dealloc_; call psb_errpush(info,name) else #if defined(OPENMP) - nzaold = nza nza = nza + nz #endif call a%set_nzeros(nza) diff --git a/base/serial/impl/psb_d_coo_impl.F90 b/base/serial/impl/psb_d_coo_impl.F90 index 350085bb..f6a173d1 100644 --- a/base/serial/impl/psb_d_coo_impl.F90 +++ b/base/serial/impl/psb_d_coo_impl.F90 @@ -2869,6 +2869,7 @@ subroutine psb_d_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) ! Hence the call to set_nzeros done here. !$omp critical nza = a%get_nzeros() + nzaold = nza isza = a%get_size() ! Build phase. Must handle reallocations in a sensible way. if (isza < (nza+nz)) then @@ -2879,7 +2880,6 @@ subroutine psb_d_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) info = psb_err_alloc_dealloc_; call psb_errpush(info,name) else #if defined(OPENMP) - nzaold = nza nza = nza + nz #endif call a%set_nzeros(nza) diff --git a/base/serial/impl/psb_s_coo_impl.F90 b/base/serial/impl/psb_s_coo_impl.F90 index 51858efd..4c12d8fc 100644 --- a/base/serial/impl/psb_s_coo_impl.F90 +++ b/base/serial/impl/psb_s_coo_impl.F90 @@ -2869,6 +2869,7 @@ subroutine psb_s_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) ! Hence the call to set_nzeros done here. !$omp critical nza = a%get_nzeros() + nzaold = nza isza = a%get_size() ! Build phase. Must handle reallocations in a sensible way. if (isza < (nza+nz)) then @@ -2879,7 +2880,6 @@ subroutine psb_s_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) info = psb_err_alloc_dealloc_; call psb_errpush(info,name) else #if defined(OPENMP) - nzaold = nza nza = nza + nz #endif call a%set_nzeros(nza) diff --git a/base/serial/impl/psb_z_coo_impl.F90 b/base/serial/impl/psb_z_coo_impl.F90 index 0624dd21..44ee89b5 100644 --- a/base/serial/impl/psb_z_coo_impl.F90 +++ b/base/serial/impl/psb_z_coo_impl.F90 @@ -2869,6 +2869,7 @@ subroutine psb_z_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) ! Hence the call to set_nzeros done here. !$omp critical nza = a%get_nzeros() + nzaold = nza isza = a%get_size() ! Build phase. Must handle reallocations in a sensible way. if (isza < (nza+nz)) then @@ -2879,7 +2880,6 @@ subroutine psb_z_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) info = psb_err_alloc_dealloc_; call psb_errpush(info,name) else #if defined(OPENMP) - nzaold = nza nza = nza + nz #endif call a%set_nzeros(nza) From eed4c574bc0a86394dae07570787c00d7c8865fc Mon Sep 17 00:00:00 2001 From: sfilippone Date: Mon, 15 Apr 2024 12:19:16 +0200 Subject: [PATCH 07/12] Take out obsolete ilu_fct files --- prec/impl/Makefile | 8 +- prec/impl/psb_cilu_fct.f90 | 438 ------------------------------------ prec/impl/psb_dilu_fct.f90 | 441 ------------------------------------- prec/impl/psb_silu_fct.f90 | 440 ------------------------------------ prec/impl/psb_zilu_fct.f90 | 438 ------------------------------------ 5 files changed, 4 insertions(+), 1761 deletions(-) delete mode 100644 prec/impl/psb_cilu_fct.f90 delete mode 100644 prec/impl/psb_dilu_fct.f90 delete mode 100644 prec/impl/psb_silu_fct.f90 delete mode 100644 prec/impl/psb_zilu_fct.f90 diff --git a/prec/impl/Makefile b/prec/impl/Makefile index bc5ef2e1..2b6b1dc5 100644 --- a/prec/impl/Makefile +++ b/prec/impl/Makefile @@ -7,16 +7,16 @@ HERE=.. OBJS=psb_s_prec_type_impl.o psb_d_prec_type_impl.o \ psb_c_prec_type_impl.o psb_z_prec_type_impl.o \ psb_d_diagprec_impl.o psb_d_bjacprec_impl.o psb_d_nullprec_impl.o \ - psb_dilu_fct.o psb_d_ilu0_fact.o psb_d_iluk_fact.o psb_d_ilut_fact.o \ + psb_d_ilu0_fact.o psb_d_iluk_fact.o psb_d_ilut_fact.o \ psb_dprecbld.o psb_dprecinit.o \ psb_s_diagprec_impl.o psb_s_bjacprec_impl.o psb_s_nullprec_impl.o \ - psb_silu_fct.o psb_s_ilu0_fact.o psb_s_iluk_fact.o psb_s_ilut_fact.o \ + psb_s_ilu0_fact.o psb_s_iluk_fact.o psb_s_ilut_fact.o \ psb_sprecbld.o psb_sprecinit.o \ psb_c_diagprec_impl.o psb_c_bjacprec_impl.o psb_c_nullprec_impl.o \ - psb_cilu_fct.o psb_c_ilu0_fact.o psb_c_iluk_fact.o psb_c_ilut_fact.o \ + psb_c_ilu0_fact.o psb_c_iluk_fact.o psb_c_ilut_fact.o \ psb_cprecbld.o psb_cprecinit.o \ psb_z_diagprec_impl.o psb_z_bjacprec_impl.o psb_z_nullprec_impl.o \ - psb_zilu_fct.o psb_z_ilu0_fact.o psb_z_iluk_fact.o psb_z_ilut_fact.o \ + psb_z_ilu0_fact.o psb_z_iluk_fact.o psb_z_ilut_fact.o \ psb_zprecbld.o psb_zprecinit.o \ psb_c_sparsify.o psb_d_sparsify.o psb_s_sparsify.o psb_z_sparsify.o \ psb_crwclip.o psb_drwclip.o psb_srwclip.o psb_zrwclip.o \ diff --git a/prec/impl/psb_cilu_fct.f90 b/prec/impl/psb_cilu_fct.f90 deleted file mode 100644 index d54769bf..00000000 --- a/prec/impl/psb_cilu_fct.f90 +++ /dev/null @@ -1,438 +0,0 @@ -! -! Parallel Sparse BLAS version 3.5 -! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! -! -subroutine psb_cilu_fct(a,l,u,d,info,blck) - - ! - ! This routine copies and factors "on the fly" from A and BLCK - ! into L/D/U. - ! - ! - use psb_base_mod - implicit none - ! .. Scalar Arguments .. - integer(psb_ipk_), intent(out) :: info - ! .. Array Arguments .. - type(psb_cspmat_type),intent(in) :: a - type(psb_c_csr_sparse_mat),intent(inout) :: l,u - type(psb_cspmat_type),intent(in), optional, target :: blck - complex(psb_spk_), intent(inout) :: d(:) - ! .. Local Scalars .. - integer(psb_ipk_) :: l1, l2,m,err_act - type(psb_cspmat_type), pointer :: blck_ - character(len=20) :: name, ch_err - name='psb_ilu_fct' - info = psb_success_ - call psb_erractionsave(err_act) - ! .. Executable Statements .. - ! - - if (present(blck)) then - blck_ => blck - else - allocate(blck_,stat=info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 - end if - - call blck_%csall(izero,izero,info,ione) - - endif - - call psb_cilu_fctint(m,a%get_nrows(),a,blck_%get_nrows(),blck_,& - & d,l%val,l%ja,l%irp,u%val,u%ja,u%irp,l1,l2,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_cilu_fctint' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - call l%set_triangle() - call l%set_lower() - call l%set_unit() - call u%set_triangle() - call u%set_upper() - call u%set_unit() - call l%set_nrows(m) - call l%set_ncols(m) - call u%set_nrows(m) - call u%set_ncols(m) - - if (present(blck)) then - blck_ => null() - else - call blck_%free() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_free' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - deallocate(blck_) - endif - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - -contains - subroutine psb_cilu_fctint(m,ma,a,mb,b,& - & d,laspk,lia1,lia2,uaspk,uia1,uia2,l1,l2,info) - implicit none - - type(psb_cspmat_type) :: a,b - integer(psb_ipk_) :: m,ma,mb,l1,l2,info - integer(psb_ipk_), dimension(:) :: lia1,lia2,uia1,uia2 - complex(psb_spk_), dimension(:) :: laspk,uaspk,d - - integer(psb_ipk_) :: i,j,k,l,low1,low2,kk,jj,ll, irb, ktrw,err_act, nz - complex(psb_spk_) :: dia,temp - integer(psb_ipk_), parameter :: nrb=60 - type(psb_c_coo_sparse_mat) :: trw - integer(psb_ipk_) :: int_err(5) - character(len=20) :: name, ch_err - - name='psb_cilu_fctint' - if(psb_get_errstatus() /= 0) return - info=psb_success_ - call psb_erractionsave(err_act) - call trw%allocate(izero,izero,ione) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_all' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - lia2(1) = 1 - uia2(1) = 1 - l1=0 - l2=0 - m = ma+mb - - do i = 1, ma - d(i) = czero - - ! - ! - select type(aa => a%a) - type is (psb_c_csr_sparse_mat) - do j = aa%irp(i), aa%irp(i+1) - 1 - k = aa%ja(j) - ! write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i,i+irb-1,trw,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - nz = trw%get_nzeros() - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select -!!$ - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloop: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloop - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - ! write(psb_err_unit,*)'aggiorno dia',dia,'temp',temp,'jj',jj,'u%aspk',uaspk(jj) - dia = dia - temp*uaspk(jj) - ! write(psb_err_unit,*)'dia',dia,'temp',temp,'jj',jj,'aspk',uaspk(jj) - cycle updateloop - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloop - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! dia = dia - al*temp*aup(jj) - enddo updateloop - enddo - ! - ! - ! Non singularity - ! - if (abs(dia) < s_epstol) then - ! - ! Pivot too small: unstable factorization - ! - info = psb_err_pivot_too_small_ - int_err(1) = i - write(ch_err,'(g20.10)') abs(dia) - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = cone/dia - end if - d(i) = dia - ! write(psb_err_unit,*)'diag(',i,')=',d(i) - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - do i = ma+1, m - d(i) = czero - - select type(aa => b%a) - type is (psb_c_csr_sparse_mat) - do j = aa%irp(i-ma), aa%irp(i-ma+1) - 1 - k = aa%ja(j) - ! write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i-ma,i-ma+irb-1,trw,info) - nz = trw%get_nzeros() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select - - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloopb: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloopb - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - dia = dia - temp*uaspk(jj) - cycle updateloopb - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloopb - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! dia = dia - al*temp*aup(jj) - enddo updateloopb - enddo - ! - ! - ! Non singularity - ! - if (abs(dia) < s_epstol) then - ! - ! Pivot too small: unstable factorization - ! - int_err(1) = i - write(ch_err,'(g20.10)') abs(dia) - info = psb_err_pivot_too_small_ - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = cone/dia - end if - d(i) = dia - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - call trw%free() - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - end subroutine psb_cilu_fctint -end subroutine psb_cilu_fct diff --git a/prec/impl/psb_dilu_fct.f90 b/prec/impl/psb_dilu_fct.f90 deleted file mode 100644 index b97b88ec..00000000 --- a/prec/impl/psb_dilu_fct.f90 +++ /dev/null @@ -1,441 +0,0 @@ -! -! Parallel Sparse BLAS version 3.5 -! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! -! -subroutine psb_dilu_fct(a,l,u,d,info,blck) - - ! - ! This routine copies and factors "on the fly" from A and BLCK - ! into L/D/U. - ! - ! - use psb_base_mod - implicit none - ! .. Scalar Arguments .. - integer(psb_ipk_), intent(out) :: info - ! .. Array Arguments .. - type(psb_dspmat_type),intent(in) :: a - type(psb_d_csr_sparse_mat),intent(inout) :: l,u - type(psb_dspmat_type),intent(in), optional, target :: blck - real(psb_dpk_), intent(inout) :: d(:) - ! .. Local Scalars .. - integer(psb_ipk_) :: l1,l2,m,err_act - type(psb_dspmat_type), pointer :: blck_ - character(len=20) :: name, ch_err - name='psb_ilu_fct' - info = psb_success_ - call psb_erractionsave(err_act) - ! .. Executable Statements .. - ! - - if (present(blck)) then - blck_ => blck - else - allocate(blck_,stat=info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 - end if - - call blck_%csall(izero,izero,info,ione) - - endif - - call psb_dilu_fctint(m,a%get_nrows(),a,blck_%get_nrows(),blck_,& - & d,l%val,l%ja,l%irp,u%val,u%ja,u%irp,l1,l2,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_dilu_fctint' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - call l%set_triangle() - call l%set_lower() - call l%set_unit() - call u%set_triangle() - call u%set_upper() - call u%set_unit() - call l%set_nrows(m) - call l%set_ncols(m) - call u%set_nrows(m) - call u%set_ncols(m) - - if (present(blck)) then - blck_ => null() - else - call blck_%free() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_free' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - deallocate(blck_) - endif - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - -contains - subroutine psb_dilu_fctint(m,ma,a,mb,b,& - & d,laspk,lia1,lia2,uaspk,uia1,uia2,l1,l2,info) - use psb_mat_mod - - implicit none - - type(psb_dspmat_type), target :: a - type(psb_dspmat_type), target :: b - integer(psb_ipk_) :: m,ma,mb,l1,l2,info - integer(psb_ipk_), dimension(:) :: lia1,lia2,uia1,uia2 - real(psb_dpk_), dimension(:) :: laspk,uaspk,d - - integer(psb_ipk_) :: i,j,k,l,low1,low2,kk,jj,ll, irb, ktrw,err_act, nz - real(psb_dpk_) :: dia,temp - integer(psb_ipk_), parameter :: nrb=60 - type(psb_d_coo_sparse_mat) :: trw - integer(psb_ipk_) :: int_err(5) - character(len=20) :: name, ch_err - - - name='psb_dilu_fctint' - if(psb_get_errstatus() /= 0) return - info=psb_success_ - call psb_erractionsave(err_act) - call trw%allocate(izero,izero,ione) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_all' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - lia2(1) = 1 - uia2(1) = 1 - l1=0 - l2=0 - m = ma+mb - - do i = 1, ma - d(i) = dzero - ! - ! - select type(aa => a%a) - type is (psb_d_csr_sparse_mat) - do j = aa%irp(i), aa%irp(i+1) - 1 - k = aa%ja(j) - ! write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i,i+irb-1,trw,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - nz = trw%get_nzeros() - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select -!!$ - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloop: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloop - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - ! write(psb_err_unit,*)'aggiorno dia',dia,'temp',temp,'jj',jj,'u%aspk',uaspk(jj) - dia = dia - temp*uaspk(jj) - ! write(psb_err_unit,*)'dia',dia,'temp',temp,'jj',jj,'aspk',uaspk(jj) - cycle updateloop - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloop - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! dia = dia - al*temp*aup(jj) - enddo updateloop - enddo - ! - ! - ! Non singularity - ! - if (dabs(dia) < d_epstol) then - ! - ! Pivot too small: unstable factorization - ! - info = psb_err_pivot_too_small_ - int_err(1) = i - write(ch_err,'(g20.10)') dia - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = done/dia - end if - d(i) = dia - ! write(psb_err_unit,*)'diag(',i,')=',d(i) - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - do i = ma+1, m - d(i) = dzero - - select type(aa => b%a) - type is (psb_d_csr_sparse_mat) - do j = aa%irp(i-ma), aa%irp(i-ma+1) - 1 - k = aa%ja(j) - ! write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i-ma,i-ma+irb-1,trw,info) - nz = trw%get_nzeros() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select - - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloopb: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloopb - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - dia = dia - temp*uaspk(jj) - cycle updateloopb - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloopb - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! dia = dia - al*temp*aup(jj) - enddo updateloopb - enddo - ! - ! - ! Non singularity - ! - if (dabs(dia) < d_epstol) then - ! - ! Pivot too small: unstable factorization - ! - int_err(1) = i - write(ch_err,'(g20.10)') dia - info = psb_err_pivot_too_small_ - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = done/dia - end if - d(i) = dia - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - call trw%free() - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - end subroutine psb_dilu_fctint -end subroutine psb_dilu_fct diff --git a/prec/impl/psb_silu_fct.f90 b/prec/impl/psb_silu_fct.f90 deleted file mode 100644 index 85b58bad..00000000 --- a/prec/impl/psb_silu_fct.f90 +++ /dev/null @@ -1,440 +0,0 @@ -! -! Parallel Sparse BLAS version 3.5 -! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! -! -subroutine psb_silu_fct(a,l,u,d,info,blck) - - ! - ! This routine copies and factors "on the fly" from A and BLCK - ! into L/D/U. - ! - ! - use psb_base_mod - implicit none - ! .. Scalar Arguments .. - integer(psb_ipk_), intent(out) :: info - ! .. Array Arguments .. - type(psb_sspmat_type),intent(in) :: a - type(psb_s_csr_sparse_mat),intent(inout) :: l,u - type(psb_sspmat_type),intent(in), optional, target :: blck - real(psb_spk_), intent(inout) :: d(:) - ! .. Local Scalars .. - integer(psb_ipk_) :: l1,l2,m,err_act - type(psb_sspmat_type), pointer :: blck_ - character(len=20) :: name, ch_err - name='psb_ilu_fct' - info = psb_success_ - call psb_erractionsave(err_act) - ! .. Executable Statements .. - ! - - if (present(blck)) then - blck_ => blck - else - allocate(blck_,stat=info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 - end if - - call blck_%csall(izero,izero,info,ione) - - endif - - call psb_silu_fctint(m,a%get_nrows(),a,blck_%get_nrows(),blck_,& - & d,l%val,l%ja,l%irp,u%val,u%ja,u%irp,l1,l2,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_silu_fctint' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - call l%set_triangle() - call l%set_lower() - call l%set_unit() - call u%set_triangle() - call u%set_upper() - call u%set_unit() - call l%set_nrows(m) - call l%set_ncols(m) - call u%set_nrows(m) - call u%set_ncols(m) - - if (present(blck)) then - blck_ => null() - else - call blck_%free() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_free' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - deallocate(blck_) - endif - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - -contains - subroutine psb_silu_fctint(m,ma,a,mb,b,& - & d,laspk,lia1,lia2,uaspk,uia1,uia2,l1,l2,info) - use psb_mat_mod - - implicit none - - type(psb_sspmat_type) :: a - type(psb_sspmat_type) :: b - integer(psb_ipk_) :: m,ma,mb,l1,l2,info - integer(psb_ipk_), dimension(:) :: lia1,lia2,uia1,uia2 - real(psb_spk_), dimension(:) :: laspk,uaspk,d - - integer(psb_ipk_) :: i,j,k,l,low1,low2,kk,jj,ll, irb, ktrw,err_act, nz - real(psb_spk_) :: dia,temp - integer(psb_ipk_), parameter :: nrb=60 - type(psb_s_coo_sparse_mat) :: trw - integer(psb_ipk_) :: int_err(5) - character(len=20) :: name, ch_err - - name='psb_silu_fctint' - if(psb_get_errstatus() /= 0) return - info=psb_success_ - call psb_erractionsave(err_act) - call trw%allocate(izero,izero,ione) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_all' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - lia2(1) = 1 - uia2(1) = 1 - l1=0 - l2=0 - m = ma+mb - - do i = 1, ma - d(i) = szero - ! - ! - select type(aa => a%a) - type is (psb_s_csr_sparse_mat) - do j = aa%irp(i), aa%irp(i+1) - 1 - k = aa%ja(j) - ! write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i,i+irb-1,trw,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - nz = trw%get_nzeros() - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select -!!$ - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloop: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloop - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - ! write(psb_err_unit,*)'aggiorno dia',dia,'temp',temp,'jj',jj,'u%aspk',uaspk(jj) - dia = dia - temp*uaspk(jj) - ! write(psb_err_unit,*)'dia',dia,'temp',temp,'jj',jj,'aspk',uaspk(jj) - cycle updateloop - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloop - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! dia = dia - al*temp*aup(jj) - enddo updateloop - enddo - ! - ! - ! Non singularity - ! - if (abs(dia) < s_epstol) then - ! - ! Pivot too small: unstable factorization - ! - info = psb_err_pivot_too_small_ - int_err(1) = i - write(ch_err,'(g20.10)') dia - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = sone/dia - end if - d(i) = dia - ! write(psb_err_unit,*)'diag(',i,')=',d(i) - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - do i = ma+1, m - d(i) = szero - - select type(aa => b%a) - type is (psb_s_csr_sparse_mat) - do j = aa%irp(i-ma), aa%irp(i-ma+1) - 1 - k = aa%ja(j) - ! write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i-ma,i-ma+irb-1,trw,info) - nz = trw%get_nzeros() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select - - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloopb: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloopb - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - dia = dia - temp*uaspk(jj) - cycle updateloopb - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloopb - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! dia = dia - al*temp*aup(jj) - enddo updateloopb - enddo - ! - ! - ! Non singularity - ! - if (abs(dia) < s_epstol) then - ! - ! Pivot too small: unstable factorization - ! - int_err(1) = i - write(ch_err,'(g20.10)') dia - info = psb_err_pivot_too_small_ - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = sone/dia - end if - d(i) = dia - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - call trw%free() - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - end subroutine psb_silu_fctint -end subroutine psb_silu_fct diff --git a/prec/impl/psb_zilu_fct.f90 b/prec/impl/psb_zilu_fct.f90 deleted file mode 100644 index e5ea4b0d..00000000 --- a/prec/impl/psb_zilu_fct.f90 +++ /dev/null @@ -1,438 +0,0 @@ -! -! Parallel Sparse BLAS version 3.5 -! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! -! -subroutine psb_zilu_fct(a,l,u,d,info,blck) - - ! - ! This routine copies and factors "on the fly" from A and BLCK - ! into L/D/U. - ! - ! - use psb_base_mod - implicit none - ! .. Scalar Arguments .. - integer(psb_ipk_), intent(out) :: info - ! .. Array Arguments .. - type(psb_zspmat_type),intent(in) :: a - type(psb_z_csr_sparse_mat),intent(inout) :: l,u - type(psb_zspmat_type),intent(in), optional, target :: blck - complex(psb_dpk_), intent(inout) :: d(:) - ! .. Local Scalars .. - integer(psb_ipk_) :: l1, l2,m,err_act - type(psb_zspmat_type), pointer :: blck_ - character(len=20) :: name, ch_err - name='psb_ilu_fct' - info = psb_success_ - call psb_erractionsave(err_act) - ! .. Executable Statements .. - ! - - if (present(blck)) then - blck_ => blck - else - allocate(blck_,stat=info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 - end if - - call blck_%csall(izero,izero,info,ione) - - endif - - call psb_zilu_fctint(m,a%get_nrows(),a,blck_%get_nrows(),blck_,& - & d,l%val,l%ja,l%irp,u%val,u%ja,u%irp,l1,l2,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_zilu_fctint' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - call l%set_triangle() - call l%set_lower() - call l%set_unit() - call u%set_triangle() - call u%set_upper() - call u%set_unit() - call l%set_nrows(m) - call l%set_ncols(m) - call u%set_nrows(m) - call u%set_ncols(m) - - if (present(blck)) then - blck_ => null() - else - call blck_%free() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_free' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - deallocate(blck_) - endif - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - -contains - subroutine psb_zilu_fctint(m,ma,a,mb,b,& - & d,laspk,lia1,lia2,uaspk,uia1,uia2,l1,l2,info) - implicit none - - type(psb_zspmat_type) :: a,b - integer(psb_ipk_) :: m,ma,mb,l1,l2,info - integer(psb_ipk_), dimension(:) :: lia1,lia2,uia1,uia2 - complex(psb_dpk_), dimension(:) :: laspk,uaspk,d - - integer(psb_ipk_) :: i,j,k,l,low1,low2,kk,jj,ll, irb, ktrw,err_act, nz - complex(psb_dpk_) :: dia,temp - integer(psb_ipk_), parameter :: nrb=60 - type(psb_z_coo_sparse_mat) :: trw - integer(psb_ipk_) :: int_err(5) - character(len=20) :: name, ch_err - - name='psb_zilu_fctint' - if(psb_get_errstatus() /= 0) return - info=psb_success_ - call psb_erractionsave(err_act) - call trw%allocate(izero,izero,ione) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_all' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - lia2(1) = 1 - uia2(1) = 1 - l1=0 - l2=0 - m = ma+mb - - do i = 1, ma - d(i) = zzero - - ! - ! - select type(aa => a%a) - type is (psb_z_csr_sparse_mat) - do j = aa%irp(i), aa%irp(i+1) - 1 - k = aa%ja(j) - ! write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i,i+irb-1,trw,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - nz = trw%get_nzeros() - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select -!!$ - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloop: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloop - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - ! write(psb_err_unit,*)'aggiorno dia',dia,'temp',temp,'jj',jj,'u%aspk',uaspk(jj) - dia = dia - temp*uaspk(jj) - ! write(psb_err_unit,*)'dia',dia,'temp',temp,'jj',jj,'aspk',uaspk(jj) - cycle updateloop - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloop - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! dia = dia - al*temp*aup(jj) - enddo updateloop - enddo - ! - ! - ! Non singularity - ! - if (abs(dia) < d_epstol) then - ! - ! Pivot too small: unstable factorization - ! - info = psb_err_pivot_too_small_ - int_err(1) = i - write(ch_err,'(g20.10)') abs(dia) - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = zone/dia - end if - d(i) = dia - ! write(psb_err_unit,*)'diag(',i,')=',d(i) - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - do i = ma+1, m - d(i) = zzero - - select type(aa => b%a) - type is (psb_z_csr_sparse_mat) - do j = aa%irp(i-ma), aa%irp(i-ma+1) - 1 - k = aa%ja(j) - ! write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i-ma,i-ma+irb-1,trw,info) - nz = trw%get_nzeros() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select - - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloopb: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloopb - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - dia = dia - temp*uaspk(jj) - cycle updateloopb - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloopb - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! dia = dia - al*temp*aup(jj) - enddo updateloopb - enddo - ! - ! - ! Non singularity - ! - if (abs(dia) < d_epstol) then - ! - ! Pivot too small: unstable factorization - ! - int_err(1) = i - write(ch_err,'(g20.10)') abs(dia) - info = psb_err_pivot_too_small_ - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = zone/dia - end if - d(i) = dia - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - call trw%free() - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - end subroutine psb_zilu_fctint -end subroutine psb_zilu_fct From 497cd3101801585ea2879bb60297dd6424cc49ba Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 16 Jul 2024 13:14:02 +0200 Subject: [PATCH 08/12] Fix configure --- configure | 1 - 1 file changed, 1 deletion(-) diff --git a/configure b/configure index 0832aa87..0c59d9fd 100755 --- a/configure +++ b/configure @@ -7550,7 +7550,6 @@ fi if test x"$pac_cv_serial_mpi" == x"yes" ; then FDEFINES="$psblas_cv_define_prepend-DSERIAL_MPI $psblas_cv_define_prepend-DMPI_MOD $FDEFINES"; CDEFINES="-DSERIAL_MPI $CDEFINES" - CXXDEFINES="-DSERIAL_MPI $CXXDEFINES" else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking MPI Fortran 2008 interface" >&5 printf %s "checking MPI Fortran 2008 interface... " >&6; } From ab38a91d10f9c37b81a7a6663da2622345529f2e Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Tue, 16 Jul 2024 13:53:00 +0200 Subject: [PATCH 09/12] Fix metis interfacing --- util/psb_metis_int.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/psb_metis_int.c b/util/psb_metis_int.c index 5f451cb7..4d492407 100644 --- a/util/psb_metis_int.c +++ b/util/psb_metis_int.c @@ -26,7 +26,7 @@ int metis_PartGraphKway_C(idx_t *n, idx_t *ixadj, idx_t *iadj, idx_t *ivwg, /* NULL,NULL,NULL,(idx_t *)nparts,NULL,NULL,NULL, */ /* &objval,(idx_t *)graphpart); */ res = METIS_PartGraphKway((idx_t*)n,(idx_t *)&ncon,(idx_t *)ixadj,(idx_t *)iadj, - NULL,NULL,NULL,(idx_t *)nparts,weights,NULL,options, + NULL,NULL,NULL,(idx_t *)nparts,(void *)weights,NULL,options, &objval,(idx_t *)graphpart); } if (res == METIS_OK) { From e8491380e2853e9ae8ae373cd66315c42d555592 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Fri, 19 Jul 2024 09:40:14 +0200 Subject: [PATCH 10/12] Take out obsolete test targets from makefile --- test/hello/Makefile | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/test/hello/Makefile b/test/hello/Makefile index f16ff75e..cbdd358f 100644 --- a/test/hello/Makefile +++ b/test/hello/Makefile @@ -16,7 +16,7 @@ FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG). EXEDIR=./runs -all: runsd hello pingpong tsum tsum1 +all: runsd hello pingpong runsd: (if test ! -d runs ; then mkdir runs; fi) @@ -28,13 +28,6 @@ hello: hello.o pingpong: pingpong.o $(FLINK) pingpong.o -o pingpong $(PSBLAS_LIB) $(LDLIBS) /bin/mv pingpong $(EXEDIR) -tsum: tsum.o - $(FLINK) tsum.o -o tsum $(PSBLAS_LIB) $(LDLIBS) - /bin/mv tsum $(EXEDIR) -tsum1: tsum1.o - $(FLINK) tsum1.o -o tsum1 $(PSBLAS_LIB) $(LDLIBS) - /bin/mv tsum1 $(EXEDIR) - clean: From 4461b44eda4943182b725ee75db2bebfb3724fdc Mon Sep 17 00:00:00 2001 From: sfilippone Date: Mon, 29 Jul 2024 16:59:27 +0200 Subject: [PATCH 11/12] Change name abgdxyz into upd_xyz --- base/modules/auxil/psi_c_serial_mod.f90 | 8 +- base/modules/auxil/psi_d_serial_mod.f90 | 8 +- base/modules/auxil/psi_e_serial_mod.f90 | 8 +- base/modules/auxil/psi_i2_serial_mod.f90 | 8 +- base/modules/auxil/psi_m_serial_mod.f90 | 8 +- base/modules/auxil/psi_s_serial_mod.f90 | 8 +- base/modules/auxil/psi_z_serial_mod.f90 | 8 +- base/modules/psblas/psb_c_psblas_mod.F90 | 8 +- base/modules/psblas/psb_d_psblas_mod.F90 | 8 +- base/modules/psblas/psb_s_psblas_mod.F90 | 8 +- base/modules/psblas/psb_z_psblas_mod.F90 | 8 +- base/modules/serial/psb_c_base_vect_mod.F90 | 14 +- base/modules/serial/psb_c_vect_mod.F90 | 8 +- base/modules/serial/psb_d_base_vect_mod.F90 | 14 +- base/modules/serial/psb_d_vect_mod.F90 | 8 +- base/modules/serial/psb_s_base_vect_mod.F90 | 14 +- base/modules/serial/psb_s_vect_mod.F90 | 8 +- base/modules/serial/psb_z_base_vect_mod.F90 | 14 +- base/modules/serial/psb_z_vect_mod.F90 | 8 +- base/psblas/psb_caxpby.f90 | 8 +- base/psblas/psb_daxpby.f90 | 8 +- base/psblas/psb_saxpby.f90 | 8 +- base/psblas/psb_zaxpby.f90 | 8 +- base/serial/psi_c_serial_impl.F90 | 8 +- base/serial/psi_d_serial_impl.F90 | 8 +- base/serial/psi_e_serial_impl.F90 | 8 +- base/serial/psi_i2_serial_impl.F90 | 8 +- base/serial/psi_m_serial_impl.F90 | 8 +- base/serial/psi_s_serial_impl.F90 | 8 +- base/serial/psi_z_serial_impl.F90 | 8 +- cuda/cvectordev.c | 4 +- cuda/cvectordev.h | 2 +- cuda/dvectordev.c | 4 +- cuda/dvectordev.h | 2 +- cuda/psb_c_cuda_vect_mod.F90 | 8 +- cuda/psb_c_vectordev_mod.F90 | 8 +- cuda/psb_d_cuda_vect_mod.F90 | 8 +- cuda/psb_d_vectordev_mod.F90 | 8 +- cuda/psb_s_cuda_vect_mod.F90 | 8 +- cuda/psb_s_vectordev_mod.F90 | 8 +- cuda/psb_z_cuda_vect_mod.F90 | 8 +- cuda/psb_z_vectordev_mod.F90 | 8 +- cuda/spgpu/kernels/Makefile | 8 +- .../kernels/{cabgdxyz.cu => cupd_xyz.cu} | 6 +- .../kernels/{dabgdxyz.cu => dupd_xyz.cu} | 6 +- .../kernels/{sabgdxyz.cu => supd_xyz.cu} | 6 +- .../kernels/{zabgdxyz.cu => zupd_xyz.cu} | 6 +- cuda/spgpu/vector.h | 8 +- cuda/svectordev.c | 4 +- cuda/svectordev.h | 2 +- cuda/zvectordev.c | 4 +- cuda/zvectordev.h | 2 +- krylov/psb_dcg.F90 | 31 +- test/cudakern/Makefile | 36 +- test/cudakern/c_file_spmv.F90 | 491 ----------------- test/cudakern/d_file_spmv.F90 | 496 ------------------ test/cudakern/s_file_spmv.F90 | 496 ------------------ test/cudakern/z_file_spmv.F90 | 491 ----------------- 58 files changed, 207 insertions(+), 2226 deletions(-) rename cuda/spgpu/kernels/{cabgdxyz.cu => cupd_xyz.cu} (88%) rename cuda/spgpu/kernels/{dabgdxyz.cu => dupd_xyz.cu} (88%) rename cuda/spgpu/kernels/{sabgdxyz.cu => supd_xyz.cu} (88%) rename cuda/spgpu/kernels/{zabgdxyz.cu => zupd_xyz.cu} (88%) delete mode 100644 test/cudakern/c_file_spmv.F90 delete mode 100644 test/cudakern/d_file_spmv.F90 delete mode 100644 test/cudakern/s_file_spmv.F90 delete mode 100644 test/cudakern/z_file_spmv.F90 diff --git a/base/modules/auxil/psi_c_serial_mod.f90 b/base/modules/auxil/psi_c_serial_mod.f90 index 38b740a7..3fe001c8 100644 --- a/base/modules/auxil/psi_c_serial_mod.f90 +++ b/base/modules/auxil/psi_c_serial_mod.f90 @@ -99,8 +99,8 @@ module psi_c_serial_mod end subroutine psi_caxpbyv2 end interface psb_geaxpby - interface psi_abgdxyz - subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + interface psi_upd_xyz + subroutine psi_c_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) import :: psb_ipk_, psb_spk_ implicit none integer(psb_ipk_), intent(in) :: m @@ -109,8 +109,8 @@ module psi_c_serial_mod complex(psb_spk_), intent (inout) :: z(:) complex(psb_spk_), intent (in) :: alpha, beta,gamma,delta integer(psb_ipk_), intent(out) :: info - end subroutine psi_cabgdxyz - end interface psi_abgdxyz + end subroutine psi_c_upd_xyz + end interface psi_upd_xyz interface psi_xyzw subroutine psi_cxyzw(m,a,b,c,d,e,f,x, y, z,w, info) diff --git a/base/modules/auxil/psi_d_serial_mod.f90 b/base/modules/auxil/psi_d_serial_mod.f90 index 1d65c5f6..a08263df 100644 --- a/base/modules/auxil/psi_d_serial_mod.f90 +++ b/base/modules/auxil/psi_d_serial_mod.f90 @@ -99,8 +99,8 @@ module psi_d_serial_mod end subroutine psi_daxpbyv2 end interface psb_geaxpby - interface psi_abgdxyz - subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + interface psi_upd_xyz + subroutine psi_d_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) import :: psb_ipk_, psb_dpk_ implicit none integer(psb_ipk_), intent(in) :: m @@ -109,8 +109,8 @@ module psi_d_serial_mod real(psb_dpk_), intent (inout) :: z(:) real(psb_dpk_), intent (in) :: alpha, beta,gamma,delta integer(psb_ipk_), intent(out) :: info - end subroutine psi_dabgdxyz - end interface psi_abgdxyz + end subroutine psi_d_upd_xyz + end interface psi_upd_xyz interface psi_xyzw subroutine psi_dxyzw(m,a,b,c,d,e,f,x, y, z,w, info) diff --git a/base/modules/auxil/psi_e_serial_mod.f90 b/base/modules/auxil/psi_e_serial_mod.f90 index 6f4e8c06..1f1bebd7 100644 --- a/base/modules/auxil/psi_e_serial_mod.f90 +++ b/base/modules/auxil/psi_e_serial_mod.f90 @@ -99,8 +99,8 @@ module psi_e_serial_mod end subroutine psi_eaxpbyv2 end interface psb_geaxpby - interface psi_abgdxyz - subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + interface psi_upd_xyz + subroutine psi_e_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit none integer(psb_ipk_), intent(in) :: m @@ -109,8 +109,8 @@ module psi_e_serial_mod integer(psb_epk_), intent (inout) :: z(:) integer(psb_epk_), intent (in) :: alpha, beta,gamma,delta integer(psb_ipk_), intent(out) :: info - end subroutine psi_eabgdxyz - end interface psi_abgdxyz + end subroutine psi_e_upd_xyz + end interface psi_upd_xyz interface psi_xyzw subroutine psi_exyzw(m,a,b,c,d,e,f,x, y, z,w, info) diff --git a/base/modules/auxil/psi_i2_serial_mod.f90 b/base/modules/auxil/psi_i2_serial_mod.f90 index ffa14059..770d3256 100644 --- a/base/modules/auxil/psi_i2_serial_mod.f90 +++ b/base/modules/auxil/psi_i2_serial_mod.f90 @@ -99,8 +99,8 @@ module psi_i2_serial_mod end subroutine psi_i2axpbyv2 end interface psb_geaxpby - interface psi_abgdxyz - subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + interface psi_upd_xyz + subroutine psi_i2_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit none integer(psb_ipk_), intent(in) :: m @@ -109,8 +109,8 @@ module psi_i2_serial_mod integer(psb_i2pk_), intent (inout) :: z(:) integer(psb_i2pk_), intent (in) :: alpha, beta,gamma,delta integer(psb_ipk_), intent(out) :: info - end subroutine psi_i2abgdxyz - end interface psi_abgdxyz + end subroutine psi_i2_upd_xyz + end interface psi_upd_xyz interface psi_xyzw subroutine psi_i2xyzw(m,a,b,c,d,e,f,x, y, z,w, info) diff --git a/base/modules/auxil/psi_m_serial_mod.f90 b/base/modules/auxil/psi_m_serial_mod.f90 index 5661fdbf..3583cccc 100644 --- a/base/modules/auxil/psi_m_serial_mod.f90 +++ b/base/modules/auxil/psi_m_serial_mod.f90 @@ -99,8 +99,8 @@ module psi_m_serial_mod end subroutine psi_maxpbyv2 end interface psb_geaxpby - interface psi_abgdxyz - subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + interface psi_upd_xyz + subroutine psi_m_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit none integer(psb_ipk_), intent(in) :: m @@ -109,8 +109,8 @@ module psi_m_serial_mod integer(psb_mpk_), intent (inout) :: z(:) integer(psb_mpk_), intent (in) :: alpha, beta,gamma,delta integer(psb_ipk_), intent(out) :: info - end subroutine psi_mabgdxyz - end interface psi_abgdxyz + end subroutine psi_m_upd_xyz + end interface psi_upd_xyz interface psi_xyzw subroutine psi_mxyzw(m,a,b,c,d,e,f,x, y, z,w, info) diff --git a/base/modules/auxil/psi_s_serial_mod.f90 b/base/modules/auxil/psi_s_serial_mod.f90 index 5cc17d58..3e0c6d91 100644 --- a/base/modules/auxil/psi_s_serial_mod.f90 +++ b/base/modules/auxil/psi_s_serial_mod.f90 @@ -99,8 +99,8 @@ module psi_s_serial_mod end subroutine psi_saxpbyv2 end interface psb_geaxpby - interface psi_abgdxyz - subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + interface psi_upd_xyz + subroutine psi_s_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) import :: psb_ipk_, psb_spk_ implicit none integer(psb_ipk_), intent(in) :: m @@ -109,8 +109,8 @@ module psi_s_serial_mod real(psb_spk_), intent (inout) :: z(:) real(psb_spk_), intent (in) :: alpha, beta,gamma,delta integer(psb_ipk_), intent(out) :: info - end subroutine psi_sabgdxyz - end interface psi_abgdxyz + end subroutine psi_s_upd_xyz + end interface psi_upd_xyz interface psi_xyzw subroutine psi_sxyzw(m,a,b,c,d,e,f,x, y, z,w, info) diff --git a/base/modules/auxil/psi_z_serial_mod.f90 b/base/modules/auxil/psi_z_serial_mod.f90 index 8a3f053d..a8ea734e 100644 --- a/base/modules/auxil/psi_z_serial_mod.f90 +++ b/base/modules/auxil/psi_z_serial_mod.f90 @@ -99,8 +99,8 @@ module psi_z_serial_mod end subroutine psi_zaxpbyv2 end interface psb_geaxpby - interface psi_abgdxyz - subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + interface psi_upd_xyz + subroutine psi_z_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) import :: psb_ipk_, psb_dpk_ implicit none integer(psb_ipk_), intent(in) :: m @@ -109,8 +109,8 @@ module psi_z_serial_mod complex(psb_dpk_), intent (inout) :: z(:) complex(psb_dpk_), intent (in) :: alpha, beta,gamma,delta integer(psb_ipk_), intent(out) :: info - end subroutine psi_zabgdxyz - end interface psi_abgdxyz + end subroutine psi_z_upd_xyz + end interface psi_upd_xyz interface psi_xyzw subroutine psi_zxyzw(m,a,b,c,d,e,f,x, y, z,w, info) diff --git a/base/modules/psblas/psb_c_psblas_mod.F90 b/base/modules/psblas/psb_c_psblas_mod.F90 index 7f7f937c..591dec09 100644 --- a/base/modules/psblas/psb_c_psblas_mod.F90 +++ b/base/modules/psblas/psb_c_psblas_mod.F90 @@ -143,8 +143,8 @@ module psb_c_psblas_mod end subroutine psb_caxpby end interface - interface psb_abgdxyz - subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& + interface psb_upd_xyz + subroutine psb_c_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) import :: psb_desc_type, psb_spk_, psb_ipk_, & & psb_c_vect_type, psb_cspmat_type @@ -154,8 +154,8 @@ module psb_c_psblas_mod complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta type(psb_desc_type), intent (in) :: desc_a integer(psb_ipk_), intent(out) :: info - end subroutine psb_cabgdxyz_vect - end interface psb_abgdxyz + end subroutine psb_c_upd_xyz_vect + end interface psb_upd_xyz interface psb_geamax function psb_camax(x, desc_a, info, jx,global) diff --git a/base/modules/psblas/psb_d_psblas_mod.F90 b/base/modules/psblas/psb_d_psblas_mod.F90 index 12090956..b200bc8a 100644 --- a/base/modules/psblas/psb_d_psblas_mod.F90 +++ b/base/modules/psblas/psb_d_psblas_mod.F90 @@ -143,8 +143,8 @@ module psb_d_psblas_mod end subroutine psb_daxpby end interface - interface psb_abgdxyz - subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& + interface psb_upd_xyz + subroutine psb_d_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) import :: psb_desc_type, psb_dpk_, psb_ipk_, & & psb_d_vect_type, psb_dspmat_type @@ -154,8 +154,8 @@ module psb_d_psblas_mod real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta type(psb_desc_type), intent (in) :: desc_a integer(psb_ipk_), intent(out) :: info - end subroutine psb_dabgdxyz_vect - end interface psb_abgdxyz + end subroutine psb_d_upd_xyz_vect + end interface psb_upd_xyz interface psb_geamax function psb_damax(x, desc_a, info, jx,global) diff --git a/base/modules/psblas/psb_s_psblas_mod.F90 b/base/modules/psblas/psb_s_psblas_mod.F90 index 7a7ce783..a60da025 100644 --- a/base/modules/psblas/psb_s_psblas_mod.F90 +++ b/base/modules/psblas/psb_s_psblas_mod.F90 @@ -143,8 +143,8 @@ module psb_s_psblas_mod end subroutine psb_saxpby end interface - interface psb_abgdxyz - subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& + interface psb_upd_xyz + subroutine psb_s_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) import :: psb_desc_type, psb_spk_, psb_ipk_, & & psb_s_vect_type, psb_sspmat_type @@ -154,8 +154,8 @@ module psb_s_psblas_mod real(psb_spk_), intent (in) :: alpha, beta, gamma, delta type(psb_desc_type), intent (in) :: desc_a integer(psb_ipk_), intent(out) :: info - end subroutine psb_sabgdxyz_vect - end interface psb_abgdxyz + end subroutine psb_s_upd_xyz_vect + end interface psb_upd_xyz interface psb_geamax function psb_samax(x, desc_a, info, jx,global) diff --git a/base/modules/psblas/psb_z_psblas_mod.F90 b/base/modules/psblas/psb_z_psblas_mod.F90 index bcfe9caa..241df2b9 100644 --- a/base/modules/psblas/psb_z_psblas_mod.F90 +++ b/base/modules/psblas/psb_z_psblas_mod.F90 @@ -143,8 +143,8 @@ module psb_z_psblas_mod end subroutine psb_zaxpby end interface - interface psb_abgdxyz - subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& + interface psb_upd_xyz + subroutine psb_z_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) import :: psb_desc_type, psb_dpk_, psb_ipk_, & & psb_z_vect_type, psb_zspmat_type @@ -154,8 +154,8 @@ module psb_z_psblas_mod complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta type(psb_desc_type), intent (in) :: desc_a integer(psb_ipk_), intent(out) :: info - end subroutine psb_zabgdxyz_vect - end interface psb_abgdxyz + end subroutine psb_z_upd_xyz_vect + end interface psb_upd_xyz interface psb_geamax function psb_zamax(x, desc_a, info, jx,global) diff --git a/base/modules/serial/psb_c_base_vect_mod.F90 b/base/modules/serial/psb_c_base_vect_mod.F90 index 41bab5ab..4dac86d6 100644 --- a/base/modules/serial/psb_c_base_vect_mod.F90 +++ b/base/modules/serial/psb_c_base_vect_mod.F90 @@ -155,7 +155,7 @@ module psb_c_base_vect_mod procedure, pass(z) :: axpby_v2 => c_base_axpby_v2 procedure, pass(z) :: axpby_a2 => c_base_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 - procedure, pass(z) :: abgdxyz => c_base_abgdxyz + procedure, pass(z) :: upd_xyz => c_base_upd_xyz procedure, pass(w) :: xyzw => c_base_xyzw ! @@ -1130,12 +1130,12 @@ contains end subroutine c_base_axpby_a2 ! - ! ABGDXYZ is invoked via Z, hence the structure below. + ! UPD_XYZ is invoked via Z, hence the structure below. ! ! - !> Function base_abgdxyz + !> Function base_upd_xyz !! \memberof psb_c_base_vect_type - !! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta + !! \brief UPD_XYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param beta scalar beta @@ -1146,7 +1146,7 @@ contains !! \param z The class(base_vect) to be added !! \param info return code !! - subroutine c_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + subroutine c_base_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -1159,11 +1159,11 @@ contains if (x%is_dev().and.(alpha/=czero)) call x%sync() if (y%is_dev().and.(beta/=czero)) call y%sync() if (z%is_dev().and.(delta/=czero)) call z%sync() - call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + call psi_upd_xyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) call y%set_host() call z%set_host() - end subroutine c_base_abgdxyz + end subroutine c_base_upd_xyz subroutine c_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info) use psi_serial_mod diff --git a/base/modules/serial/psb_c_vect_mod.F90 b/base/modules/serial/psb_c_vect_mod.F90 index 865f9456..1e9510f2 100644 --- a/base/modules/serial/psb_c_vect_mod.F90 +++ b/base/modules/serial/psb_c_vect_mod.F90 @@ -102,7 +102,7 @@ module psb_c_vect_mod procedure, pass(z) :: axpby_v2 => c_vect_axpby_v2 procedure, pass(z) :: axpby_a2 => c_vect_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 - procedure, pass(z) :: abgdxyz => c_vect_abgdxyz + procedure, pass(z) :: upd_xyz => c_vect_upd_xyz procedure, pass(z) :: xyzw => c_vect_xyzw procedure, pass(y) :: mlt_v => c_vect_mlt_v @@ -774,7 +774,7 @@ contains end subroutine c_vect_axpby_a2 - subroutine c_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info) + subroutine c_vect_upd_xyz(m,alpha,beta,gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -785,9 +785,9 @@ contains integer(psb_ipk_), intent(out) :: info if (allocated(z%v)) & - call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info) + call z%v%upd_xyz(m,alpha,beta,gamma,delta,x%v,y%v,info) - end subroutine c_vect_abgdxyz + end subroutine c_vect_upd_xyz subroutine c_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info) use psi_serial_mod diff --git a/base/modules/serial/psb_d_base_vect_mod.F90 b/base/modules/serial/psb_d_base_vect_mod.F90 index 1ad1ffa5..f07b5aed 100644 --- a/base/modules/serial/psb_d_base_vect_mod.F90 +++ b/base/modules/serial/psb_d_base_vect_mod.F90 @@ -155,7 +155,7 @@ module psb_d_base_vect_mod procedure, pass(z) :: axpby_v2 => d_base_axpby_v2 procedure, pass(z) :: axpby_a2 => d_base_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 - procedure, pass(z) :: abgdxyz => d_base_abgdxyz + procedure, pass(z) :: upd_xyz => d_base_upd_xyz procedure, pass(w) :: xyzw => d_base_xyzw ! @@ -1137,12 +1137,12 @@ contains end subroutine d_base_axpby_a2 ! - ! ABGDXYZ is invoked via Z, hence the structure below. + ! UPD_XYZ is invoked via Z, hence the structure below. ! ! - !> Function base_abgdxyz + !> Function base_upd_xyz !! \memberof psb_d_base_vect_type - !! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta + !! \brief UPD_XYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param beta scalar beta @@ -1153,7 +1153,7 @@ contains !! \param z The class(base_vect) to be added !! \param info return code !! - subroutine d_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + subroutine d_base_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -1166,11 +1166,11 @@ contains if (x%is_dev().and.(alpha/=dzero)) call x%sync() if (y%is_dev().and.(beta/=dzero)) call y%sync() if (z%is_dev().and.(delta/=dzero)) call z%sync() - call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + call psi_upd_xyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) call y%set_host() call z%set_host() - end subroutine d_base_abgdxyz + end subroutine d_base_upd_xyz subroutine d_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info) use psi_serial_mod diff --git a/base/modules/serial/psb_d_vect_mod.F90 b/base/modules/serial/psb_d_vect_mod.F90 index 55dd8230..ae3062dd 100644 --- a/base/modules/serial/psb_d_vect_mod.F90 +++ b/base/modules/serial/psb_d_vect_mod.F90 @@ -102,7 +102,7 @@ module psb_d_vect_mod procedure, pass(z) :: axpby_v2 => d_vect_axpby_v2 procedure, pass(z) :: axpby_a2 => d_vect_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 - procedure, pass(z) :: abgdxyz => d_vect_abgdxyz + procedure, pass(z) :: upd_xyz => d_vect_upd_xyz procedure, pass(z) :: xyzw => d_vect_xyzw procedure, pass(y) :: mlt_v => d_vect_mlt_v @@ -781,7 +781,7 @@ contains end subroutine d_vect_axpby_a2 - subroutine d_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info) + subroutine d_vect_upd_xyz(m,alpha,beta,gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -792,9 +792,9 @@ contains integer(psb_ipk_), intent(out) :: info if (allocated(z%v)) & - call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info) + call z%v%upd_xyz(m,alpha,beta,gamma,delta,x%v,y%v,info) - end subroutine d_vect_abgdxyz + end subroutine d_vect_upd_xyz subroutine d_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info) use psi_serial_mod diff --git a/base/modules/serial/psb_s_base_vect_mod.F90 b/base/modules/serial/psb_s_base_vect_mod.F90 index 26b82c31..596cd634 100644 --- a/base/modules/serial/psb_s_base_vect_mod.F90 +++ b/base/modules/serial/psb_s_base_vect_mod.F90 @@ -155,7 +155,7 @@ module psb_s_base_vect_mod procedure, pass(z) :: axpby_v2 => s_base_axpby_v2 procedure, pass(z) :: axpby_a2 => s_base_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 - procedure, pass(z) :: abgdxyz => s_base_abgdxyz + procedure, pass(z) :: upd_xyz => s_base_upd_xyz procedure, pass(w) :: xyzw => s_base_xyzw ! @@ -1137,12 +1137,12 @@ contains end subroutine s_base_axpby_a2 ! - ! ABGDXYZ is invoked via Z, hence the structure below. + ! UPD_XYZ is invoked via Z, hence the structure below. ! ! - !> Function base_abgdxyz + !> Function base_upd_xyz !! \memberof psb_s_base_vect_type - !! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta + !! \brief UPD_XYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param beta scalar beta @@ -1153,7 +1153,7 @@ contains !! \param z The class(base_vect) to be added !! \param info return code !! - subroutine s_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + subroutine s_base_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -1166,11 +1166,11 @@ contains if (x%is_dev().and.(alpha/=szero)) call x%sync() if (y%is_dev().and.(beta/=szero)) call y%sync() if (z%is_dev().and.(delta/=szero)) call z%sync() - call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + call psi_upd_xyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) call y%set_host() call z%set_host() - end subroutine s_base_abgdxyz + end subroutine s_base_upd_xyz subroutine s_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info) use psi_serial_mod diff --git a/base/modules/serial/psb_s_vect_mod.F90 b/base/modules/serial/psb_s_vect_mod.F90 index a50b2a0a..cad4659c 100644 --- a/base/modules/serial/psb_s_vect_mod.F90 +++ b/base/modules/serial/psb_s_vect_mod.F90 @@ -102,7 +102,7 @@ module psb_s_vect_mod procedure, pass(z) :: axpby_v2 => s_vect_axpby_v2 procedure, pass(z) :: axpby_a2 => s_vect_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 - procedure, pass(z) :: abgdxyz => s_vect_abgdxyz + procedure, pass(z) :: upd_xyz => s_vect_upd_xyz procedure, pass(z) :: xyzw => s_vect_xyzw procedure, pass(y) :: mlt_v => s_vect_mlt_v @@ -781,7 +781,7 @@ contains end subroutine s_vect_axpby_a2 - subroutine s_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info) + subroutine s_vect_upd_xyz(m,alpha,beta,gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -792,9 +792,9 @@ contains integer(psb_ipk_), intent(out) :: info if (allocated(z%v)) & - call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info) + call z%v%upd_xyz(m,alpha,beta,gamma,delta,x%v,y%v,info) - end subroutine s_vect_abgdxyz + end subroutine s_vect_upd_xyz subroutine s_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info) use psi_serial_mod diff --git a/base/modules/serial/psb_z_base_vect_mod.F90 b/base/modules/serial/psb_z_base_vect_mod.F90 index a3afc9c1..1bbdfba1 100644 --- a/base/modules/serial/psb_z_base_vect_mod.F90 +++ b/base/modules/serial/psb_z_base_vect_mod.F90 @@ -155,7 +155,7 @@ module psb_z_base_vect_mod procedure, pass(z) :: axpby_v2 => z_base_axpby_v2 procedure, pass(z) :: axpby_a2 => z_base_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 - procedure, pass(z) :: abgdxyz => z_base_abgdxyz + procedure, pass(z) :: upd_xyz => z_base_upd_xyz procedure, pass(w) :: xyzw => z_base_xyzw ! @@ -1130,12 +1130,12 @@ contains end subroutine z_base_axpby_a2 ! - ! ABGDXYZ is invoked via Z, hence the structure below. + ! UPD_XYZ is invoked via Z, hence the structure below. ! ! - !> Function base_abgdxyz + !> Function base_upd_xyz !! \memberof psb_z_base_vect_type - !! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta + !! \brief UPD_XYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param beta scalar beta @@ -1146,7 +1146,7 @@ contains !! \param z The class(base_vect) to be added !! \param info return code !! - subroutine z_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + subroutine z_base_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -1159,11 +1159,11 @@ contains if (x%is_dev().and.(alpha/=zzero)) call x%sync() if (y%is_dev().and.(beta/=zzero)) call y%sync() if (z%is_dev().and.(delta/=zzero)) call z%sync() - call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + call psi_upd_xyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) call y%set_host() call z%set_host() - end subroutine z_base_abgdxyz + end subroutine z_base_upd_xyz subroutine z_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info) use psi_serial_mod diff --git a/base/modules/serial/psb_z_vect_mod.F90 b/base/modules/serial/psb_z_vect_mod.F90 index 21e0c546..48f2e947 100644 --- a/base/modules/serial/psb_z_vect_mod.F90 +++ b/base/modules/serial/psb_z_vect_mod.F90 @@ -102,7 +102,7 @@ module psb_z_vect_mod procedure, pass(z) :: axpby_v2 => z_vect_axpby_v2 procedure, pass(z) :: axpby_a2 => z_vect_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 - procedure, pass(z) :: abgdxyz => z_vect_abgdxyz + procedure, pass(z) :: upd_xyz => z_vect_upd_xyz procedure, pass(z) :: xyzw => z_vect_xyzw procedure, pass(y) :: mlt_v => z_vect_mlt_v @@ -774,7 +774,7 @@ contains end subroutine z_vect_axpby_a2 - subroutine z_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info) + subroutine z_vect_upd_xyz(m,alpha,beta,gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -785,9 +785,9 @@ contains integer(psb_ipk_), intent(out) :: info if (allocated(z%v)) & - call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info) + call z%v%upd_xyz(m,alpha,beta,gamma,delta,x%v,y%v,info) - end subroutine z_vect_abgdxyz + end subroutine z_vect_upd_xyz subroutine z_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info) use psi_serial_mod diff --git a/base/psblas/psb_caxpby.f90 b/base/psblas/psb_caxpby.f90 index 7c22bb06..5d80ef00 100644 --- a/base/psblas/psb_caxpby.f90 +++ b/base/psblas/psb_caxpby.f90 @@ -743,9 +743,9 @@ subroutine psb_caddconst_vect(x,b,z,desc_a,info) end subroutine psb_caddconst_vect -subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& +subroutine psb_c_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) - use psb_base_mod, psb_protect_name => psb_cabgdxyz_vect + use psb_base_mod, psb_protect_name => psb_c_upd_xyz_vect implicit none type(psb_c_vect_type), intent (inout) :: x type(psb_c_vect_type), intent (inout) :: y @@ -812,7 +812,7 @@ subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& end if if(desc_a%get_local_rows() > 0) then - call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info) + call z%upd_xyz(nr,alpha,beta,gamma,delta,x,y,info) end if call psb_erractionrestore(err_act) @@ -822,5 +822,5 @@ subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& return -end subroutine psb_cabgdxyz_vect +end subroutine psb_c_upd_xyz_vect diff --git a/base/psblas/psb_daxpby.f90 b/base/psblas/psb_daxpby.f90 index 1de77647..38ebe465 100644 --- a/base/psblas/psb_daxpby.f90 +++ b/base/psblas/psb_daxpby.f90 @@ -743,9 +743,9 @@ subroutine psb_daddconst_vect(x,b,z,desc_a,info) end subroutine psb_daddconst_vect -subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& +subroutine psb_d_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) - use psb_base_mod, psb_protect_name => psb_dabgdxyz_vect + use psb_base_mod, psb_protect_name => psb_d_upd_xyz_vect implicit none type(psb_d_vect_type), intent (inout) :: x type(psb_d_vect_type), intent (inout) :: y @@ -812,7 +812,7 @@ subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& end if if(desc_a%get_local_rows() > 0) then - call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info) + call z%upd_xyz(nr,alpha,beta,gamma,delta,x,y,info) end if call psb_erractionrestore(err_act) @@ -822,5 +822,5 @@ subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& return -end subroutine psb_dabgdxyz_vect +end subroutine psb_d_upd_xyz_vect diff --git a/base/psblas/psb_saxpby.f90 b/base/psblas/psb_saxpby.f90 index 1b1f24e6..0055fdbe 100644 --- a/base/psblas/psb_saxpby.f90 +++ b/base/psblas/psb_saxpby.f90 @@ -743,9 +743,9 @@ subroutine psb_saddconst_vect(x,b,z,desc_a,info) end subroutine psb_saddconst_vect -subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& +subroutine psb_s_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) - use psb_base_mod, psb_protect_name => psb_sabgdxyz_vect + use psb_base_mod, psb_protect_name => psb_s_upd_xyz_vect implicit none type(psb_s_vect_type), intent (inout) :: x type(psb_s_vect_type), intent (inout) :: y @@ -812,7 +812,7 @@ subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& end if if(desc_a%get_local_rows() > 0) then - call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info) + call z%upd_xyz(nr,alpha,beta,gamma,delta,x,y,info) end if call psb_erractionrestore(err_act) @@ -822,5 +822,5 @@ subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& return -end subroutine psb_sabgdxyz_vect +end subroutine psb_s_upd_xyz_vect diff --git a/base/psblas/psb_zaxpby.f90 b/base/psblas/psb_zaxpby.f90 index 0f37a1f4..e93488e3 100644 --- a/base/psblas/psb_zaxpby.f90 +++ b/base/psblas/psb_zaxpby.f90 @@ -743,9 +743,9 @@ subroutine psb_zaddconst_vect(x,b,z,desc_a,info) end subroutine psb_zaddconst_vect -subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& +subroutine psb_z_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) - use psb_base_mod, psb_protect_name => psb_zabgdxyz_vect + use psb_base_mod, psb_protect_name => psb_z_upd_xyz_vect implicit none type(psb_z_vect_type), intent (inout) :: x type(psb_z_vect_type), intent (inout) :: y @@ -812,7 +812,7 @@ subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& end if if(desc_a%get_local_rows() > 0) then - call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info) + call z%upd_xyz(nr,alpha,beta,gamma,delta,x,y,info) end if call psb_erractionrestore(err_act) @@ -822,5 +822,5 @@ subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& return -end subroutine psb_zabgdxyz_vect +end subroutine psb_z_upd_xyz_vect diff --git a/base/serial/psi_c_serial_impl.F90 b/base/serial/psi_c_serial_impl.F90 index e230a1e0..e3f1d9a3 100644 --- a/base/serial/psi_c_serial_impl.F90 +++ b/base/serial/psi_c_serial_impl.F90 @@ -1568,7 +1568,7 @@ subroutine caxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) end subroutine caxpbyv2 -subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) +subroutine psi_c_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psb_const_mod use psb_error_mod implicit none @@ -1582,7 +1582,7 @@ subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='cabgdxyz' + name='c_upd_xyz' info = psb_success_ if (m.lt.0) then @@ -1791,7 +1791,7 @@ subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_serror() return -end subroutine psi_cabgdxyz +end subroutine psi_c_upd_xyz subroutine psi_cxyzw(m,a,b,c,d,e,f,x, y, z,w, info) use psb_const_mod @@ -1808,7 +1808,7 @@ subroutine psi_cxyzw(m,a,b,c,d,e,f,x, y, z,w, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='cabgdxyz' + name='c_xyzw' info = psb_success_ if (m.lt.0) then diff --git a/base/serial/psi_d_serial_impl.F90 b/base/serial/psi_d_serial_impl.F90 index bf1b2917..d6a9a31d 100644 --- a/base/serial/psi_d_serial_impl.F90 +++ b/base/serial/psi_d_serial_impl.F90 @@ -1568,7 +1568,7 @@ subroutine daxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) end subroutine daxpbyv2 -subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) +subroutine psi_d_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psb_const_mod use psb_error_mod implicit none @@ -1582,7 +1582,7 @@ subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='dabgdxyz' + name='d_upd_xyz' info = psb_success_ if (m.lt.0) then @@ -1791,7 +1791,7 @@ subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_serror() return -end subroutine psi_dabgdxyz +end subroutine psi_d_upd_xyz subroutine psi_dxyzw(m,a,b,c,d,e,f,x, y, z,w, info) use psb_const_mod @@ -1808,7 +1808,7 @@ subroutine psi_dxyzw(m,a,b,c,d,e,f,x, y, z,w, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='dabgdxyz' + name='d_xyzw' info = psb_success_ if (m.lt.0) then diff --git a/base/serial/psi_e_serial_impl.F90 b/base/serial/psi_e_serial_impl.F90 index 911ab4ec..37b11a94 100644 --- a/base/serial/psi_e_serial_impl.F90 +++ b/base/serial/psi_e_serial_impl.F90 @@ -1568,7 +1568,7 @@ subroutine eaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) end subroutine eaxpbyv2 -subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) +subroutine psi_e_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psb_const_mod use psb_error_mod implicit none @@ -1582,7 +1582,7 @@ subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='eabgdxyz' + name='e_upd_xyz' info = psb_success_ if (m.lt.0) then @@ -1791,7 +1791,7 @@ subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_serror() return -end subroutine psi_eabgdxyz +end subroutine psi_e_upd_xyz subroutine psi_exyzw(m,a,b,c,d,e,f,x, y, z,w, info) use psb_const_mod @@ -1808,7 +1808,7 @@ subroutine psi_exyzw(m,a,b,c,d,e,f,x, y, z,w, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='eabgdxyz' + name='e_xyzw' info = psb_success_ if (m.lt.0) then diff --git a/base/serial/psi_i2_serial_impl.F90 b/base/serial/psi_i2_serial_impl.F90 index fb42dfcd..c20cd60b 100644 --- a/base/serial/psi_i2_serial_impl.F90 +++ b/base/serial/psi_i2_serial_impl.F90 @@ -1568,7 +1568,7 @@ subroutine i2axpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) end subroutine i2axpbyv2 -subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) +subroutine psi_i2_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psb_const_mod use psb_error_mod implicit none @@ -1582,7 +1582,7 @@ subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='i2abgdxyz' + name='i2_upd_xyz' info = psb_success_ if (m.lt.0) then @@ -1791,7 +1791,7 @@ subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_serror() return -end subroutine psi_i2abgdxyz +end subroutine psi_i2_upd_xyz subroutine psi_i2xyzw(m,a,b,c,d,e,f,x, y, z,w, info) use psb_const_mod @@ -1808,7 +1808,7 @@ subroutine psi_i2xyzw(m,a,b,c,d,e,f,x, y, z,w, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='i2abgdxyz' + name='i2_xyzw' info = psb_success_ if (m.lt.0) then diff --git a/base/serial/psi_m_serial_impl.F90 b/base/serial/psi_m_serial_impl.F90 index 346fd897..55913a16 100644 --- a/base/serial/psi_m_serial_impl.F90 +++ b/base/serial/psi_m_serial_impl.F90 @@ -1568,7 +1568,7 @@ subroutine maxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) end subroutine maxpbyv2 -subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) +subroutine psi_m_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psb_const_mod use psb_error_mod implicit none @@ -1582,7 +1582,7 @@ subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='mabgdxyz' + name='m_upd_xyz' info = psb_success_ if (m.lt.0) then @@ -1791,7 +1791,7 @@ subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_serror() return -end subroutine psi_mabgdxyz +end subroutine psi_m_upd_xyz subroutine psi_mxyzw(m,a,b,c,d,e,f,x, y, z,w, info) use psb_const_mod @@ -1808,7 +1808,7 @@ subroutine psi_mxyzw(m,a,b,c,d,e,f,x, y, z,w, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='mabgdxyz' + name='m_xyzw' info = psb_success_ if (m.lt.0) then diff --git a/base/serial/psi_s_serial_impl.F90 b/base/serial/psi_s_serial_impl.F90 index 52f86bcd..c3846c8e 100644 --- a/base/serial/psi_s_serial_impl.F90 +++ b/base/serial/psi_s_serial_impl.F90 @@ -1568,7 +1568,7 @@ subroutine saxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) end subroutine saxpbyv2 -subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) +subroutine psi_s_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psb_const_mod use psb_error_mod implicit none @@ -1582,7 +1582,7 @@ subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='sabgdxyz' + name='s_upd_xyz' info = psb_success_ if (m.lt.0) then @@ -1791,7 +1791,7 @@ subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_serror() return -end subroutine psi_sabgdxyz +end subroutine psi_s_upd_xyz subroutine psi_sxyzw(m,a,b,c,d,e,f,x, y, z,w, info) use psb_const_mod @@ -1808,7 +1808,7 @@ subroutine psi_sxyzw(m,a,b,c,d,e,f,x, y, z,w, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='sabgdxyz' + name='s_xyzw' info = psb_success_ if (m.lt.0) then diff --git a/base/serial/psi_z_serial_impl.F90 b/base/serial/psi_z_serial_impl.F90 index 7e680273..763eae22 100644 --- a/base/serial/psi_z_serial_impl.F90 +++ b/base/serial/psi_z_serial_impl.F90 @@ -1568,7 +1568,7 @@ subroutine zaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) end subroutine zaxpbyv2 -subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) +subroutine psi_z_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psb_const_mod use psb_error_mod implicit none @@ -1582,7 +1582,7 @@ subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='zabgdxyz' + name='z_upd_xyz' info = psb_success_ if (m.lt.0) then @@ -1791,7 +1791,7 @@ subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_serror() return -end subroutine psi_zabgdxyz +end subroutine psi_z_upd_xyz subroutine psi_zxyzw(m,a,b,c,d,e,f,x, y, z,w, info) use psb_const_mod @@ -1808,7 +1808,7 @@ subroutine psi_zxyzw(m,a,b,c,d,e,f,x, y, z,w, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='zabgdxyz' + name='z_xyzw' info = psb_success_ if (m.lt.0) then diff --git a/cuda/cvectordev.c b/cuda/cvectordev.c index cdfda481..65d41893 100644 --- a/cuda/cvectordev.c +++ b/cuda/cvectordev.c @@ -255,7 +255,7 @@ int axpbyMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha, void* devMultiVe return(i); } -int abgdxyzMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha,cuFloatComplex beta, +int upd_xyzMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha,cuFloatComplex beta, cuFloatComplex gamma, cuFloatComplex delta, void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ) { int j=0, i=0; @@ -268,7 +268,7 @@ int abgdxyzMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha,cuFloatComplex if ((n > devVecY->size_) || (n>devVecX->size_ )) return SPGPU_UNSUPPORTED; - spgpuCabgdxyz(handle,n, alpha,beta,gamma,delta, + spgpuCupd_xyz(handle,n, alpha,beta,gamma,delta, (cuFloatComplex *)devVecX->v_,(cuFloatComplex *) devVecY->v_,(cuFloatComplex *) devVecZ->v_); return(i); } diff --git a/cuda/cvectordev.h b/cuda/cvectordev.h index 62693e27..8c40b95d 100644 --- a/cuda/cvectordev.h +++ b/cuda/cvectordev.h @@ -69,7 +69,7 @@ int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA); int dotMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA, void* devVecB); int axpbyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void* devVecX, cuFloatComplex beta, void* devVecY); -int abgdxyzMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha,cuFloatComplex beta, +int upd_xyzMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha,cuFloatComplex beta, cuFloatComplex gamma, cuFloatComplex delta, void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ); int xyzwMultiVecDeviceFloatComplex(int n,cuFloatComplex a,cuFloatComplex b, diff --git a/cuda/dvectordev.c b/cuda/dvectordev.c index 723f48d8..a69e1b71 100644 --- a/cuda/dvectordev.c +++ b/cuda/dvectordev.c @@ -241,7 +241,7 @@ int axpbyMultiVecDeviceDouble(int n,double alpha, void* devMultiVecX, return(i); } -int abgdxyzMultiVecDeviceDouble(int n,double alpha,double beta, double gamma, double delta, +int upd_xyzMultiVecDeviceDouble(int n,double alpha,double beta, double gamma, double delta, void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ) { int j=0, i=0; int pitch = 0; @@ -253,7 +253,7 @@ int abgdxyzMultiVecDeviceDouble(int n,double alpha,double beta, double gamma, do if ((n > devVecY->size_) || (n>devVecX->size_ )) return SPGPU_UNSUPPORTED; - spgpuDabgdxyz(handle,n, alpha,beta,gamma,delta, + spgpuDupd_xyz(handle,n, alpha,beta,gamma,delta, (double*)devVecX->v_,(double*) devVecY->v_,(double*) devVecZ->v_); return(i); } diff --git a/cuda/dvectordev.h b/cuda/dvectordev.h index c2bfa1b5..3834c0d3 100644 --- a/cuda/dvectordev.h +++ b/cuda/dvectordev.h @@ -67,7 +67,7 @@ int asumMultiVecDeviceDouble(double* y_res, int n, void* devVecA); int dotMultiVecDeviceDouble(double* y_res, int n, void* devVecA, void* devVecB); int axpbyMultiVecDeviceDouble(int n, double alpha, void* devVecX, double beta, void* devVecY); -int abgdxyzMultiVecDeviceDouble(int n,double alpha,double beta, double gamma, double delta, +int upd_xyzMultiVecDeviceDouble(int n,double alpha,double beta, double gamma, double delta, void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ); int xyzwMultiVecDeviceDouble(int n,double a, double b, double c, double d, double e, double f, void* devMultiVecX, void* devMultiVecY, diff --git a/cuda/psb_c_cuda_vect_mod.F90 b/cuda/psb_c_cuda_vect_mod.F90 index 2c2a4f61..45fafe0a 100644 --- a/cuda/psb_c_cuda_vect_mod.F90 +++ b/cuda/psb_c_cuda_vect_mod.F90 @@ -90,7 +90,7 @@ module psb_c_cuda_vect_mod procedure, pass(x) :: dot_a => c_cuda_dot_a procedure, pass(y) :: axpby_v => c_cuda_axpby_v procedure, pass(y) :: axpby_a => c_cuda_axpby_a - procedure, pass(z) :: abgdxyz => c_cuda_abgdxyz + procedure, pass(z) :: upd_xyz => c_cuda_upd_xyz procedure, pass(y) :: mlt_v => c_cuda_mlt_v procedure, pass(y) :: mlt_a => c_cuda_mlt_a procedure, pass(z) :: mlt_a_2 => c_cuda_mlt_a_2 @@ -912,7 +912,7 @@ contains end subroutine c_cuda_axpby_v - subroutine c_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + subroutine c_cuda_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -946,7 +946,7 @@ contains if ((nx d_cuda_dot_a procedure, pass(y) :: axpby_v => d_cuda_axpby_v procedure, pass(y) :: axpby_a => d_cuda_axpby_a - procedure, pass(z) :: abgdxyz => d_cuda_abgdxyz + procedure, pass(z) :: upd_xyz => d_cuda_upd_xyz procedure, pass(y) :: mlt_v => d_cuda_mlt_v procedure, pass(y) :: mlt_a => d_cuda_mlt_a procedure, pass(z) :: mlt_a_2 => d_cuda_mlt_a_2 @@ -912,7 +912,7 @@ contains end subroutine d_cuda_axpby_v - subroutine d_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + subroutine d_cuda_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -946,7 +946,7 @@ contains if ((nx s_cuda_dot_a procedure, pass(y) :: axpby_v => s_cuda_axpby_v procedure, pass(y) :: axpby_a => s_cuda_axpby_a - procedure, pass(z) :: abgdxyz => s_cuda_abgdxyz + procedure, pass(z) :: upd_xyz => s_cuda_upd_xyz procedure, pass(y) :: mlt_v => s_cuda_mlt_v procedure, pass(y) :: mlt_a => s_cuda_mlt_a procedure, pass(z) :: mlt_a_2 => s_cuda_mlt_a_2 @@ -912,7 +912,7 @@ contains end subroutine s_cuda_axpby_v - subroutine s_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + subroutine s_cuda_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -946,7 +946,7 @@ contains if ((nx z_cuda_dot_a procedure, pass(y) :: axpby_v => z_cuda_axpby_v procedure, pass(y) :: axpby_a => z_cuda_axpby_a - procedure, pass(z) :: abgdxyz => z_cuda_abgdxyz + procedure, pass(z) :: upd_xyz => z_cuda_upd_xyz procedure, pass(y) :: mlt_v => z_cuda_mlt_v procedure, pass(y) :: mlt_a => z_cuda_mlt_a procedure, pass(z) :: mlt_a_2 => z_cuda_mlt_a_2 @@ -912,7 +912,7 @@ contains end subroutine z_cuda_axpby_v - subroutine z_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + subroutine z_cuda_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -946,7 +946,7 @@ contains if ((nxcurrentStream>>>(n, alpha, beta, gamma, delta, + spgpuCupd_xyz_krn<<currentStream>>>(n, alpha, beta, gamma, delta, x, y, z); } diff --git a/cuda/spgpu/kernels/dabgdxyz.cu b/cuda/spgpu/kernels/dupd_xyz.cu similarity index 88% rename from cuda/spgpu/kernels/dabgdxyz.cu rename to cuda/spgpu/kernels/dupd_xyz.cu index f2b18e02..38957fe1 100644 --- a/cuda/spgpu/kernels/dabgdxyz.cu +++ b/cuda/spgpu/kernels/dupd_xyz.cu @@ -31,7 +31,7 @@ extern "C" #define BLOCK_SIZE 512 -__global__ void spgpuDabgdxyz_krn(int n, double alpha, double beta, double gamma, double delta, +__global__ void spgpuDupd_xyz_krn(int n, double alpha, double beta, double gamma, double delta, double* x, double *y, double *z) { int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; @@ -54,7 +54,7 @@ __global__ void spgpuDabgdxyz_krn(int n, double alpha, double beta, double gamma } -void spgpuDabgdxyz(spgpuHandle_t handle, +void spgpuDupd_xyz(spgpuHandle_t handle, int n, double alpha, double beta, @@ -73,7 +73,7 @@ void spgpuDabgdxyz(spgpuHandle_t handle, num_blocks = num_blocks_mp*num_mp; dim3 grid(num_blocks); - spgpuDabgdxyz_krn<<currentStream>>>(n, alpha, beta, gamma, delta, + spgpuDupd_xyz_krn<<currentStream>>>(n, alpha, beta, gamma, delta, x, y, z); } diff --git a/cuda/spgpu/kernels/sabgdxyz.cu b/cuda/spgpu/kernels/supd_xyz.cu similarity index 88% rename from cuda/spgpu/kernels/sabgdxyz.cu rename to cuda/spgpu/kernels/supd_xyz.cu index 8c137ed3..d4ad1d0e 100644 --- a/cuda/spgpu/kernels/sabgdxyz.cu +++ b/cuda/spgpu/kernels/supd_xyz.cu @@ -31,7 +31,7 @@ extern "C" #define BLOCK_SIZE 512 -__global__ void spgpuSabgdxyz_krn(int n, float alpha, float beta, float gamma, float delta, +__global__ void spgpuSupd_xyz_krn(int n, float alpha, float beta, float gamma, float delta, float* x, float *y, float *z) { int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; @@ -54,7 +54,7 @@ __global__ void spgpuSabgdxyz_krn(int n, float alpha, float beta, float gamma, f } -void spgpuSabgdxyz(spgpuHandle_t handle, +void spgpuSupd_xyz(spgpuHandle_t handle, int n, float alpha, float beta, @@ -73,7 +73,7 @@ void spgpuSabgdxyz(spgpuHandle_t handle, num_blocks = num_blocks_mp*num_mp; dim3 grid(num_blocks); - spgpuSabgdxyz_krn<<currentStream>>>(n, alpha, beta, gamma, delta, + spgpuSupd_xyz_krn<<currentStream>>>(n, alpha, beta, gamma, delta, x, y, z); } diff --git a/cuda/spgpu/kernels/zabgdxyz.cu b/cuda/spgpu/kernels/zupd_xyz.cu similarity index 88% rename from cuda/spgpu/kernels/zabgdxyz.cu rename to cuda/spgpu/kernels/zupd_xyz.cu index 48def937..a3936ee9 100644 --- a/cuda/spgpu/kernels/zabgdxyz.cu +++ b/cuda/spgpu/kernels/zupd_xyz.cu @@ -31,7 +31,7 @@ extern "C" #define BLOCK_SIZE 512 -__global__ void spgpuZabgdxyz_krn(int n, cuDoubleComplex alpha, cuDoubleComplex beta, +__global__ void spgpuZupd_xyz_krn(int n, cuDoubleComplex alpha, cuDoubleComplex beta, cuDoubleComplex gamma, cuDoubleComplex delta, cuDoubleComplex * x, cuDoubleComplex *y, cuDoubleComplex *z) { @@ -55,7 +55,7 @@ __global__ void spgpuZabgdxyz_krn(int n, cuDoubleComplex alpha, cuDoubleComplex } -void spgpuZabgdxyz(spgpuHandle_t handle, +void spgpuZupd_xyz(spgpuHandle_t handle, int n, cuDoubleComplex alpha, cuDoubleComplex beta, @@ -74,7 +74,7 @@ void spgpuZabgdxyz(spgpuHandle_t handle, num_blocks = num_blocks_mp*num_mp; dim3 grid(num_blocks); - spgpuZabgdxyz_krn<<currentStream>>>(n, alpha, beta, gamma, delta, + spgpuZupd_xyz_krn<<currentStream>>>(n, alpha, beta, gamma, delta, x, y, z); } diff --git a/cuda/spgpu/vector.h b/cuda/spgpu/vector.h index d08e6edd..26c3443d 100644 --- a/cuda/spgpu/vector.h +++ b/cuda/spgpu/vector.h @@ -182,7 +182,7 @@ void spgpuSaxpby(spgpuHandle_t handle, __device float* x); -void spgpuSabgdxyz(spgpuHandle_t handle, +void spgpuSupd_xyz(spgpuHandle_t handle, int n, float alpha, float beta, @@ -487,7 +487,7 @@ void spgpuDaxpby(spgpuHandle_t handle, __device double* x); -void spgpuDabgdxyz(spgpuHandle_t handle, +void spgpuDupd_xyz(spgpuHandle_t handle, int n, double alpha, double beta, @@ -789,7 +789,7 @@ void spgpuCaxpby(spgpuHandle_t handle, __device cuFloatComplex* x); -void spgpuCabgdxyz(spgpuHandle_t handle, +void spgpuCupd_xyz(spgpuHandle_t handle, int n, cuFloatComplex alpha, cuFloatComplex beta, @@ -1092,7 +1092,7 @@ void spgpuZaxpby(spgpuHandle_t handle, __device cuDoubleComplex* x); -void spgpuZabgdxyz(spgpuHandle_t handle, +void spgpuZupd_xyz(spgpuHandle_t handle, int n, cuDoubleComplex alpha, cuDoubleComplex beta, diff --git a/cuda/svectordev.c b/cuda/svectordev.c index bf7545bb..cfaef5ce 100644 --- a/cuda/svectordev.c +++ b/cuda/svectordev.c @@ -241,7 +241,7 @@ int axpbyMultiVecDeviceFloat(int n,float alpha, void* devMultiVecX, return(i); } -int abgdxyzMultiVecDeviceFloat(int n,float alpha,float beta, float gamma, float delta, +int upd_xyzMultiVecDeviceFloat(int n,float alpha,float beta, float gamma, float delta, void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ) { int j=0, i=0; int pitch = 0; @@ -253,7 +253,7 @@ int abgdxyzMultiVecDeviceFloat(int n,float alpha,float beta, float gamma, float if ((n > devVecY->size_) || (n>devVecX->size_ )) return SPGPU_UNSUPPORTED; - spgpuSabgdxyz(handle,n, alpha,beta,gamma,delta, + spgpuSupd_xyz(handle,n, alpha,beta,gamma,delta, (float*)devVecX->v_,(float*) devVecY->v_,(float*) devVecZ->v_); return(i); } diff --git a/cuda/svectordev.h b/cuda/svectordev.h index 363c0108..d5c85f78 100644 --- a/cuda/svectordev.h +++ b/cuda/svectordev.h @@ -67,7 +67,7 @@ int asumMultiVecDeviceFloat(float* y_res, int n, void* devVecA); int dotMultiVecDeviceFloat(float* y_res, int n, void* devVecA, void* devVecB); int axpbyMultiVecDeviceFloat(int n, float alpha, void* devVecX, float beta, void* devVecY); -int abgdxyzMultiVecDeviceFloat(int n,float alpha,float beta, float gamma, float delta, +int upd_xyzMultiVecDeviceFloat(int n,float alpha,float beta, float gamma, float delta, void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ); int xyzwMultiVecDeviceFloat(int n,float a,float b, float c, float d, float e, float f, void* devMultiVecX, void* devMultiVecY, diff --git a/cuda/zvectordev.c b/cuda/zvectordev.c index e9f0cec7..d7d88f1b 100644 --- a/cuda/zvectordev.c +++ b/cuda/zvectordev.c @@ -234,7 +234,7 @@ int dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMulti return(i); } -int abgdxyzMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha, +int upd_xyzMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha, cuDoubleComplex beta, cuDoubleComplex gamma, cuDoubleComplex delta, void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ) { int j=0, i=0; @@ -247,7 +247,7 @@ int abgdxyzMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha, if ((n > devVecY->size_) || (n>devVecX->size_ )) return SPGPU_UNSUPPORTED; - spgpuZabgdxyz(handle,n, alpha,beta,gamma,delta, + spgpuZupd_xyz(handle,n, alpha,beta,gamma,delta, (cuDoubleComplex *)devVecX->v_,(cuDoubleComplex *) devVecY->v_,(cuDoubleComplex *) devVecZ->v_); return(i); } diff --git a/cuda/zvectordev.h b/cuda/zvectordev.h index ae623bdb..e15802f0 100644 --- a/cuda/zvectordev.h +++ b/cuda/zvectordev.h @@ -77,7 +77,7 @@ int dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, int axpbyMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void* devVecX, cuDoubleComplex beta, void* devVecY); -int abgdxyzMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha, +int upd_xyzMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha, cuDoubleComplex beta, cuDoubleComplex gamma, cuDoubleComplex delta, void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ); int xyzwMultiVecDeviceDoubleComplex(int n,cuDoubleComplex a, cuDoubleComplex b, diff --git a/krylov/psb_dcg.F90 b/krylov/psb_dcg.F90 index caebb712..669573be 100644 --- a/krylov/psb_dcg.F90 +++ b/krylov/psb_dcg.F90 @@ -129,8 +129,6 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,& logical :: do_cond character(len=20) :: name character(len=*), parameter :: methdname='CG' - logical, parameter :: do_timings=.true. - integer(psb_ipk_), save :: cg_vect=-1, cg_mv=-1, cg_prec=-1 info = psb_success_ name = 'psb_dcg' @@ -151,12 +149,6 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,& call psb_errpush(info,name) goto 9999 endif - if ((do_timings).and.(cg_vect==-1)) & - & cg_vect = psb_get_timer_idx("CG: vector ops ") - if ((do_timings).and.(cg_mv==-1)) & - & cg_mv = psb_get_timer_idx("CG: MV product") - if ((do_timings).and.(cg_prec==-1)) & - & cg_prec = psb_get_timer_idx("CG: preconditioner") mglob = desc_a%get_global_rows() @@ -227,21 +219,17 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,& ! = ! = r0 = b-Ax0 ! = - if (do_timings) call psb_tic(cg_vect) if (itx>= itmax_) exit restart + it = 0 call psb_geaxpby(done,b,dzero,r,desc_a,info) - if (do_timings) call psb_toc(cg_vect) - if (do_timings) call psb_tic(cg_mv) if (info == psb_success_) call psb_spmm(-done,a,x,done,r,desc_a,info,work=aux) if (info /= psb_success_) then info=psb_err_from_subroutine_non_ call psb_errpush(info,name) goto 9999 end if - if (do_timings) call psb_toc(cg_mv) - - if (do_timings) call psb_tic(cg_vect) + rho = dzero call psb_init_conv(methdname,istop_,itrace_,itmax_,a,x,b,eps,desc_a,stopdat,info) @@ -249,18 +237,13 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,& call psb_errpush(psb_err_from_subroutine_non_,name) goto 9999 End If - if (do_timings) call psb_toc(cg_vect) - + iteration: do it = it + 1 itx = itx + 1 - if (do_timings) call psb_tic(cg_prec) - - call prec%apply(r,z,desc_a,info,work=aux) - if (do_timings) call psb_toc(cg_prec) - if (do_timings) call psb_tic(cg_vect) + call prec%apply(r,z,desc_a,info,work=aux) rho_old = rho rho = psb_gedot(r,z,desc_a,info) @@ -271,18 +254,13 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,& if (debug_level >= psb_debug_ext_)& & write(debug_unit,*) me,' ',trim(name),& & ': CG Iteration breakdown rho' - if (do_timings) call psb_toc(cg_vect) exit iteration endif beta = rho/rho_old call psb_geaxpby(done,z,beta,p,desc_a,info) end if - if (do_timings) call psb_toc(cg_vect) - if (do_timings) call psb_tic(cg_mv) call psb_spmm(done,a,p,dzero,q,desc_a,info,work=aux) - if (do_timings) call psb_toc(cg_mv) - if (do_timings) call psb_tic(cg_vect) sigma = psb_gedot(p,q,desc_a,info) if (sigma == dzero) then if (debug_level >= psb_debug_ext_)& @@ -315,7 +293,6 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,& end do iteration end do restart - if (do_timings) call psb_toc(cg_vect) if (do_cond) then if (me == psb_root_) then #if defined(HAVE_LAPACK) diff --git a/test/cudakern/Makefile b/test/cudakern/Makefile index 5d938973..41cef197 100755 --- a/test/cudakern/Makefile +++ b/test/cudakern/Makefile @@ -16,48 +16,26 @@ LDLIBS=$(PSBGPULDLIBS) FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FMFLAG). $(FMFLAG)$(PSBMODDIR) $(FMFLAG)$(PSBINCDIR) $(LIBRSB_DEFINES) -ZTOBJS=z_file_spmv.o data_input.o -CTOBJS=c_file_spmv.o data_input.o -DTOBJS=d_file_spmv.o data_input.o -STOBJS=s_file_spmv.o data_input.o -DPGOBJS=dpdegenmv.o -SPGOBJS=spdegenmv.o +DPGOBJS=dpdegenmv.o data_input.o +SPGOBJS=spdegenmv.o data_input.o EXEDIR=./runs -all: dir pgen file +all: dir pgen pgen: dpdegenmv spdegenmv -file: s_file_spmv c_file_spmv d_file_spmv z_file_spmv -dpdegenmv spdegenmv s_file_spmv c_file_spmv d_file_spmv z_file_spmv: dir +dpdegenmv spdegenmv: dir dir: (if test ! -d $(EXEDIR); then mkdir $(EXEDIR); fi) - -dpdegenmv: $(DPGOBJS) +dpdegenmv.o spdegenmv.o: data_input.o +dpdegenmv: $(DPGOBJS) $(FLINK) $(LOPT) $(DPGOBJS) -fopenmp -o dpdegenmv $(FINCLUDES) $(PSBLAS_LIB) $(LDLIBS) /bin/mv dpdegenmv $(EXEDIR) spdegenmv: $(SPGOBJS) $(FLINK) $(LOPT) $(SPGOBJS) -o spdegenmv $(PSBLAS_LIB) $(LDLIBS) /bin/mv spdegenmv $(EXEDIR) -z_file_spmv: $(ZTOBJS) - $(FLINK) $(LOPT) $(ZTOBJS) -o z_file_spmv $(PSBLAS_LIB) $(LDLIBS) - /bin/mv z_file_spmv $(EXEDIR) -c_file_spmv: $(CTOBJS) - $(FLINK) $(LOPT) $(CTOBJS) -o c_file_spmv $(PSBLAS_LIB) $(LDLIBS) - /bin/mv c_file_spmv $(EXEDIR) -d_file_spmv: $(DTOBJS) - $(FLINK) $(LOPT) $(DTOBJS) -o d_file_spmv $(PSBLAS_LIB) $(LDLIBS) - /bin/mv d_file_spmv $(EXEDIR) -s_file_spmv: $(STOBJS) - $(FLINK) $(LOPT) $(STOBJS) -o s_file_spmv $(PSBLAS_LIB) $(LDLIBS) - /bin/cp s_file_spmv $(EXEDIR) - -d_file_spmv.o s_file_spmv.o z_file_spmv.o c_file_spmv.o: data_input.o - clean: /bin/rm -f $(DTOBJS) $(STOBJS) $(DPGOBJS) $(SPGOBJS) $(ZTOBJS) $(CTOBJS) \ - $(EXEDIR)/dpdegenmv $(EXEDIR)/spdegenmv \ - $(EXEDIR)/d_file_spmv $(EXEDIR)/s_file_spmv \ - $(EXEDIR)/z_file_spmv $(EXEDIR)/c_file_spmv + $(EXEDIR)/dpdegenmv $(EXEDIR)/spdegenmv *mod lib: (cd ../../; make library) diff --git a/test/cudakern/c_file_spmv.F90 b/test/cudakern/c_file_spmv.F90 deleted file mode 100644 index 2f9840ec..00000000 --- a/test/cudakern/c_file_spmv.F90 +++ /dev/null @@ -1,491 +0,0 @@ -! -! Parallel Sparse BLAS GPU plugin -! (C) Copyright 2013 -! Salvatore Filippone -! Alessandro Fanfarillo -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! -! -program c_file_spmv - use psb_base_mod - use psb_util_mod - use psb_ext_mod -#ifdef HAVE_GPU - use psb_gpu_mod -#endif - use data_input - implicit none - - ! input parameters - character(len=200) :: mtrx_file - - ! sparse matrices - type(psb_cspmat_type) :: a, aux_a, agpu - - ! dense matrices - complex(psb_spk_), allocatable, target :: aux_b(:,:), d(:) - complex(psb_spk_), allocatable , save :: x_col_glob(:), r_col_glob(:) - complex(psb_spk_), pointer :: b_col_glob(:) - type(psb_c_vect_type) :: b_col, x_col, r_col - type(psb_c_vect_type) :: xg, bg, xv, bv -#ifdef HAVE_GPU - type(psb_c_vect_gpu) :: vmold -#endif - complex(psb_spk_), allocatable :: xc1(:),xc2(:) - ! communications data structure - type(psb_desc_type):: desc_a - - type(psb_ctxt_type) :: ctxt - integer :: iam, np - integer(psb_epk_) :: amatsize, agmatsize, precsize, descsize, annz, nbytes - real(psb_spk_) :: damatsize, dgmatsize - complex(psb_spk_) :: err, eps - - character(len=5) :: acfmt, agfmt - character(len=20) :: name - character(len=2) :: filefmt - integer, parameter :: iunit=12 - integer, parameter :: times=2000 - integer, parameter :: ntests=200, ngpu=50, ncnv=20 - - type(psb_c_coo_sparse_mat), target :: acoo - type(psb_c_csr_sparse_mat), target :: acsr - type(psb_c_ell_sparse_mat), target :: aell - type(psb_c_hll_sparse_mat), target :: ahll -#ifdef HAVE_GPU - type(psb_c_elg_sparse_mat), target :: aelg - type(psb_c_csrg_sparse_mat), target :: acsrg - type(psb_c_hybg_sparse_mat), target :: ahybg - type(psb_c_hlg_sparse_mat), target :: ahlg -#endif - class(psb_c_base_sparse_mat), pointer :: acmold, agmold - ! other variables - integer :: i,info,j,nrt, ns, nr, ipart, ig, nrg - integer :: internal, m,ii,nnzero - real(psb_dpk_) :: t0,t1, t2, tprec, flops - real(psb_dpk_) :: tt1, tt2, tflops, gt1, gt2,gflops, gtint, bdwdth,& - & tcnvcsr, tcnvc1, tcnvgpu, tcnvg1 - integer :: nrhs, nrow, n_row, dim, nv, ne - integer, allocatable :: ivg(:), ipv(:) - - - call psb_init(ctxt) - call psb_info(ctxt,iam,np) -#ifdef HAVE_GPU - call psb_gpu_init(ctxt) -#endif - if (iam < 0) then - ! This should not happen, but just in case - call psb_exit(ctxt) - stop - endif - - - name='file_spmv' - if(psb_get_errstatus() /= 0) goto 9999 - info=psb_success_ - call psb_set_errverbosity(2) - if (iam == psb_root_) then - write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_ - write(*,*) 'This is the ',trim(name),' sample program' - end if -#ifdef HAVE_GPU - write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount() - write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_gpu_DeviceName()) -#endif - - if (iam == 0) then - write(*,*) 'Matrix? ' - call read_data(mtrx_file,psb_inp_unit) - write(*,*) 'file format' - call read_data(filefmt,psb_inp_unit) - write(*,*) 'CPU format' - call read_data(acfmt,psb_inp_unit) - write(*,*) 'GPU format' - call read_data(agfmt,psb_inp_unit) - write(*,*) 'distribution ' - call read_data(ipart,psb_inp_unit) - write(*,*) 'Read all data, going on' - end if - call psb_bcast(ctxt,mtrx_file) - call psb_bcast(ctxt,filefmt) - call psb_bcast(ctxt,acfmt) - call psb_bcast(ctxt,agfmt) - call psb_bcast(ctxt,ipart) - call psb_barrier(ctxt) - t0 = psb_wtime() - ! read the input matrix to be processed and (possibly) the rhs - nrhs = 1 - - if (iam==psb_root_) then - select case(psb_toupper(filefmt)) - case('MM') - ! For Matrix Market we have an input file for the matrix - ! and an (optional) second file for the RHS. - call mm_mat_read(aux_a,info,iunit=iunit,filename=mtrx_file) - - case ('HB') - ! For Harwell-Boeing we have a single file which may or may not - ! contain an RHS. - call hb_read(aux_a,info,iunit=iunit,filename=mtrx_file) - - case default - info = -1 - write(psb_err_unit,*) 'Wrong choice for fileformat ', filefmt - end select - if (info /= 0) then - write(psb_err_unit,*) 'Error while reading input matrix ' - call psb_abort(ctxt) - end if - - ! - ! Always get nnz from original matrix. - ! Some formats add fill-in and do not keep track - ! of how many were added. So if the original matrix - ! contained some extra zeros, the count of entries - ! is not recoverable exactly. - ! - nrt = aux_a%get_nrows() - annz = aux_a%get_nzeros() - call psb_bcast(ctxt,annz) - call psb_bcast(ctxt,nrt) - - write(psb_out_unit,'("Generating an rhs...")') - write(psb_out_unit,'(" ")') - call psb_realloc(nrt,1,aux_b,info) - if (info /= 0) then - call psb_errpush(4000,name) - goto 9999 - endif - - b_col_glob => aux_b(:,1) - do i=1, nrt - b_col_glob(i) = 1.d0 - enddo - - else - - call psb_bcast(ctxt,annz) - call psb_bcast(ctxt,nrt) - - end if - - - select case(psb_toupper(acfmt)) - case('COO') - acmold => acoo - case('CSR') - acmold => acsr - case('ELL') - acmold => aell - case('HLL') - acmold => ahll - case default - write(*,*) 'Unknown format defaulting to CSR' - acmold => acsr - end select - -#ifdef HAVE_GPU - select case(psb_toupper(agfmt)) - case('ELG') - agmold => aelg - case('HLG') - agmold => ahlg - case('CSRG') - agmold => acsrg - case('HYBG') - agmold => ahybg - case default - write(*,*) 'Unknown format defaulting to HLG' - agmold => ahlg - end select -#endif - - - ! switch over different partition types - if (ipart == 0) then - call psb_barrier(ctxt) - if (iam==psb_root_) write(psb_out_unit,'("Partition type: block")') - allocate(ivg(nrt),ipv(np)) - do i=1,nrt - call part_block(i,nrt,np,ipv,nv) - ivg(i) = ipv(1) - enddo - call psb_matdist(aux_a, a, ctxt, desc_a,info,v=ivg) - else if (ipart == 2) then - if (iam==psb_root_) then - write(psb_out_unit,'("Partition type: graph")') - write(psb_out_unit,'(" ")') - ! write(psb_err_unit,'("Build type: graph")') - call build_mtpart(aux_a,np) - endif - call psb_barrier(ctxt) - call distr_mtpart(psb_root_,ctxt) - call getv_mtpart(ivg) - call psb_matdist(aux_a, a, ctxt, desc_a,info,v=ivg) - else - if (iam==psb_root_) write(psb_out_unit,'("Partition type default: block")') - call psb_matdist(aux_a, a, ctxt,desc_a,info,parts=part_block) - end if - - call psb_scatter(b_col_glob,bv,desc_a,info,root=psb_root_) - - t2 = psb_wtime() - t0 - - call psb_amx(ctxt, t2) - - if (iam==psb_root_) then - write(psb_out_unit,'(" ")') - write(psb_out_unit,'("Time to read and partition matrix : ",es12.5)')t2 - write(psb_out_unit,'(" ")') - end if - call a%cscnv(aux_a,info,mold=acoo) - tcnvcsr = 0 - tcnvgpu = 0 - nr = desc_a%get_local_rows() - nrg = desc_a%get_global_rows() - call psb_geall(x_col,desc_a,info) - do i=1, nr - call desc_a%l2g(i,ig,info) - call psb_geins(ione,(/ig/),(/(cone + (cone*ig)/nrg)/),x_col,desc_a,info) - end do - call psb_geasb(x_col,desc_a,info) - do j=1, ncnv - call aux_a%cscnv(a,info,mold=acoo) - call psb_barrier(ctxt) - t1 = psb_wtime() - call a%cscnv(info,mold=acmold) - t2 = psb_Wtime() -t1 - call psb_amx(ctxt,t2) - tcnvcsr = tcnvcsr + t2 - if (j==1) tcnvc1 = t2 - xc1 = x_col%get_vect() - call xv%bld(xc1) - call psb_geasb(bv,desc_a,info,scratch=.true.) - -#ifdef HAVE_GPU - - call aux_a%cscnv(agpu,info,mold=acoo) - call xg%bld(xc1,mold=vmold) - call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold) - call psb_barrier(ctxt) - t1 = psb_wtime() - call agpu%cscnv(info,mold=agmold) - call psb_gpu_DeviceSync() - t2 = psb_Wtime() -t1 - call psb_amx(ctxt,t2) - if (j==1) tcnvg1 = t2 - tcnvgpu = tcnvgpu + t2 -#endif - end do - - call psb_barrier(ctxt) - t1 = psb_wtime() - do i=1,ntests - call psb_spmm(cone,a,xv,czero,bv,desc_a,info) - end do - call psb_barrier(ctxt) - t2 = psb_wtime() - t1 - call psb_amx(ctxt,t2) - -#ifdef HAVE_GPU - ! FIXME: cache flush needed here - call psb_barrier(ctxt) - tt1 = psb_wtime() - do i=1,ntests - call psb_spmm(cone,agpu,xv,czero,bg,desc_a,info) - if ((info /= 0).or.(psb_get_errstatus()/=0)) then - write(0,*) 'From 1 spmm',info,i,ntests - call psb_error() - stop - end if - - end do - call psb_gpu_DeviceSync() - call psb_barrier(ctxt) - tt2 = psb_wtime() - tt1 - call psb_amx(ctxt,tt2) - xc1 = bv%get_vect() - xc2 = bg%get_vect() - nr = desc_a%get_local_rows() - eps = maxval(abs(xc1(1:nr)-xc2(1:nr))) - call psb_amx(ctxt,eps) - if (iam==0) write(*,*) 'Max diff on xGPU',eps - - call xg%sync() - ! FIXME: cache flush needed here - - call psb_barrier(ctxt) - gt1 = psb_wtime() - do i=1,ntests*ngpu - call psb_spmm(cone,agpu,xg,czero,bg,desc_a,info) - if ((info /= 0).or.(psb_get_errstatus()/=0)) then - write(0,*) 'From 2 spmm',info,i,ntests - call psb_error() - stop - end if - - end do - ! For timing purposes we need to make sure all threads - ! in the device are done. - call psb_gpu_DeviceSync() - call psb_barrier(ctxt) - gt2 = psb_wtime() - gt1 - call psb_amx(ctxt,gt2) - call bg%sync() - xc1 = bv%get_vect() - xc2 = bg%get_vect() - call psb_geaxpby(-cone,bg,+cone,bv,desc_a,info) - eps = psb_geamax(bv,desc_a,info) - - call psb_amx(ctxt,t2) - nr = desc_a%get_local_rows() - eps = maxval(abs(xc1(1:nr)-xc2(1:nr))) - call psb_amx(ctxt,eps) - if (iam==0) write(*,*) 'Max diff on GPU',eps -#endif - - - amatsize = a%sizeof() - agmatsize = agpu%sizeof() - damatsize = amatsize - damatsize = damatsize/(1024*1024) - dgmatsize = agmatsize - dgmatsize = dgmatsize/(1024*1024) - descsize = psb_sizeof(desc_a) - call psb_sum(ctxt,damatsize) - call psb_sum(ctxt,dgmatsize) - call psb_sum(ctxt,descsize) - - if (iam == psb_root_) then - write(psb_out_unit,'("Matrix: ",a)') mtrx_file - write(psb_out_unit,& - &'("Test on : ",i20," processors")') np - write(psb_out_unit,& - &'("Size of matrix : ",i20," ")') nrt - write(psb_out_unit,& - &'("Number of nonzeros : ",i20," ")') annz - write(psb_out_unit,& - &'("Memory occupation CPU (MBytes) : ",f20.2," ")') damatsize - write(psb_out_unit,& - &'("Memory occupation GPU (MBytes) : ",f20.2," ")') dgmatsize - write(psb_out_unit,& - &'("Memory occupation CPU (Bytes) : ",i24," ")') amatsize - write(psb_out_unit,& - &'("Memory occupation GPU (Bytes) : ",i24," ")') agmatsize - flops = ntests*(2.d0*annz) - tflops = flops - gflops = flops * ngpu - write(psb_out_unit,'("Storage type for A: ",a)') a%get_fmt() -#ifdef HAVE_GPU - write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt() - write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')& - & tcnvc1 - write(psb_out_unit,'("Time to convert A from COO to CPU (t): ",F20.9)')& - & tcnvcsr - write(psb_out_unit,'("Time to convert A from COO to CPU (a): ",F20.9)')& - & tcnvcsr/ncnv - write(psb_out_unit,'("Time to convert A from COO to GPU (1): ",F20.9)')& - & tcnvg1 - write(psb_out_unit,'("Time to convert A from COO to GPU (t): ",F20.9)')& - & tcnvgpu - write(psb_out_unit,'("Time to convert A from COO to GPU (a): ",F20.9)')& - & tcnvgpu/ncnv - -#endif - write(psb_out_unit,& - & '("Number of flops (",i0," prod) : ",F20.0," ")') & - & ntests,flops - - flops = flops / (t2) - tflops = tflops / (tt2) - gflops = gflops / (gt2) - write(psb_out_unit,'("Time for ",i6," products (s) (CPU) : ",F20.3)')& - & ntests,t2 - write(psb_out_unit,'("Time per product (ms) (CPU) : ",F20.3)')& - & t2*1.d3/(1.d0*ntests) - write(psb_out_unit,'("MFLOPS (CPU) : ",F20.3)')& - & flops/1.d6 -#ifdef HAVE_GPU - - write(psb_out_unit,'("Time for ",i6," products (s) (xGPU) : ",F20.3)')& - & ntests, tt2 - write(psb_out_unit,'("Time per product (ms) (xGPU) : ",F20.3)')& - & tt2*1.d3/(1.d0*ntests) - write(psb_out_unit,'("MFLOPS (xGPU) : ",F20.3)')& - & tflops/1.d6 - - write(psb_out_unit,'("Time for ",i6," products (s) (GPU) : ",F20.3)')& - & ngpu*ntests,gt2 - write(psb_out_unit,'("Time per product (ms) (GPU) : ",F20.3)')& - & gt2*1.d3/(1.d0*ntests*ngpu) - write(psb_out_unit,'("MFLOPS (GPU) : ",F20.3)')& - & gflops/1.d6 -#endif - ! - ! This computation assumes the data movement associated with CSR: - ! it is minimal in terms of coefficients. Other formats may either move - ! more data (padding etc.) or less data (if they can save on the indices). - ! - nbytes = nr*(2*2*psb_sizeof_dp + psb_sizeof_ip)+& - & annz*(2*psb_sizeof_dp + psb_sizeof_ip) - bdwdth = ntests*nbytes/(t2*1.d6) - write(psb_out_unit,*) - write(psb_out_unit,'("MBYTES/S (CPU) : ",F20.3)') bdwdth -#ifdef HAVE_GPU - bdwdth = ngpu*ntests*nbytes/(gt2*1.d6) - write(psb_out_unit,'("MBYTES/S (GPU) : ",F20.3)') bdwdth -#endif - write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt() - write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize - - end if - - call psb_gefree(b_col, desc_a,info) - call psb_gefree(x_col, desc_a,info) - call psb_gefree(xv, desc_a,info) - call psb_gefree(bv, desc_a,info) - call psb_spfree(a, desc_a,info) -#ifdef HAVE_GPU - call psb_gefree(xg, desc_a,info) - call psb_gefree(bg, desc_a,info) - call psb_spfree(agpu,desc_a,info) - call psb_gpu_exit() -#endif - call psb_cdfree(desc_a,info) - - call psb_exit(ctxt) - stop - -9999 continue - call psb_error(ctxt) - -end program c_file_spmv - - - - - diff --git a/test/cudakern/d_file_spmv.F90 b/test/cudakern/d_file_spmv.F90 deleted file mode 100644 index 2bbc0bc4..00000000 --- a/test/cudakern/d_file_spmv.F90 +++ /dev/null @@ -1,496 +0,0 @@ -! -! Parallel Sparse BLAS GPU plugin -! (C) Copyright 2013 -! Salvatore Filippone -! Alessandro Fanfarillo -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! -! -program d_file_spmv - use psb_base_mod - use psb_util_mod - use psb_ext_mod -#ifdef HAVE_GPU - use psb_gpu_mod -#endif - use data_input - implicit none - - ! input parameters - character(len=200) :: mtrx_file - - ! sparse matrices - type(psb_dspmat_type) :: a, aux_a, agpu - - ! dense matrices - real(psb_dpk_), allocatable, target :: aux_b(:,:), d(:) - real(psb_dpk_), allocatable , save :: x_col_glob(:), r_col_glob(:) - real(psb_dpk_), pointer :: b_col_glob(:) - type(psb_d_vect_type) :: b_col, x_col, r_col - type(psb_d_vect_type) :: xg, bg, xv, bv -#ifdef HAVE_GPU - type(psb_d_vect_gpu) :: vmold -#endif - real(psb_dpk_), allocatable :: xc1(:),xc2(:) - ! communications data structure - type(psb_desc_type):: desc_a - - type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: iam, np - integer(psb_epk_) :: amatsize, agmatsize, precsize, descsize, annz, nbytes - real(psb_dpk_) :: err, eps, damatsize, dgmatsize - - character(len=5) :: acfmt, agfmt - character(len=20) :: name - character(len=2) :: filefmt - integer, parameter :: iunit=12 - integer, parameter :: times=2000 - integer, parameter :: ntests=200, ngpu=50, ncnv=20 - - type(psb_d_coo_sparse_mat), target :: acoo - type(psb_d_csr_sparse_mat), target :: acsr - type(psb_d_ell_sparse_mat), target :: aell - type(psb_d_hll_sparse_mat), target :: ahll - type(psb_d_hdia_sparse_mat), target :: ahdia -#ifdef HAVE_GPU - type(psb_d_elg_sparse_mat), target :: aelg - type(psb_d_csrg_sparse_mat), target :: acsrg - type(psb_d_hybg_sparse_mat), target :: ahybg - type(psb_d_hlg_sparse_mat), target :: ahlg - type(psb_d_hdiag_sparse_mat), target :: ahdiag -#endif - class(psb_d_base_sparse_mat), pointer :: acmold, agmold - ! other variables - integer(psb_lpk_) :: i,j,nrt, ns, nr, ig, nrg - integer(psb_ipk_) :: internal, m,ii,nnzero,info, ipart - real(psb_dpk_) :: t0,t1, t2, tprec, flops - real(psb_dpk_) :: tt1, tt2, tflops, gt1, gt2,gflops, gtint, bdwdth,& - & tcnvcsr, tcnvc1, tcnvgpu, tcnvg1 - integer :: nrhs, nrow, n_row, dim, nv, ne - integer, allocatable :: ivg(:), ipv(:) - - - call psb_init(ctxt) - call psb_info(ctxt,iam,np) -#ifdef HAVE_GPU - call psb_gpu_init(ctxt) -#endif - if (iam < 0) then - ! This should not happen, but just in case - call psb_exit(ctxt) - stop - endif - - - name='file_spmv' - if(psb_get_errstatus() /= 0) goto 9999 - info=psb_success_ - call psb_set_errverbosity(2) - if (iam == psb_root_) then - write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_ - write(*,*) 'This is the ',trim(name),' sample program' - end if -#ifdef HAVE_GPU - write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount() - write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_gpu_DeviceName()) -#endif - - if (iam == 0) then - write(*,*) 'Matrix? ' - call read_data(mtrx_file,psb_inp_unit) - write(*,*) 'file format' - call read_data(filefmt,psb_inp_unit) - write(*,*) 'CPU format' - call read_data(acfmt,psb_inp_unit) - write(*,*) 'GPU format' - call read_data(agfmt,psb_inp_unit) - write(*,*) 'distribution ' - call read_data(ipart,psb_inp_unit) - write(*,*) 'Read all data, going on' - end if - call psb_bcast(ctxt,mtrx_file) - call psb_bcast(ctxt,filefmt) - call psb_bcast(ctxt,acfmt) - call psb_bcast(ctxt,agfmt) - call psb_bcast(ctxt,ipart) - call psb_barrier(ctxt) - t0 = psb_wtime() - ! read the input matrix to be processed and (possibly) the rhs - nrhs = 1 - - if (iam==psb_root_) then - select case(psb_toupper(filefmt)) - case('MM') - ! For Matrix Market we have an input file for the matrix - ! and an (optional) second file for the RHS. - call mm_mat_read(aux_a,info,iunit=iunit,filename=mtrx_file) - - case ('HB') - ! For Harwell-Boeing we have a single file which may or may not - ! contain an RHS. - call hb_read(aux_a,info,iunit=iunit,filename=mtrx_file) - - case default - info = -1 - write(psb_err_unit,*) 'Wrong choice for fileformat ', filefmt - end select - if (info /= 0) then - write(psb_err_unit,*) 'Error while reading input matrix ' - call psb_abort(ctxt) - end if - - ! - ! Always get nnz from original matrix. - ! Some formats add fill-in and do not keep track - ! of how many were added. So if the original matrix - ! contained some extra zeros, the count of entries - ! is not recoverable exactly. - ! - nrt = aux_a%get_nrows() - annz = aux_a%get_nzeros() - call psb_bcast(ctxt,annz) - call psb_bcast(ctxt,nrt) - - write(psb_out_unit,'("Generating an rhs...")') - write(psb_out_unit,'(" ")') - call psb_realloc(nrt,1,aux_b,info) - if (info /= 0) then - call psb_errpush(4000,name) - goto 9999 - endif - - b_col_glob => aux_b(:,1) - do i=1, nrt - b_col_glob(i) = 1.d0 - enddo - - else - - call psb_bcast(ctxt,annz) - call psb_bcast(ctxt,nrt) - - end if - - - select case(psb_toupper(acfmt)) - case('COO') - acmold => acoo - case('CSR') - acmold => acsr - case('ELL') - acmold => aell - case('HLL') - acmold => ahll - case('HDIA') - acmold => ahdia - case default - write(*,*) 'Unknown format defaulting to CSR' - acmold => acsr - end select - -#ifdef HAVE_GPU - select case(psb_toupper(agfmt)) - case('ELG') - agmold => aelg - case('HLG') - agmold => ahlg - case('CSRG') - agmold => acsrg - case('HYBG') - agmold => ahybg - case('HDIAG') - agmold => ahdiag - case default - write(*,*) 'Unknown format defaulting to HLG' - agmold => ahlg - end select -#endif - - - ! switch over different partition types - if (ipart == 0) then - call psb_barrier(ctxt) - if (iam==psb_root_) write(psb_out_unit,'("Partition type: block")') - allocate(ivg(nrt),ipv(np)) - do i=1,nrt - call part_block(i,nrt,np,ipv,nv) - ivg(i) = ipv(1) - enddo - call psb_matdist(aux_a, a, ctxt, desc_a,info,vg=ivg) - else if (ipart == 2) then - if (iam==psb_root_) then - write(psb_out_unit,'("Partition type: graph")') - write(psb_out_unit,'(" ")') - ! write(psb_err_unit,'("Build type: graph")') - call build_mtpart(aux_a,np) - endif - call psb_barrier(ctxt) - call distr_mtpart(psb_root_,ctxt) - call getv_mtpart(ivg) - call psb_matdist(aux_a, a, ctxt, desc_a,info,vg=ivg) - else - if (iam==psb_root_) write(psb_out_unit,'("Partition type default: block")') - call psb_matdist(aux_a, a, ctxt,desc_a,info,parts=part_block) - end if - - call psb_scatter(b_col_glob,bv,desc_a,info,root=psb_root_) - - t2 = psb_wtime() - t0 - - call psb_amx(ctxt, t2) - - if (iam==psb_root_) then - write(psb_out_unit,'(" ")') - write(psb_out_unit,'("Time to read and partition matrix : ",es12.5)')t2 - write(psb_out_unit,'(" ")') - end if - call a%cscnv(aux_a,info,mold=acoo) - tcnvcsr = 0 - tcnvgpu = 0 - nr = desc_a%get_local_rows() - nrg = desc_a%get_global_rows() - call psb_geall(x_col,desc_a,info) - do i=1, nr - call desc_a%l2g(i,ig,info) - call psb_geins(ione,(/ig/),(/(done + (done*ig)/nrg)/),x_col,desc_a,info) - end do - call psb_geasb(x_col,desc_a,info) - do j=1, ncnv - call aux_a%cscnv(a,info,mold=acoo) - call psb_barrier(ctxt) - t1 = psb_wtime() - call a%cscnv(info,mold=acmold) - t2 = psb_Wtime() -t1 - call psb_amx(ctxt,t2) - tcnvcsr = tcnvcsr + t2 - if (j==1) tcnvc1 = t2 - xc1 = x_col%get_vect() - call xv%bld(xc1) - call psb_geasb(bv,desc_a,info,scratch=.true.) - -#ifdef HAVE_GPU - - call aux_a%cscnv(agpu,info,mold=acoo) - call xg%bld(xc1,mold=vmold) - call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold) - call psb_barrier(ctxt) - t1 = psb_wtime() - call agpu%cscnv(info,mold=agmold) - call psb_gpu_DeviceSync() - t2 = psb_Wtime() -t1 - call psb_amx(ctxt,t2) - if (j==1) tcnvg1 = t2 - tcnvgpu = tcnvgpu + t2 -#endif - end do - - call psb_barrier(ctxt) - t1 = psb_wtime() - do i=1,ntests - call psb_spmm(done,a,xv,dzero,bv,desc_a,info) - end do - call psb_barrier(ctxt) - t2 = psb_wtime() - t1 - call psb_amx(ctxt,t2) - -#ifdef HAVE_GPU - ! FIXME: cache flush needed here - call psb_barrier(ctxt) - tt1 = psb_wtime() - do i=1,ntests - call psb_spmm(done,agpu,xv,dzero,bg,desc_a,info) - if ((info /= 0).or.(psb_get_errstatus()/=0)) then - write(0,*) 'From 1 spmm',info,i,ntests - call psb_error() - stop - end if - - end do - call psb_gpu_DeviceSync() - call psb_barrier(ctxt) - tt2 = psb_wtime() - tt1 - call psb_amx(ctxt,tt2) - xc1 = bv%get_vect() - xc2 = bg%get_vect() - nr = desc_a%get_local_rows() - eps = maxval(abs(xc1(1:nr)-xc2(1:nr))) - call psb_amx(ctxt,eps) - if (iam==0) write(*,*) 'Max diff on xGPU',eps - - call xg%sync() - ! FIXME: cache flush needed here - - call psb_barrier(ctxt) - gt1 = psb_wtime() - do i=1,ntests*ngpu - call psb_spmm(done,agpu,xg,dzero,bg,desc_a,info) - if ((info /= 0).or.(psb_get_errstatus()/=0)) then - write(0,*) 'From 2 spmm',info,i,ntests - call psb_error() - stop - end if - - end do - ! For timing purposes we need to make sure all threads - ! in the device are done. - call psb_gpu_DeviceSync() - call psb_barrier(ctxt) - gt2 = psb_wtime() - gt1 - call psb_amx(ctxt,gt2) - call bg%sync() - xc1 = bv%get_vect() - xc2 = bg%get_vect() - call psb_geaxpby(-done,bg,+done,bv,desc_a,info) - eps = psb_geamax(bv,desc_a,info) - - call psb_amx(ctxt,t2) - nr = desc_a%get_local_rows() - eps = maxval(abs(xc1(1:nr)-xc2(1:nr))) - call psb_amx(ctxt,eps) - if (iam==0) write(*,*) 'Max diff on GPU',eps -#endif - - - amatsize = a%sizeof() - agmatsize = agpu%sizeof() - damatsize = amatsize - damatsize = damatsize/(1024*1024) - dgmatsize = agmatsize - dgmatsize = dgmatsize/(1024*1024) - descsize = psb_sizeof(desc_a) - call psb_sum(ctxt,damatsize) - call psb_sum(ctxt,dgmatsize) - call psb_sum(ctxt,descsize) - - if (iam == psb_root_) then - write(psb_out_unit,'("Matrix: ",a)') mtrx_file - write(psb_out_unit,& - &'("Test on : ",i20," processors")') np - write(psb_out_unit,& - &'("Size of matrix : ",i20," ")') nrt - write(psb_out_unit,& - &'("Number of nonzeros : ",i20," ")') annz - write(psb_out_unit,& - &'("Memory occupation CPU (MBytes) : ",f20.2," ")') damatsize - write(psb_out_unit,& - &'("Memory occupation GPU (MBytes) : ",f20.2," ")') dgmatsize - write(psb_out_unit,& - &'("Memory occupation CPU (Bytes) : ",i24," ")') amatsize - write(psb_out_unit,& - &'("Memory occupation GPU (Bytes) : ",i24," ")') agmatsize - flops = ntests*(2.d0*annz) - tflops = flops - gflops = flops * ngpu - write(psb_out_unit,'("Storage type for A: ",a)') a%get_fmt() -#ifdef HAVE_GPU - write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt() - write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')& - & tcnvc1 - write(psb_out_unit,'("Time to convert A from COO to CPU (t): ",F20.9)')& - & tcnvcsr - write(psb_out_unit,'("Time to convert A from COO to CPU (a): ",F20.9)')& - & tcnvcsr/ncnv - write(psb_out_unit,'("Time to convert A from COO to GPU (1): ",F20.9)')& - & tcnvg1 - write(psb_out_unit,'("Time to convert A from COO to GPU (t): ",F20.9)')& - & tcnvgpu - write(psb_out_unit,'("Time to convert A from COO to GPU (a): ",F20.9)')& - & tcnvgpu/ncnv - -#endif - write(psb_out_unit,& - & '("Number of flops (",i0," prod) : ",F20.0," ")') & - & ntests,flops - - flops = flops / (t2) - tflops = tflops / (tt2) - gflops = gflops / (gt2) - write(psb_out_unit,'("Time for ",i6," products (s) (CPU) : ",F20.3)')& - & ntests,t2 - write(psb_out_unit,'("Time per product (ms) (CPU) : ",F20.3)')& - & t2*1.d3/(1.d0*ntests) - write(psb_out_unit,'("MFLOPS (CPU) : ",F20.3)')& - & flops/1.d6 -#ifdef HAVE_GPU - - write(psb_out_unit,'("Time for ",i6," products (s) (xGPU) : ",F20.3)')& - & ntests, tt2 - write(psb_out_unit,'("Time per product (ms) (xGPU) : ",F20.3)')& - & tt2*1.d3/(1.d0*ntests) - write(psb_out_unit,'("MFLOPS (xGPU) : ",F20.3)')& - & tflops/1.d6 - - write(psb_out_unit,'("Time for ",i6," products (s) (GPU) : ",F20.3)')& - & ngpu*ntests,gt2 - write(psb_out_unit,'("Time per product (ms) (GPU) : ",F20.3)')& - & gt2*1.d3/(1.d0*ntests*ngpu) - write(psb_out_unit,'("MFLOPS (GPU) : ",F20.3)')& - & gflops/1.d6 -#endif - ! - ! This computation assumes the data movement associated with CSR: - ! it is minimal in terms of coefficients. Other formats may either move - ! more data (padding etc.) or less data (if they can save on the indices). - ! - nbytes = nr*(2*psb_sizeof_dp + psb_sizeof_ip)+& - & annz*(psb_sizeof_dp + psb_sizeof_ip) - bdwdth = ntests*nbytes/(t2*1.d6) - write(psb_out_unit,*) - write(psb_out_unit,'("MBYTES/S (CPU) : ",F20.3)') bdwdth -#ifdef HAVE_GPU - bdwdth = ngpu*ntests*nbytes/(gt2*1.d6) - write(psb_out_unit,'("MBYTES/S (GPU) : ",F20.3)') bdwdth -#endif - write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt() - write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize - - end if - - call psb_gefree(b_col, desc_a,info) - call psb_gefree(x_col, desc_a,info) - call psb_gefree(xv, desc_a,info) - call psb_gefree(bv, desc_a,info) - call psb_spfree(a, desc_a,info) -#ifdef HAVE_GPU - call psb_gefree(xg, desc_a,info) - call psb_gefree(bg, desc_a,info) - call psb_spfree(agpu,desc_a,info) - call psb_gpu_exit() -#endif - call psb_cdfree(desc_a,info) - - call psb_exit(ctxt) - stop - -9999 continue - call psb_error(ctxt) - -end program d_file_spmv - - - - - diff --git a/test/cudakern/s_file_spmv.F90 b/test/cudakern/s_file_spmv.F90 deleted file mode 100644 index 37a52717..00000000 --- a/test/cudakern/s_file_spmv.F90 +++ /dev/null @@ -1,496 +0,0 @@ -! -! Parallel Sparse BLAS GPU plugin -! (C) Copyright 2013 -! Salvatore Filippone -! Alessandro Fanfarillo -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! -! -program s_file_spmv - use psb_base_mod - use psb_util_mod - use psb_ext_mod -#ifdef HAVE_GPU - use psb_gpu_mod -#endif - use data_input - implicit none - - ! input parameters - character(len=200) :: mtrx_file - - ! sparse matrices - type(psb_sspmat_type) :: a, aux_a, agpu - - ! dense matrices - real(psb_spk_), allocatable, target :: aux_b(:,:), d(:) - real(psb_spk_), allocatable , save :: x_col_glob(:), r_col_glob(:) - real(psb_spk_), pointer :: b_col_glob(:) - type(psb_s_vect_type) :: b_col, x_col, r_col - type(psb_s_vect_type) :: xg, bg, xv, bv -#ifdef HAVE_GPU - type(psb_s_vect_gpu) :: vmold -#endif - real(psb_spk_), allocatable :: xc1(:),xc2(:) - ! communications data structure - type(psb_desc_type):: desc_a - - type(psb_ctxt_type) :: ctxt - integer :: iam, np - integer(psb_epk_) :: amatsize, agmatsize, precsize, descsize, annz, nbytes - real(psb_spk_) :: err, eps, samatsize, sgmatsize - - character(len=5) :: acfmt, agfmt - character(len=20) :: name - character(len=2) :: filefmt - integer, parameter :: iunit=12 - integer, parameter :: times=2000 - integer, parameter :: ntests=200, ngpu=50, ncnv=20 - - type(psb_s_coo_sparse_mat), target :: acoo - type(psb_s_csr_sparse_mat), target :: acsr - type(psb_s_ell_sparse_mat), target :: aell - type(psb_s_hll_sparse_mat), target :: ahll - type(psb_s_hdia_sparse_mat), target :: ahdia -#ifdef HAVE_GPU - type(psb_s_elg_sparse_mat), target :: aelg - type(psb_s_csrg_sparse_mat), target :: acsrg - type(psb_s_hybg_sparse_mat), target :: ahybg - type(psb_s_hlg_sparse_mat), target :: ahlg - type(psb_s_hdiag_sparse_mat), target :: ahdiag -#endif - class(psb_s_base_sparse_mat), pointer :: acmold, agmold - ! other variables - integer :: i,info,j,nrt, ns, nr, ipart, ig, nrg - integer :: internal, m,ii,nnzero - real(psb_dpk_) :: t0,t1, t2, tprec, flops - real(psb_dpk_) :: tt1, tt2, tflops, gt1, gt2,gflops, gtint, bdwdth,& - & tcnvcsr, tcnvc1, tcnvgpu, tcnvg1 - integer :: nrhs, nrow, n_row, dim, nv, ne - integer, allocatable :: ivg(:), ipv(:) - - - call psb_init(ctxt) - call psb_info(ctxt,iam,np) -#ifdef HAVE_GPU - call psb_gpu_init(ctxt) -#endif - if (iam < 0) then - ! This should not happen, but just in case - call psb_exit(ctxt) - stop - endif - - - name='file_spmv' - if(psb_get_errstatus() /= 0) goto 9999 - info=psb_success_ - call psb_set_errverbosity(2) - if (iam == psb_root_) then - write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_ - write(*,*) 'This is the ',trim(name),' sample program' - end if -#ifdef HAVE_GPU - write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount() - write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_gpu_DeviceName()) -#endif - - if (iam == 0) then - write(*,*) 'Matrix? ' - call read_data(mtrx_file,psb_inp_unit) - write(*,*) 'file format' - call read_data(filefmt,psb_inp_unit) - write(*,*) 'CPU format' - call read_data(acfmt,psb_inp_unit) - write(*,*) 'GPU format' - call read_data(agfmt,psb_inp_unit) - write(*,*) 'distribution ' - call read_data(ipart,psb_inp_unit) - write(*,*) 'Read all data, going on' - end if - call psb_bcast(ctxt,mtrx_file) - call psb_bcast(ctxt,filefmt) - call psb_bcast(ctxt,acfmt) - call psb_bcast(ctxt,agfmt) - call psb_bcast(ctxt,ipart) - call psb_barrier(ctxt) - t0 = psb_wtime() - ! read the input matrix to be processed and (possibly) the rhs - nrhs = 1 - - if (iam==psb_root_) then - select case(psb_toupper(filefmt)) - case('MM') - ! For Matrix Market we have an input file for the matrix - ! and an (optional) second file for the RHS. - call mm_mat_read(aux_a,info,iunit=iunit,filename=mtrx_file) - - case ('HB') - ! For Harwell-Boeing we have a single file which may or may not - ! contain an RHS. - call hb_read(aux_a,info,iunit=iunit,filename=mtrx_file) - - case default - info = -1 - write(psb_err_unit,*) 'Wrong choice for fileformat ', filefmt - end select - if (info /= 0) then - write(psb_err_unit,*) 'Error while reading input matrix ' - call psb_abort(ctxt) - end if - - ! - ! Always get nnz from original matrix. - ! Some formats add fill-in and do not keep track - ! of how many were added. So if the original matrix - ! contained some extra zeros, the count of entries - ! is not recoverable exactly. - ! - nrt = aux_a%get_nrows() - annz = aux_a%get_nzeros() - call psb_bcast(ctxt,annz) - call psb_bcast(ctxt,nrt) - - write(psb_out_unit,'("Generating an rhs...")') - write(psb_out_unit,'(" ")') - call psb_realloc(nrt,1,aux_b,info) - if (info /= 0) then - call psb_errpush(4000,name) - goto 9999 - endif - - b_col_glob => aux_b(:,1) - do i=1, nrt - b_col_glob(i) = 1.d0 - enddo - - else - - call psb_bcast(ctxt,annz) - call psb_bcast(ctxt,nrt) - - end if - - - select case(psb_toupper(acfmt)) - case('COO') - acmold => acoo - case('CSR') - acmold => acsr - case('ELL') - acmold => aell - case('HLL') - acmold => ahll - case('HDIA') - acmold => ahdia - case default - write(*,*) 'Unknown format defaulting to CSR' - acmold => acsr - end select - -#ifdef HAVE_GPU - select case(psb_toupper(agfmt)) - case('ELG') - agmold => aelg - case('HLG') - agmold => ahlg - case('CSRG') - agmold => acsrg - case('HYBG') - agmold => ahybg - case('HDIAG') - agmold => ahdiag - case default - write(*,*) 'Unknown format defaulting to HLG' - agmold => ahlg - end select -#endif - - - ! switch over different partition types - if (ipart == 0) then - call psb_barrier(ctxt) - if (iam==psb_root_) write(psb_out_unit,'("Partition type: block")') - allocate(ivg(nrt),ipv(np)) - do i=1,nrt - call part_block(i,nrt,np,ipv,nv) - ivg(i) = ipv(1) - enddo - call psb_matdist(aux_a, a, ctxt, desc_a,info,v=ivg) - else if (ipart == 2) then - if (iam==psb_root_) then - write(psb_out_unit,'("Partition type: graph")') - write(psb_out_unit,'(" ")') - ! write(psb_err_unit,'("Build type: graph")') - call build_mtpart(aux_a,np) - endif - call psb_barrier(ctxt) - call distr_mtpart(psb_root_,ctxt) - call getv_mtpart(ivg) - call psb_matdist(aux_a, a, ctxt, desc_a,info,v=ivg) - else - if (iam==psb_root_) write(psb_out_unit,'("Partition type default: block")') - call psb_matdist(aux_a, a, ctxt,desc_a,info,parts=part_block) - end if - - call psb_scatter(b_col_glob,bv,desc_a,info,root=psb_root_) - - t2 = psb_wtime() - t0 - - call psb_amx(ctxt, t2) - - if (iam==psb_root_) then - write(psb_out_unit,'(" ")') - write(psb_out_unit,'("Time to read and partition matrix : ",es12.5)')t2 - write(psb_out_unit,'(" ")') - end if - call a%cscnv(aux_a,info,mold=acoo) - tcnvcsr = 0 - tcnvgpu = 0 - nr = desc_a%get_local_rows() - nrg = desc_a%get_global_rows() - call psb_geall(x_col,desc_a,info) - do i=1, nr - call desc_a%l2g(i,ig,info) - call psb_geins(ione,(/ig/),(/(sone + (sone*ig)/nrg)/),x_col,desc_a,info) - end do - call psb_geasb(x_col,desc_a,info) - do j=1, ncnv - call aux_a%cscnv(a,info,mold=acoo) - call psb_barrier(ctxt) - t1 = psb_wtime() - call a%cscnv(info,mold=acmold) - t2 = psb_Wtime() -t1 - call psb_amx(ctxt,t2) - tcnvcsr = tcnvcsr + t2 - if (j==1) tcnvc1 = t2 - xc1 = x_col%get_vect() - call xv%bld(xc1) - call psb_geasb(bv,desc_a,info,scratch=.true.) - -#ifdef HAVE_GPU - - call aux_a%cscnv(agpu,info,mold=acoo) - call xg%bld(xc1,mold=vmold) - call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold) - call psb_barrier(ctxt) - t1 = psb_wtime() - call agpu%cscnv(info,mold=agmold) - call psb_gpu_DeviceSync() - t2 = psb_Wtime() -t1 - call psb_amx(ctxt,t2) - if (j==1) tcnvg1 = t2 - tcnvgpu = tcnvgpu + t2 -#endif - end do - - call psb_barrier(ctxt) - t1 = psb_wtime() - do i=1,ntests - call psb_spmm(sone,a,xv,szero,bv,desc_a,info) - end do - call psb_barrier(ctxt) - t2 = psb_wtime() - t1 - call psb_amx(ctxt,t2) - -#ifdef HAVE_GPU - ! FIXME: cache flush needed here - call psb_barrier(ctxt) - tt1 = psb_wtime() - do i=1,ntests - call psb_spmm(sone,agpu,xv,szero,bg,desc_a,info) - if ((info /= 0).or.(psb_get_errstatus()/=0)) then - write(0,*) 'From 1 spmm',info,i,ntests - call psb_error() - stop - end if - - end do - call psb_gpu_DeviceSync() - call psb_barrier(ctxt) - tt2 = psb_wtime() - tt1 - call psb_amx(ctxt,tt2) - xc1 = bv%get_vect() - xc2 = bg%get_vect() - nr = desc_a%get_local_rows() - eps = maxval(abs(xc1(1:nr)-xc2(1:nr))) - call psb_amx(ctxt,eps) - if (iam==0) write(*,*) 'Max diff on xGPU',eps - - call xg%sync() - ! FIXME: cache flush needed here - - call psb_barrier(ctxt) - gt1 = psb_wtime() - do i=1,ntests*ngpu - call psb_spmm(sone,agpu,xg,szero,bg,desc_a,info) - if ((info /= 0).or.(psb_get_errstatus()/=0)) then - write(0,*) 'From 2 spmm',info,i,ntests - call psb_error() - stop - end if - - end do - ! For timing purposes we need to make sure all threads - ! in the device are done. - call psb_gpu_DeviceSync() - call psb_barrier(ctxt) - gt2 = psb_wtime() - gt1 - call psb_amx(ctxt,gt2) - call bg%sync() - xc1 = bv%get_vect() - xc2 = bg%get_vect() - call psb_geaxpby(-sone,bg,+sone,bv,desc_a,info) - eps = psb_geamax(bv,desc_a,info) - - call psb_amx(ctxt,t2) - nr = desc_a%get_local_rows() - eps = maxval(abs(xc1(1:nr)-xc2(1:nr))) - call psb_amx(ctxt,eps) - if (iam==0) write(*,*) 'Max diff on GPU',eps -#endif - - - amatsize = a%sizeof() - agmatsize = agpu%sizeof() - samatsize = amatsize - samatsize = samatsize/(1024*1024) - sgmatsize = agmatsize - sgmatsize = sgmatsize/(1024*1024) - descsize = psb_sizeof(desc_a) - call psb_sum(ctxt,samatsize) - call psb_sum(ctxt,sgmatsize) - call psb_sum(ctxt,descsize) - - if (iam == psb_root_) then - write(psb_out_unit,'("Matrix: ",a)') mtrx_file - write(psb_out_unit,& - &'("Test on : ",i20," processors")') np - write(psb_out_unit,& - &'("Size of matrix : ",i20," ")') nrt - write(psb_out_unit,& - &'("Number of nonzeros : ",i20," ")') annz - write(psb_out_unit,& - &'("Memory occupation CPU (MBytes) : ",f20.2," ")') samatsize - write(psb_out_unit,& - &'("Memory occupation GPU (MBytes) : ",f20.2," ")') sgmatsize - write(psb_out_unit,& - &'("Memory occupation CPU (Bytes) : ",i24," ")') amatsize - write(psb_out_unit,& - &'("Memory occupation GPU (Bytes) : ",i24," ")') agmatsize - flops = ntests*(2.d0*annz) - tflops = flops - gflops = flops * ngpu - write(psb_out_unit,'("Storage type for A: ",a)') a%get_fmt() -#ifdef HAVE_GPU - write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt() - write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')& - & tcnvc1 - write(psb_out_unit,'("Time to convert A from COO to CPU (t): ",F20.9)')& - & tcnvcsr - write(psb_out_unit,'("Time to convert A from COO to CPU (a): ",F20.9)')& - & tcnvcsr/ncnv - write(psb_out_unit,'("Time to convert A from COO to GPU (1): ",F20.9)')& - & tcnvg1 - write(psb_out_unit,'("Time to convert A from COO to GPU (t): ",F20.9)')& - & tcnvgpu - write(psb_out_unit,'("Time to convert A from COO to GPU (a): ",F20.9)')& - & tcnvgpu/ncnv - -#endif - write(psb_out_unit,& - & '("Number of flops (",i0," prod) : ",F20.0," ")') & - & ntests,flops - - flops = flops / (t2) - tflops = tflops / (tt2) - gflops = gflops / (gt2) - write(psb_out_unit,'("Time for ",i6," products (s) (CPU) : ",F20.3)')& - & ntests,t2 - write(psb_out_unit,'("Time per product (ms) (CPU) : ",F20.3)')& - & t2*1.d3/(1.d0*ntests) - write(psb_out_unit,'("MFLOPS (CPU) : ",F20.3)')& - & flops/1.d6 -#ifdef HAVE_GPU - - write(psb_out_unit,'("Time for ",i6," products (s) (xGPU) : ",F20.3)')& - & ntests, tt2 - write(psb_out_unit,'("Time per product (ms) (xGPU) : ",F20.3)')& - & tt2*1.d3/(1.d0*ntests) - write(psb_out_unit,'("MFLOPS (xGPU) : ",F20.3)')& - & tflops/1.d6 - - write(psb_out_unit,'("Time for ",i6," products (s) (GPU) : ",F20.3)')& - & ngpu*ntests,gt2 - write(psb_out_unit,'("Time per product (ms) (GPU) : ",F20.3)')& - & gt2*1.d3/(1.d0*ntests*ngpu) - write(psb_out_unit,'("MFLOPS (GPU) : ",F20.3)')& - & gflops/1.d6 -#endif - ! - ! This computation assumes the data movement associated with CSR: - ! it is minimal in terms of coefficients. Other formats may either move - ! more data (padding etc.) or less data (if they can save on the indices). - ! - nbytes = nr*(2*psb_sizeof_sp + psb_sizeof_ip)+& - & annz*(psb_sizeof_sp + psb_sizeof_ip) - bdwdth = ntests*nbytes/(t2*1.d6) - write(psb_out_unit,*) - write(psb_out_unit,'("MBYTES/S (CPU) : ",F20.3)') bdwdth -#ifdef HAVE_GPU - bdwdth = ngpu*ntests*nbytes/(gt2*1.d6) - write(psb_out_unit,'("MBYTES/S (GPU) : ",F20.3)') bdwdth -#endif - write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt() - write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize - - end if - - call psb_gefree(b_col, desc_a,info) - call psb_gefree(x_col, desc_a,info) - call psb_gefree(xv, desc_a,info) - call psb_gefree(bv, desc_a,info) - call psb_spfree(a, desc_a,info) -#ifdef HAVE_GPU - call psb_gefree(xg, desc_a,info) - call psb_gefree(bg, desc_a,info) - call psb_spfree(agpu,desc_a,info) - call psb_gpu_exit() -#endif - call psb_cdfree(desc_a,info) - - call psb_exit(ctxt) - stop - -9999 continue - call psb_error(ctxt) - -end program s_file_spmv - - - - - diff --git a/test/cudakern/z_file_spmv.F90 b/test/cudakern/z_file_spmv.F90 deleted file mode 100644 index 153dd5e1..00000000 --- a/test/cudakern/z_file_spmv.F90 +++ /dev/null @@ -1,491 +0,0 @@ -! -! Parallel Sparse BLAS GPU plugin -! (C) Copyright 2013 -! Salvatore Filippone -! Alessandro Fanfarillo -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! -! -program z_file_spmv - use psb_base_mod - use psb_util_mod - use psb_ext_mod -#ifdef HAVE_GPU - use psb_gpu_mod -#endif - use data_input - implicit none - - ! input parameters - character(len=200) :: mtrx_file - - ! sparse matrices - type(psb_zspmat_type) :: a, aux_a, agpu - - ! dense matrices - complex(psb_dpk_), allocatable, target :: aux_b(:,:), d(:) - complex(psb_dpk_), allocatable , save :: x_col_glob(:), r_col_glob(:) - complex(psb_dpk_), pointer :: b_col_glob(:) - type(psb_z_vect_type) :: b_col, x_col, r_col - type(psb_z_vect_type) :: xg, bg, xv, bv -#ifdef HAVE_GPU - type(psb_z_vect_gpu) :: vmold -#endif - complex(psb_dpk_), allocatable :: xc1(:),xc2(:) - ! communications data structure - type(psb_desc_type):: desc_a - - type(psb_ctxt_type) :: ctxt - integer :: iam, np - integer(psb_epk_) :: amatsize, agmatsize, precsize, descsize, annz, nbytes - real(psb_dpk_) :: damatsize, dgmatsize - complex(psb_dpk_) :: err, eps - - character(len=5) :: acfmt, agfmt - character(len=20) :: name - character(len=2) :: filefmt - integer, parameter :: iunit=12 - integer, parameter :: times=2000 - integer, parameter :: ntests=200, ngpu=50, ncnv=20 - - type(psb_z_coo_sparse_mat), target :: acoo - type(psb_z_csr_sparse_mat), target :: acsr - type(psb_z_ell_sparse_mat), target :: aell - type(psb_z_hll_sparse_mat), target :: ahll -#ifdef HAVE_GPU - type(psb_z_elg_sparse_mat), target :: aelg - type(psb_z_csrg_sparse_mat), target :: acsrg - type(psb_z_hybg_sparse_mat), target :: ahybg - type(psb_z_hlg_sparse_mat), target :: ahlg -#endif - class(psb_z_base_sparse_mat), pointer :: acmold, agmold - ! other variables - integer :: i,info,j,nrt, ns, nr, ipart, ig, nrg - integer :: internal, m,ii,nnzero - real(psb_dpk_) :: t0,t1, t2, tprec, flops - real(psb_dpk_) :: tt1, tt2, tflops, gt1, gt2,gflops, gtint, bdwdth,& - & tcnvcsr, tcnvc1, tcnvgpu, tcnvg1 - integer :: nrhs, nrow, n_row, dim, nv, ne - integer, allocatable :: ivg(:), ipv(:) - - - call psb_init(ctxt) - call psb_info(ctxt,iam,np) -#ifdef HAVE_GPU - call psb_gpu_init(ctxt) -#endif - if (iam < 0) then - ! This should not happen, but just in case - call psb_exit(ctxt) - stop - endif - - - name='file_spmv' - if(psb_get_errstatus() /= 0) goto 9999 - info=psb_success_ - call psb_set_errverbosity(2) - if (iam == psb_root_) then - write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_ - write(*,*) 'This is the ',trim(name),' sample program' - end if -#ifdef HAVE_GPU - write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount() - write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_gpu_DeviceName()) -#endif - - if (iam == 0) then - write(*,*) 'Matrix? ' - call read_data(mtrx_file,psb_inp_unit) - write(*,*) 'file format' - call read_data(filefmt,psb_inp_unit) - write(*,*) 'CPU format' - call read_data(acfmt,psb_inp_unit) - write(*,*) 'GPU format' - call read_data(agfmt,psb_inp_unit) - write(*,*) 'distribution ' - call read_data(ipart,psb_inp_unit) - write(*,*) 'Read all data, going on' - end if - call psb_bcast(ctxt,mtrx_file) - call psb_bcast(ctxt,filefmt) - call psb_bcast(ctxt,acfmt) - call psb_bcast(ctxt,agfmt) - call psb_bcast(ctxt,ipart) - call psb_barrier(ctxt) - t0 = psb_wtime() - ! read the input matrix to be processed and (possibly) the rhs - nrhs = 1 - - if (iam==psb_root_) then - select case(psb_toupper(filefmt)) - case('MM') - ! For Matrix Market we have an input file for the matrix - ! and an (optional) second file for the RHS. - call mm_mat_read(aux_a,info,iunit=iunit,filename=mtrx_file) - - case ('HB') - ! For Harwell-Boeing we have a single file which may or may not - ! contain an RHS. - call hb_read(aux_a,info,iunit=iunit,filename=mtrx_file) - - case default - info = -1 - write(psb_err_unit,*) 'Wrong choice for fileformat ', filefmt - end select - if (info /= 0) then - write(psb_err_unit,*) 'Error while reading input matrix ' - call psb_abort(ctxt) - end if - - ! - ! Always get nnz from original matrix. - ! Some formats add fill-in and do not keep track - ! of how many were added. So if the original matrix - ! contained some extra zeros, the count of entries - ! is not recoverable exactly. - ! - nrt = aux_a%get_nrows() - annz = aux_a%get_nzeros() - call psb_bcast(ctxt,annz) - call psb_bcast(ctxt,nrt) - - write(psb_out_unit,'("Generating an rhs...")') - write(psb_out_unit,'(" ")') - call psb_realloc(nrt,1,aux_b,info) - if (info /= 0) then - call psb_errpush(4000,name) - goto 9999 - endif - - b_col_glob => aux_b(:,1) - do i=1, nrt - b_col_glob(i) = 1.d0 - enddo - - else - - call psb_bcast(ctxt,annz) - call psb_bcast(ctxt,nrt) - - end if - - - select case(psb_toupper(acfmt)) - case('COO') - acmold => acoo - case('CSR') - acmold => acsr - case('ELL') - acmold => aell - case('HLL') - acmold => ahll - case default - write(*,*) 'Unknown format defaulting to CSR' - acmold => acsr - end select - -#ifdef HAVE_GPU - select case(psb_toupper(agfmt)) - case('ELG') - agmold => aelg - case('HLG') - agmold => ahlg - case('CSRG') - agmold => acsrg - case('HYBG') - agmold => ahybg - case default - write(*,*) 'Unknown format defaulting to HLG' - agmold => ahlg - end select -#endif - - - ! switch over different partition types - if (ipart == 0) then - call psb_barrier(ctxt) - if (iam==psb_root_) write(psb_out_unit,'("Partition type: block")') - allocate(ivg(nrt),ipv(np)) - do i=1,nrt - call part_block(i,nrt,np,ipv,nv) - ivg(i) = ipv(1) - enddo - call psb_matdist(aux_a, a, ctxt, desc_a,info,v=ivg) - else if (ipart == 2) then - if (iam==psb_root_) then - write(psb_out_unit,'("Partition type: graph")') - write(psb_out_unit,'(" ")') - ! write(psb_err_unit,'("Build type: graph")') - call build_mtpart(aux_a,np) - endif - call psb_barrier(ctxt) - call distr_mtpart(psb_root_,ctxt) - call getv_mtpart(ivg) - call psb_matdist(aux_a, a, ctxt, desc_a,info,v=ivg) - else - if (iam==psb_root_) write(psb_out_unit,'("Partition type default: block")') - call psb_matdist(aux_a, a, ctxt,desc_a,info,parts=part_block) - end if - - call psb_scatter(b_col_glob,bv,desc_a,info,root=psb_root_) - - t2 = psb_wtime() - t0 - - call psb_amx(ctxt, t2) - - if (iam==psb_root_) then - write(psb_out_unit,'(" ")') - write(psb_out_unit,'("Time to read and partition matrix : ",es12.5)')t2 - write(psb_out_unit,'(" ")') - end if - call a%cscnv(aux_a,info,mold=acoo) - tcnvcsr = 0 - tcnvgpu = 0 - nr = desc_a%get_local_rows() - nrg = desc_a%get_global_rows() - call psb_geall(x_col,desc_a,info) - do i=1, nr - call desc_a%l2g(i,ig,info) - call psb_geins(ione,(/ig/),(/(zone + (zone*ig)/nrg)/),x_col,desc_a,info) - end do - call psb_geasb(x_col,desc_a,info) - do j=1, ncnv - call aux_a%cscnv(a,info,mold=acoo) - call psb_barrier(ctxt) - t1 = psb_wtime() - call a%cscnv(info,mold=acmold) - t2 = psb_Wtime() -t1 - call psb_amx(ctxt,t2) - tcnvcsr = tcnvcsr + t2 - if (j==1) tcnvc1 = t2 - xc1 = x_col%get_vect() - call xv%bld(xc1) - call psb_geasb(bv,desc_a,info,scratch=.true.) - -#ifdef HAVE_GPU - - call aux_a%cscnv(agpu,info,mold=acoo) - call xg%bld(xc1,mold=vmold) - call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold) - call psb_barrier(ctxt) - t1 = psb_wtime() - call agpu%cscnv(info,mold=agmold) - call psb_gpu_DeviceSync() - t2 = psb_Wtime() -t1 - call psb_amx(ctxt,t2) - if (j==1) tcnvg1 = t2 - tcnvgpu = tcnvgpu + t2 -#endif - end do - - call psb_barrier(ctxt) - t1 = psb_wtime() - do i=1,ntests - call psb_spmm(zone,a,xv,zzero,bv,desc_a,info) - end do - call psb_barrier(ctxt) - t2 = psb_wtime() - t1 - call psb_amx(ctxt,t2) - -#ifdef HAVE_GPU - ! FIXME: cache flush needed here - call psb_barrier(ctxt) - tt1 = psb_wtime() - do i=1,ntests - call psb_spmm(zone,agpu,xv,zzero,bg,desc_a,info) - if ((info /= 0).or.(psb_get_errstatus()/=0)) then - write(0,*) 'From 1 spmm',info,i,ntests - call psb_error() - stop - end if - - end do - call psb_gpu_DeviceSync() - call psb_barrier(ctxt) - tt2 = psb_wtime() - tt1 - call psb_amx(ctxt,tt2) - xc1 = bv%get_vect() - xc2 = bg%get_vect() - nr = desc_a%get_local_rows() - eps = maxval(abs(xc1(1:nr)-xc2(1:nr))) - call psb_amx(ctxt,eps) - if (iam==0) write(*,*) 'Max diff on xGPU',eps - - call xg%sync() - ! FIXME: cache flush needed here - - call psb_barrier(ctxt) - gt1 = psb_wtime() - do i=1,ntests*ngpu - call psb_spmm(zone,agpu,xg,zzero,bg,desc_a,info) - if ((info /= 0).or.(psb_get_errstatus()/=0)) then - write(0,*) 'From 2 spmm',info,i,ntests - call psb_error() - stop - end if - - end do - ! For timing purposes we need to make sure all threads - ! in the device are done. - call psb_gpu_DeviceSync() - call psb_barrier(ctxt) - gt2 = psb_wtime() - gt1 - call psb_amx(ctxt,gt2) - call bg%sync() - xc1 = bv%get_vect() - xc2 = bg%get_vect() - call psb_geaxpby(-zone,bg,+zone,bv,desc_a,info) - eps = psb_geamax(bv,desc_a,info) - - call psb_amx(ctxt,t2) - nr = desc_a%get_local_rows() - eps = maxval(abs(xc1(1:nr)-xc2(1:nr))) - call psb_amx(ctxt,eps) - if (iam==0) write(*,*) 'Max diff on GPU',eps -#endif - - - amatsize = a%sizeof() - agmatsize = agpu%sizeof() - damatsize = amatsize - damatsize = damatsize/(1024*1024) - dgmatsize = agmatsize - dgmatsize = dgmatsize/(1024*1024) - descsize = psb_sizeof(desc_a) - call psb_sum(ctxt,damatsize) - call psb_sum(ctxt,dgmatsize) - call psb_sum(ctxt,descsize) - - if (iam == psb_root_) then - write(psb_out_unit,'("Matrix: ",a)') mtrx_file - write(psb_out_unit,& - &'("Test on : ",i20," processors")') np - write(psb_out_unit,& - &'("Size of matrix : ",i20," ")') nrt - write(psb_out_unit,& - &'("Number of nonzeros : ",i20," ")') annz - write(psb_out_unit,& - &'("Memory occupation CPU (MBytes) : ",f20.2," ")') damatsize - write(psb_out_unit,& - &'("Memory occupation GPU (MBytes) : ",f20.2," ")') dgmatsize - write(psb_out_unit,& - &'("Memory occupation CPU (Bytes) : ",i24," ")') amatsize - write(psb_out_unit,& - &'("Memory occupation GPU (Bytes) : ",i24," ")') agmatsize - flops = ntests*(2.d0*annz) - tflops = flops - gflops = flops * ngpu - write(psb_out_unit,'("Storage type for A: ",a)') a%get_fmt() -#ifdef HAVE_GPU - write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt() - write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')& - & tcnvc1 - write(psb_out_unit,'("Time to convert A from COO to CPU (t): ",F20.9)')& - & tcnvcsr - write(psb_out_unit,'("Time to convert A from COO to CPU (a): ",F20.9)')& - & tcnvcsr/ncnv - write(psb_out_unit,'("Time to convert A from COO to GPU (1): ",F20.9)')& - & tcnvg1 - write(psb_out_unit,'("Time to convert A from COO to GPU (t): ",F20.9)')& - & tcnvgpu - write(psb_out_unit,'("Time to convert A from COO to GPU (a): ",F20.9)')& - & tcnvgpu/ncnv - -#endif - write(psb_out_unit,& - & '("Number of flops (",i0," prod) : ",F20.0," ")') & - & ntests,flops - - flops = flops / (t2) - tflops = tflops / (tt2) - gflops = gflops / (gt2) - write(psb_out_unit,'("Time for ",i6," products (s) (CPU) : ",F20.3)')& - & ntests,t2 - write(psb_out_unit,'("Time per product (ms) (CPU) : ",F20.3)')& - & t2*1.d3/(1.d0*ntests) - write(psb_out_unit,'("MFLOPS (CPU) : ",F20.3)')& - & flops/1.d6 -#ifdef HAVE_GPU - - write(psb_out_unit,'("Time for ",i6," products (s) (xGPU) : ",F20.3)')& - & ntests, tt2 - write(psb_out_unit,'("Time per product (ms) (xGPU) : ",F20.3)')& - & tt2*1.d3/(1.d0*ntests) - write(psb_out_unit,'("MFLOPS (xGPU) : ",F20.3)')& - & tflops/1.d6 - - write(psb_out_unit,'("Time for ",i6," products (s) (GPU) : ",F20.3)')& - & ngpu*ntests,gt2 - write(psb_out_unit,'("Time per product (ms) (GPU) : ",F20.3)')& - & gt2*1.d3/(1.d0*ntests*ngpu) - write(psb_out_unit,'("MFLOPS (GPU) : ",F20.3)')& - & gflops/1.d6 -#endif - ! - ! This computation assumes the data movement associated with CSR: - ! it is minimal in terms of coefficients. Other formats may either move - ! more data (padding etc.) or less data (if they can save on the indices). - ! - nbytes = nr*(2*2*psb_sizeof_dp + psb_sizeof_ip)+& - & annz*(2*psb_sizeof_dp + psb_sizeof_ip) - bdwdth = ntests*nbytes/(t2*1.d6) - write(psb_out_unit,*) - write(psb_out_unit,'("MBYTES/S (CPU) : ",F20.3)') bdwdth -#ifdef HAVE_GPU - bdwdth = ngpu*ntests*nbytes/(gt2*1.d6) - write(psb_out_unit,'("MBYTES/S (GPU) : ",F20.3)') bdwdth -#endif - write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt() - write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize - - end if - - call psb_gefree(b_col, desc_a,info) - call psb_gefree(x_col, desc_a,info) - call psb_gefree(xv, desc_a,info) - call psb_gefree(bv, desc_a,info) - call psb_spfree(a, desc_a,info) -#ifdef HAVE_GPU - call psb_gefree(xg, desc_a,info) - call psb_gefree(bg, desc_a,info) - call psb_spfree(agpu,desc_a,info) - call psb_gpu_exit() -#endif - call psb_cdfree(desc_a,info) - - call psb_exit(ctxt) - stop - -9999 continue - call psb_error(ctxt) - -end program z_file_spmv - - - - - From 78570159238d146de3f4b324325007a46781b099 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Thu, 8 Aug 2024 14:23:03 +0200 Subject: [PATCH 12/12] Cosmetic changes to vect_mod --- base/modules/serial/psb_c_vect_mod.F90 | 1 - base/modules/serial/psb_d_vect_mod.F90 | 1 - base/modules/serial/psb_s_vect_mod.F90 | 1 - base/modules/serial/psb_z_vect_mod.F90 | 1 - 4 files changed, 4 deletions(-) diff --git a/base/modules/serial/psb_c_vect_mod.F90 b/base/modules/serial/psb_c_vect_mod.F90 index 1e9510f2..a0a34621 100644 --- a/base/modules/serial/psb_c_vect_mod.F90 +++ b/base/modules/serial/psb_c_vect_mod.F90 @@ -104,7 +104,6 @@ module psb_c_vect_mod generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 procedure, pass(z) :: upd_xyz => c_vect_upd_xyz procedure, pass(z) :: xyzw => c_vect_xyzw - procedure, pass(y) :: mlt_v => c_vect_mlt_v procedure, pass(y) :: mlt_a => c_vect_mlt_a procedure, pass(z) :: mlt_a_2 => c_vect_mlt_a_2 diff --git a/base/modules/serial/psb_d_vect_mod.F90 b/base/modules/serial/psb_d_vect_mod.F90 index ae3062dd..acdce5fd 100644 --- a/base/modules/serial/psb_d_vect_mod.F90 +++ b/base/modules/serial/psb_d_vect_mod.F90 @@ -104,7 +104,6 @@ module psb_d_vect_mod generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 procedure, pass(z) :: upd_xyz => d_vect_upd_xyz procedure, pass(z) :: xyzw => d_vect_xyzw - procedure, pass(y) :: mlt_v => d_vect_mlt_v procedure, pass(y) :: mlt_a => d_vect_mlt_a procedure, pass(z) :: mlt_a_2 => d_vect_mlt_a_2 diff --git a/base/modules/serial/psb_s_vect_mod.F90 b/base/modules/serial/psb_s_vect_mod.F90 index cad4659c..aeccae4d 100644 --- a/base/modules/serial/psb_s_vect_mod.F90 +++ b/base/modules/serial/psb_s_vect_mod.F90 @@ -104,7 +104,6 @@ module psb_s_vect_mod generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 procedure, pass(z) :: upd_xyz => s_vect_upd_xyz procedure, pass(z) :: xyzw => s_vect_xyzw - procedure, pass(y) :: mlt_v => s_vect_mlt_v procedure, pass(y) :: mlt_a => s_vect_mlt_a procedure, pass(z) :: mlt_a_2 => s_vect_mlt_a_2 diff --git a/base/modules/serial/psb_z_vect_mod.F90 b/base/modules/serial/psb_z_vect_mod.F90 index 48f2e947..484d6423 100644 --- a/base/modules/serial/psb_z_vect_mod.F90 +++ b/base/modules/serial/psb_z_vect_mod.F90 @@ -104,7 +104,6 @@ module psb_z_vect_mod generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 procedure, pass(z) :: upd_xyz => z_vect_upd_xyz procedure, pass(z) :: xyzw => z_vect_xyzw - procedure, pass(y) :: mlt_v => z_vect_mlt_v procedure, pass(y) :: mlt_a => z_vect_mlt_a procedure, pass(z) :: mlt_a_2 => z_vect_mlt_a_2