diff --git a/base/modules/serial/psb_d_base_vect_mod.F90 b/base/modules/serial/psb_d_base_vect_mod.F90 index 2e90bf2b..bfc6af68 100644 --- a/base/modules/serial/psb_d_base_vect_mod.F90 +++ b/base/modules/serial/psb_d_base_vect_mod.F90 @@ -2895,9 +2895,9 @@ contains integer(psb_ipk_) :: x_n, y_n, lda, ldb if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() select type(yy => y) type is (psb_d_base_multivect_type) - if (y%is_dev()) call y%sync() x_n = x%get_ncols() y_n = y%get_ncols() lda = x%get_nrows() diff --git a/base/psblas/psb_dprod.f90 b/base/psblas/psb_dprod.f90 index 6cb91acf..0f964a4e 100644 --- a/base/psblas/psb_dprod.f90 +++ b/base/psblas/psb_dprod.f90 @@ -108,7 +108,7 @@ function psb_dprod_multivect(x,y,desc_a,info,trans,global) result(res) if (present(global)) then global_ = global else - global_ = .true. + global_ = .false. end if ix = ione @@ -242,7 +242,7 @@ function psb_dprod_multivect_a(x,y,desc_a,info,trans,global) result(res) if (present(global)) then global_ = global else - global_ = .true. + global_ = .false. end if ix = ione @@ -363,7 +363,7 @@ function psb_dprod_m(x,y,desc_a,info,trans,global) result(res) if (present(global)) then global_ = global else - global_ = .true. + global_ = .false. end if ix = ione diff --git a/base/psblas/psb_dqrfact.f90 b/base/psblas/psb_dqrfact.f90 index 0de4ed16..4866b068 100644 --- a/base/psblas/psb_dqrfact.f90 +++ b/base/psblas/psb_dqrfact.f90 @@ -76,7 +76,7 @@ function psb_dqrfact(x, desc_a, info) result(res) call psb_bcast(ctxt,res) end if - call psb_scatter(temp,x,desc_a,info,root=psb_root_) + call psb_scatter(temp,x,desc_a,info,root=psb_root_,mold=x%v) call psb_erractionrestore(err_act) return diff --git a/cuda/dvectordev.c b/cuda/dvectordev.c index 615f4974..b39f9a24 100644 --- a/cuda/dvectordev.c +++ b/cuda/dvectordev.c @@ -226,12 +226,9 @@ int dotMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA, void* devM struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB; spgpuHandle_t handle=psb_cudaGetHandle(); - - for (int j=0; jcount_; j++) { - spgpuDmdot(handle, y_res+devVecA->count_*j, n, - ((double*)devVecA->v_)+devVecA->pitch_*j,(double*)devVecB->v_, - devVecB->count_,devVecB->pitch_); - } + + spgpuDmdot(handle, y_res, n, (double*)devVecA->v_, (double*)devVecB->v_, + devVecA->count_, devVecA->pitch_); return(i); } diff --git a/cuda/psb_d_cuda_vect_mod.F90 b/cuda/psb_d_cuda_vect_mod.F90 index 1b9c899b..90507ca0 100644 --- a/cuda/psb_d_cuda_vect_mod.F90 +++ b/cuda/psb_d_cuda_vect_mod.F90 @@ -1627,14 +1627,9 @@ contains if (yy%is_host()) call yy%sync() allocate(res(size(x%v,2),size(y%v,2))) info = dotMultiVecDevice(res,nr,x%deviceVect,yy%deviceVect,size(x%v,2)) - if (info /= 0) then - info = psb_err_internal_error_ - call psb_errpush(info,'d_cuda_multi_dot_v') - end if class default - ! y%sync is done in dot_a - call x%sync() - res = y%dot(nr,x%v) + call y%sync() + res = x%dot(nr,y%v) end select end function d_cuda_multi_dot_v diff --git a/cuda/spgpu/kernels/ddot.cu b/cuda/spgpu/kernels/ddot.cu index 1d5f0b42..b0debd47 100644 --- a/cuda/spgpu/kernels/ddot.cu +++ b/cuda/spgpu/kernels/ddot.cu @@ -153,8 +153,12 @@ void spgpuDmdot(spgpuHandle_t handle, double* y, int n, __device double* a, __de { for (int i=0; i= psb_debug_ext_) & - & write(debug_unit,*) me,' ',trim(name),& - & ' present: itrs: ',itrs,nrep - else - nrep = 10 - if (debug_level >= psb_debug_ext_) & - & write(debug_unit,*) me,' ',trim(name),& - & ' not present: itrs: ',itrs,nrep - endif - if (nrep <=0 ) then - info=psb_err_invalid_irst_ - err=info - call psb_errpush(info,name,i_err=(/nrep/)) - goto 9999 - endif - ncv = x%get_ncols() call psb_chkvect(mglob,ncv,x%get_nrows(),lone,lone,desc_a,info) if(info /= psb_success_) then - info=psb_err_from_subroutine_ - call psb_errpush(info,name,a_err='psb_chkvect on X') - goto 9999 + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_chkvect on X') + goto 9999 end if ncv = b%get_ncols() call psb_chkvect(mglob,ncv,b%get_nrows(),lone,lone,desc_a,info) @@ -170,17 +148,17 @@ subroutine psb_dbgmres_multivect(a, prec, b, x, eps, desc_a, info, itmax, iter, naux = 4*n_col nrhs = x%get_ncols() - allocate(aux(naux),h((nrep+1)*nrhs,nrep*nrhs),y(nrep*nrhs,nrhs),& - & vt(n_row,(nrep+1)*nrhs),r0n2(nrhs),rmn2(nrhs),& - & errnum(nrhs),errden(nrhs),stat=info) + allocate(aux(naux),h((nrep+1)*nrhs,nrep*nrhs),y(nrep*nrhs,nrhs),r0n2(nrhs),rmn2(nrhs),stat=info) if (info == psb_success_) call psb_geall(v,desc_a,info,m=nrep+1,n=nrhs) if (info == psb_success_) call psb_geall(w,desc_a,info,n=nrhs) if (info == psb_success_) call psb_geall(r0,desc_a,info,n=nrhs) if (info == psb_success_) call psb_geall(rm,desc_a,info,n=nrhs) + if (info == psb_success_) call psb_geall(temp,desc_a,info,n=nrhs) if (info == psb_success_) call psb_geasb(v,desc_a,info,mold=x%v,n=nrhs) if (info == psb_success_) call psb_geasb(w,desc_a,info,mold=x%v,n=nrhs) if (info == psb_success_) call psb_geasb(r0,desc_a,info,mold=x%v,n=nrhs) if (info == psb_success_) call psb_geasb(rm,desc_a,info,mold=x%v,n=nrhs) + if (info == psb_success_) call psb_geasb(temp,desc_a,info,mold=x%v,n=nrhs) if (info /= psb_success_) then info=psb_err_from_subroutine_non_ call psb_errpush(info,name) @@ -214,13 +192,11 @@ subroutine psb_dbgmres_multivect(a, prec, b, x, eps, desc_a, info, itmax, iter, goto 9999 end if - h = dzero - y = dzero - errnum = dzero - errden = done - deps = eps - itx = 0 - n_add = nrhs-1 + h = dzero + y = dzero + deps = eps + itx = 0 + n_add = nrhs-1 if ((itrace_ > 0).and.(me == psb_root_)) call log_header(methdname) @@ -245,22 +221,18 @@ subroutine psb_dbgmres_multivect(a, prec, b, x, eps, desc_a, info, itmax, iter, end if ! STEP 2: Compute QR_fact(R(0)) - beta = psb_geqrfact(v(1),desc_a,info) + beta = qr_fact(v(1)) if (info /= psb_success_) then info=psb_err_from_subroutine_non_ call psb_errpush(info,name) goto 9999 end if - ! Add V(1) to VT - vt(:,1:nrhs) = v(1)%get_vect() - ! STEP 3: Outer loop outer: do j=1,nrep ! Update itx counter itx = itx + 1 - if (itx >= litmax) exit outer ! Compute j index for H operations idx_j = (j-1)*nrhs+1 @@ -279,10 +251,8 @@ subroutine psb_dbgmres_multivect(a, prec, b, x, eps, desc_a, info, itmax, iter, ! Compute i index for H operations idx_i = (i-1)*nrhs+1 - ! TODO ! STEP 6: Compute H(i,j) = (V(i)**T)*W - h(idx_i:idx_i+n_add,idx_j:idx_j+n_add) = psb_geprod(v(i),w,desc_a,info,trans=.true.) - !h(idx_i:idx_i+n_add,idx_j:idx_j+n_add) = psb_gedot(v(i),w,desc_a,info) + h(idx_i:idx_i+n_add,idx_j:idx_j+n_add) = psb_gedot(v(i),w,desc_a,info) if (info /= psb_success_) then info=psb_err_from_subroutine_non_ call psb_errpush(info,name) @@ -290,9 +260,17 @@ subroutine psb_dbgmres_multivect(a, prec, b, x, eps, desc_a, info, itmax, iter, end if ! STEP 7: Compute W = W - V(i)*H(i,j) - call psb_geaxpby(-done,& - & psb_geprod(v(i),h(idx_i:idx_i+n_add,idx_j:idx_j+n_add),desc_a,info,global=.false.),& - & done,w,desc_a,info) + + ! Compute temp product V(i)*H(i,j) + call mv_prod(v(i)%get_vect(),h(idx_i:idx_i+n_add,idx_j:idx_j+n_add)) + if (info /= psb_success_) then + info=psb_err_from_subroutine_non_ + call psb_errpush(info,name) + goto 9999 + end if + + ! Compute W + call psb_geaxpby(-done,temp,done,w,desc_a,info) if (info /= psb_success_) then info=psb_err_from_subroutine_non_ call psb_errpush(info,name) @@ -304,7 +282,7 @@ subroutine psb_dbgmres_multivect(a, prec, b, x, eps, desc_a, info, itmax, iter, ! STEP 8: Compute QR_fact(W) ! Store R in H(j+1,j) - h(idx_j+nrhs:idx_j+nrhs+n_add,idx_j:idx_j+n_add) = psb_geqrfact(w,desc_a,info) + h(idx_j+nrhs:idx_j+nrhs+n_add,idx_j:idx_j+n_add) = qr_fact(w) if (info /= psb_success_) then info=psb_err_from_subroutine_non_ call psb_errpush(info,name) @@ -319,12 +297,8 @@ subroutine psb_dbgmres_multivect(a, prec, b, x, eps, desc_a, info, itmax, iter, goto 9999 end if - ! Add V(j+1) to VT - idx = j*nrhs+1 - vt(:,idx:idx+n_add) = v(j+1)%get_vect() - ! STEP 9: Compute Y(j) - call frobenius_norm_min(j) + rmn2 = frobenius_norm_min(j) if (info /= psb_success_) then info=psb_err_from_subroutine_non_ call psb_errpush(info,name) @@ -333,72 +307,56 @@ subroutine psb_dbgmres_multivect(a, prec, b, x, eps, desc_a, info, itmax, iter, ! Compute residues if (istop_ == 1) then - - ! Copy R(0) in R(j) - call psb_geaxpby(done,r0,dzero,rm,desc_a,info) - if (info /= psb_success_) then - info=psb_err_from_subroutine_non_ - call psb_errpush(info,name) - goto 9999 - end if - - ! Compute R(j) = R(0) - VT(j+1)*H(j)*Y(j) - call psb_geaxpby(-done,psb_geprod(psb_geprod(vt(:,1:(j+1)*nrhs),h(1:(j+1)*nrhs,1:j*nrhs),& - & desc_a,info,global=.false.),& - & y(1:j*nrhs,1:nrhs),desc_a,info,global=.false.),done,rm,desc_a,info) - if (info /= psb_success_) then - info=psb_err_from_subroutine_non_ - call psb_errpush(info,name) - goto 9999 - end if - - ! Compute nrm2 of each column of R(j) - rmn2 = psb_genrm2(rm,desc_a,info) - if (info /= psb_success_) then - info=psb_err_from_subroutine_non_ - call psb_errpush(info,name) - goto 9999 - end if - - ! Set error num and den - errnum = rmn2 - errden = r0n2 - + errnum = sum(rmn2)/size(rmn2) + errden = sum(r0n2)/size(r0n2) end if - ! TODO Check convergence (max o media) - if (maxval(errnum).le.(eps*maxval(errden))) then - - ! Exit algorithm + ! Check convergence + if (errnum <= eps*errden) then exit outer - end if ! Log update - if (itrace_ > 0) call log_conv(methdname,me,itx,ione,maxval(errnum),maxval(errden),deps) + if (itrace_ > 0) call log_conv(methdname,me,itx,ione,errnum,errden,deps) end do outer ! STEP 10: X(m) = X(0) + VT(m)*Y(m) - call psb_geaxpby(done,psb_geprod(vt(:,1:j*nrhs),y(1:j*nrhs,:),desc_a,info,global=.false.),done,x,desc_a,info) - if (info /= psb_success_) then - info=psb_err_from_subroutine_non_ - call psb_errpush(info,name) - goto 9999 - end if + do i=1,j + + ! Compute index for Y products + idx = (i-1)*nrhs+1 + + ! Compute temp product V(i)*Y(i) + call mv_prod(v(i)%get_vect(),y(idx:idx+n_add,:)) + if (info /= psb_success_) then + info=psb_err_from_subroutine_non_ + call psb_errpush(info,name) + goto 9999 + end if + + ! Add temp to X(m) + call psb_geaxpby(done,temp,done,x,desc_a,info) + if (info /= psb_success_) then + info=psb_err_from_subroutine_non_ + call psb_errpush(info,name) + goto 9999 + end if + + end do ! END algorithm - if (itrace_ > 0) call log_conv(methdname,me,itx,ione,maxval(errnum),maxval(errden),deps) + if (itrace_ > 0) call log_conv(methdname,me,itx,ione,errnum,errden,deps) - call log_end(methdname,me,itx,itrace_,maxval(errnum),maxval(errden),deps,err=derr,iter=iter) + call log_end(methdname,me,itx,itrace_,errnum,errden,deps,err=derr,iter=iter) if (present(err)) err = derr if (info == psb_success_) call psb_gefree(v,desc_a,info) if (info == psb_success_) call psb_gefree(w,desc_a,info) if (info == psb_success_) call psb_gefree(r0,desc_a,info) if (info == psb_success_) call psb_gefree(rm,desc_a,info) - if (info == psb_success_) deallocate(aux,h,y,vt,stat=info) + if (info == psb_success_) deallocate(aux,h,y,r0n2,rmn2,stat=info) if (info /= psb_success_) then info=psb_err_from_subroutine_non_ call psb_errpush(info,name) @@ -413,43 +371,127 @@ subroutine psb_dbgmres_multivect(a, prec, b, x, eps, desc_a, info, itmax, iter, contains - ! Minimize Frobenius norm - subroutine frobenius_norm_min(rep) + ! QR factorization + function qr_fact(mv) result(res) + implicit none + + ! I/O parameters + type(psb_d_multivect_type), intent(inout) :: mv + real(psb_dpk_), allocatable :: res(:,:) + + ! Utils + real(psb_dpk_), allocatable :: vec(:,:) + real(psb_dpk_), allocatable :: tau(:), work(:) + integer(psb_ipk_) :: ii, jj, m, lda, lwork + + ! Allocate output + allocate(res(nrhs,nrhs)) + + ! Gather multivector to factorize + call psb_gather(vec,mv,desc_a,info,root=psb_root_) + + ! If root + if (me == psb_root_) then + + ! Initialize params + m = size(vec,1) + lda = m + lwork = nrhs + allocate(tau(nrhs),work(lwork)) + + ! Perform QR factorization + call dgeqrf(m,nrhs,vec,lda,tau,work,lwork,info) + ! Set R + res = vec(1:nrhs,1:nrhs) + do ii = 2, nrhs + do jj = 1, ii-1 + res(ii,jj) = dzero + enddo + enddo + + ! Generate Q matrix + call dorgqr(m,nrhs,nrhs,vec,lda,tau,work,lwork,info) + + ! Deallocate + deallocate(tau,work) + + end if + + ! Share R + call psb_bcast(ctxt,res) + + ! Scatter Q + call psb_scatter(vec,mv,desc_a,info,root=psb_root_,mold=mv%v) + + end function qr_fact + + ! Multivectors product + subroutine mv_prod(x_mv,y_mv) implicit none - integer(psb_ipk_), intent(in) :: rep + ! I/O parameters + real(psb_dpk_), intent(in) :: x_mv(:,:), y_mv(:,:) - integer(psb_ipk_) :: lwork - real(psb_dpk_), allocatable :: work(:), beta_e1(:,:) + ! Utils + real(psb_dpk_), allocatable :: res(:,:) + integer(psb_ipk_) :: lda, ldb + + ! Initialize params + lda = size(x_mv,1) + ldb = size(y_mv,2) + allocate(res(lda,ldb)) + res = dzero + + ! Compute product + if (n_row > 0) then + call dgemm('N','N',n_row,nrhs,nrhs,done,x_mv,lda,y_mv,ldb,dzero,res,lda) + end if - real(psb_dpk_), allocatable :: h_temp(:,:) - integer(psb_ipk_) :: m_h, n_h, mn + ! Set temp multivector + call temp%set(res) + + ! Deallocate + deallocate(res) + + end subroutine mv_prod + + ! Minimize Frobenius norm + function frobenius_norm_min(rep) result(res) + implicit none + + ! I/O parameters + real(psb_dpk_), allocatable :: res(:) + integer(psb_ipk_), intent(in) :: rep + + ! Utils + integer(psb_ipk_) :: lwork, m_h, n_h, mn + real(psb_dpk_), allocatable :: work(:), beta_e1(:,:), h_temp(:,:) ! Initialize params - h_temp = h m_h = (rep+1)*nrhs n_h = rep*nrhs + h_temp = h(1:m_h,1:n_h) mn = min(m_h,n_h) lwork = max(1,mn+max(mn,nrhs)) - allocate(work(lwork)) + allocate(work(lwork),beta_e1(m_h,nrhs),res(nrhs)) ! Compute E1*beta - allocate(beta_e1(m_h,nrhs)) beta_e1 = dzero beta_e1(1:nrhs,1:nrhs) = beta ! Compute min Frobenius norm - call dgels('N',m_h,n_h,nrhs,h_temp(1:m_h,1:n_h),m_h,beta_e1,m_h,work,lwork,info) + call dgels('N',m_h,n_h,nrhs,h_temp,m_h,beta_e1,m_h,work,lwork,info) ! Set solution y = beta_e1(1:n_h,1:nrhs) + ! Set residues + res = sum(beta_e1(n_h+1:m_h,:)**2,dim=1) + ! Deallocate deallocate(work,h_temp,beta_e1) - return - - end subroutine frobenius_norm_min + end function frobenius_norm_min end subroutine psb_dbgmres_multivect diff --git a/krylov/psb_dkrylov.f90 b/krylov/psb_dkrylov.f90 index 36f48c66..44645c68 100644 --- a/krylov/psb_dkrylov.f90 +++ b/krylov/psb_dkrylov.f90 @@ -257,7 +257,8 @@ end subroutine psb_dkrylov_vect ! itrace - integer(optional) Input: print an informational message ! with the error estimate every itrace ! iterations -! itrs - integer(optional) Input: number of iterations +! irst - integer(optional) Input: restart parameter for RGMRES and +! BICGSTAB(L) methods ! istop - integer(optional) Input: stopping criterion, or how ! to estimate the error. ! 1: err = |r|/(|a||x|+|b|) @@ -266,7 +267,7 @@ end subroutine psb_dkrylov_vect ! estimate of) residual ! Subroutine psb_dkrylov_multivect(method,a,prec,b,x,eps,desc_a,info,& - & itmax,iter,err,itrace,itrs,istop) + & itmax,iter,err,itrace,irst,istop) use psb_base_mod use psb_prec_mod,only : psb_dprec_type @@ -280,13 +281,13 @@ Subroutine psb_dkrylov_multivect(method,a,prec,b,x,eps,desc_a,info,& type(psb_d_multivect_type), Intent(inout) :: x Real(psb_dpk_), Intent(in) :: eps integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), Optional, Intent(in) :: itmax, itrace, itrs, istop + integer(psb_ipk_), Optional, Intent(in) :: itmax, itrace, irst, istop integer(psb_ipk_), Optional, Intent(out) :: iter Real(psb_dpk_), Optional, Intent(out) :: err abstract interface subroutine psb_dkryl_multivect(a,prec,b,x,eps,desc_a,& - &info,itmax,iter,err,itrace,itrs,istop) + &info,itmax,iter,err,itrace,istop) import :: psb_ipk_, psb_dpk_, psb_desc_type, & & psb_dspmat_type, psb_dprec_type, psb_d_multivect_type type(psb_dspmat_type), intent(in) :: a @@ -296,10 +297,25 @@ Subroutine psb_dkrylov_multivect(method,a,prec,b,x,eps,desc_a,info,& real(psb_dpk_), intent(in) :: eps class(psb_dprec_type), intent(inout) :: prec integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), optional, intent(in) :: itmax,itrace,itrs,istop + integer(psb_ipk_), optional, intent(in) :: itmax,itrace,istop integer(psb_ipk_), optional, intent(out) :: iter real(psb_dpk_), optional, intent(out) :: err end subroutine psb_dkryl_multivect + subroutine psb_dkryl_rest_multivect(a,prec,b,x,eps,desc_a,& + &info,itmax,iter,err,itrace,irst,istop) + import :: psb_ipk_, psb_dpk_, psb_desc_type, & + & psb_dspmat_type, psb_dprec_type, psb_d_multivect_type + type(psb_dspmat_type), intent(in) :: a + type(psb_desc_type), intent(in) :: desc_a + type(psb_d_multivect_type), Intent(inout) :: b + type(psb_d_multivect_type), Intent(inout) :: x + real(psb_dpk_), intent(in) :: eps + class(psb_dprec_type), intent(inout) :: prec + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), optional, intent(in) :: itmax,itrace,irst,istop + integer(psb_ipk_), optional, intent(out) :: iter + real(psb_dpk_), optional, intent(out) :: err + end subroutine psb_dkryl_rest_multivect end interface procedure(psb_dkryl_multivect) :: psb_dbgmres_multivect @@ -313,7 +329,7 @@ Subroutine psb_dkrylov_multivect(method,a,prec,b,x,eps,desc_a,info,& name = 'psb_krylov' call psb_erractionsave(err_act) - ctxt=desc_a%get_context() + ctxt = desc_a%get_context() call psb_info(ctxt, me, np) @@ -329,13 +345,13 @@ Subroutine psb_dkrylov_multivect(method,a,prec,b,x,eps,desc_a,info,& select case(psb_toupper(method)) case('BGMRES','GMRES') call psb_dbgmres_multivect(a,prec,b,x,eps,desc_a,info,& - & itmax,iter,err,itrace=itrace_,itrs=itrs,istop=istop) + & itmax,iter,err,itrace=itrace_,istop=istop) case default if (me == 0) write(psb_err_unit,*) trim(name),& & ': Warning: Unknown method ',method,& & ', defaulting to BGMRES' call psb_dbgmres_multivect(a,prec,b,x,eps,desc_a,info,& - & itmax,iter,err,itrace=itrace_,itrs=itrs,istop=istop) + & itmax,iter,err,itrace=itrace_,istop=istop) end select if ((info==psb_success_).and.do_alloc_wrk) call prec%free_wrk(info) diff --git a/test/block_krylov/cpu/Makefile b/test/block_krylov/cpu/Makefile index 9fcdfe3c..e5da1271 100644 --- a/test/block_krylov/cpu/Makefile +++ b/test/block_krylov/cpu/Makefile @@ -1,4 +1,4 @@ -INSTALLDIR=../.. +INSTALLDIR=../../.. INCDIR=$(INSTALLDIR)/include/ MODDIR=$(INSTALLDIR)/modules/ include $(INCDIR)/Make.inc.psblas @@ -11,27 +11,27 @@ LDLIBS=$(PSBLDLIBS) FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG). -DBFOBJS=getp.o psb_dbf_sample.o +DBFOBJS=psb_dbf_sample.o +DPDEGEN=psb_dpde_gen.o EXEDIR=./runs -all: runsd psb_dbf_sample +all: runsd psb_dbf_sample psb_dpde_gen runsd: (if test ! -d runs ; then mkdir runs; fi) -psb_dbf_sample.o: getp.o - -psb_dbf_sample: $(DBFOBJS) +psb_dbf_sample: runsd $(DBFOBJS) $(FLINK) $(LOPT) $(DBFOBJS) -o psb_dbf_sample $(PSBLAS_LIB) $(LDLIBS) /bin/mv psb_dbf_sample $(EXEDIR) -.f90.o: - $(MPFC) $(FCOPT) $(FINCLUDES) $(FDEFINES) -c $< +psb_dpde_gen: runsd $(DPDEGEN) + $(FLINK) $(LOPT) $(DPDEGEN) -o psb_dpde_gen $(PSBLAS_LIB) $(LDLIBS) + /bin/mv psb_dpde_gen $(EXEDIR) clean: - /bin/rm -f $(DBFOBJS)\ - *$(.mod) $(EXEDIR)/psb_*f_sample + /bin/rm -f $(DBFOBJS) $(DPDEGEN)\ + *$(.mod) $(EXEDIR)/psb_* lib: (cd ../../; make library) diff --git a/test/block_krylov/cpu/getp.f90 b/test/block_krylov/cpu/getp.f90 deleted file mode 100644 index 197cb646..00000000 --- a/test/block_krylov/cpu/getp.f90 +++ /dev/null @@ -1,169 +0,0 @@ -! -! Parallel Sparse BLAS version 3.5 -! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! -! -Module getp - interface get_parms - module procedure get_dparms - end interface - -contains - ! - ! Get iteration parameters from the command line - ! - subroutine get_dparms(ctxt,mtrx_file,rhs_file,filefmt,kmethd,ptype,part,& - & afmt,nrhs,istopc,itmax,itrace,itrs,eps) - use psb_base_mod - type(psb_ctxt_type) :: ctxt - character(len=2) :: filefmt - character(len=40) :: kmethd, mtrx_file, rhs_file, ptype - character(len=20) :: part - integer(psb_ipk_) :: nrhs,iret,istopc,itmax,itrace,itrs - character(len=40) :: charbuf - real(psb_dpk_) :: eps - character :: afmt*5 - integer(psb_ipk_) :: np, iam - integer(psb_ipk_) :: inparms(40), ip, inp_unit - character(len=1024) :: filename - - call psb_info(ctxt,iam,np) - if (iam == 0) then - if (command_argument_count()>0) then - call get_command_argument(1,filename) - inp_unit = 30 - open(inp_unit,file=filename,action='read',iostat=info) - if (info /= 0) then - write(psb_err_unit,*) 'Could not open file ',filename,' for input' - call psb_abort(ctxt) - stop - else - write(psb_err_unit,*) 'Opened file ',trim(filename),' for input' - end if - else - inp_unit=psb_inp_unit - end if - ! Read Input Parameters - read(inp_unit,*) ip - if (ip >= 5) then - read(inp_unit,*) mtrx_file - read(inp_unit,*) rhs_file - read(inp_unit,*) filefmt - read(inp_unit,*) kmethd - read(inp_unit,*) ptype - read(inp_unit,*) afmt - read(inp_unit,*) part - - call psb_bcast(ctxt,mtrx_file) - call psb_bcast(ctxt,rhs_file) - call psb_bcast(ctxt,filefmt) - call psb_bcast(ctxt,kmethd) - call psb_bcast(ctxt,ptype) - call psb_bcast(ctxt,afmt) - call psb_bcast(ctxt,part) - - if (ip >= 7) then - read(inp_unit,*) nrhs - else - nrhs=4 - endif - if (ip >= 8) then - read(inp_unit,*) istopc - else - istopc=1 - endif - if (ip >= 9) then - read(inp_unit,*) itmax - else - itmax=500 - endif - if (ip >= 10) then - read(inp_unit,*) itrace - else - itrace=-1 - endif - if (ip >= 11) then - read(inp_unit,*) itrs - else - itrs = 1 - endif - if (ip >= 12) then - read(inp_unit,*) eps - else - eps=1.d-6 - endif - inparms(1) = nrhs - inparms(2) = istopc - inparms(3) = itmax - inparms(4) = itrace - inparms(5) = itrs - call psb_bcast(ctxt,inparms(1:5)) - call psb_bcast(ctxt,eps) - - write(psb_out_unit,'(" ")') - write(psb_out_unit,'("Solving matrix : ",a)') mtrx_file - write(psb_out_unit,'("Number of processors : ",i3)') np - write(psb_out_unit,'("Data distribution : ",a)') part - write(psb_out_unit,'("Iterative method : ",a)') kmethd - write(psb_out_unit,'("Preconditioner : ",a)') ptype - write(psb_out_unit,'("Number of RHS : ",i3)') nrhs - write(psb_out_unit,'("Number of iterations : ",i3)') itrs - write(psb_out_unit,'("Storage format : ",a)') afmt(1:3) - write(psb_out_unit,'(" ")') - else - write(psb_err_unit,*) 'Wrong format for input file' - call psb_abort(ctxt) - stop 1 - end if - if (inp_unit /= psb_inp_unit) then - close(inp_unit) - end if - else - ! Receive Parameters - call psb_bcast(ctxt,mtrx_file) - call psb_bcast(ctxt,rhs_file) - call psb_bcast(ctxt,filefmt) - call psb_bcast(ctxt,kmethd) - call psb_bcast(ctxt,ptype) - call psb_bcast(ctxt,afmt) - call psb_bcast(ctxt,part) - - call psb_bcast(ctxt,inparms(1:5)) - nrhs = inparms(1) - istopc = inparms(2) - itmax = inparms(3) - itrace = inparms(4) - itrs = inparms(5) - call psb_bcast(ctxt,eps) - - end if - - end subroutine get_dparms - -end module getp diff --git a/test/block_krylov/cpu/psb_dbf_sample.f90 b/test/block_krylov/cpu/psb_dbf_sample.f90 index d6be4716..ecdf9643 100644 --- a/test/block_krylov/cpu/psb_dbf_sample.f90 +++ b/test/block_krylov/cpu/psb_dbf_sample.f90 @@ -4,7 +4,6 @@ program psb_dbf_sample use psb_prec_mod use psb_krylov_mod use psb_util_mod - use getp implicit none @@ -36,7 +35,7 @@ program psb_dbf_sample integer(psb_lpk_) :: lnp ! solver paramters - integer(psb_ipk_) :: iter, itmax, ierr, itrace, ircode, methd, istopc, itrs + integer(psb_ipk_) :: iter, itmax, ierr, itrace, ircode, methd, istopc integer(psb_epk_) :: amatsize, precsize, descsize real(psb_dpk_) :: err, eps @@ -52,7 +51,7 @@ program psb_dbf_sample real(psb_dpk_), allocatable :: resmx(:) real(psb_dpk_) :: resmxp integer(psb_ipk_), allocatable :: ivg(:) - logical :: print_matrix = .false. + logical :: print_matrix = .true. call psb_init(ctxt) call psb_info(ctxt,iam,np) @@ -80,7 +79,7 @@ program psb_dbf_sample ! get parameters ! call get_parms(ctxt,mtrx_file,rhs_file,filefmt,kmethd,ptype,& - & part,afmt,nrhs,istopc,itmax,itrace,itrs,eps) + & part,afmt,nrhs,istopc,itmax,itrace,eps) call psb_barrier(ctxt) t1 = psb_wtime() @@ -133,9 +132,9 @@ program psb_dbf_sample b_mv_glob => aux_b(:,:) do i=1, m do j=1, nrhs - !b_mv_glob(i,j) = done - call random_number(random_value) - b_mv_glob(i,j) = random_value + b_mv_glob(i,j) = done + !call random_number(random_value) + !b_mv_glob(i,j) = random_value enddo enddo endif @@ -210,8 +209,7 @@ program psb_dbf_sample t1 = psb_wtime() call psb_krylov(kmethd,a,prec,b_mv,x_mv,eps,desc_a,info,& - & itmax=itmax,iter=iter,err=err,itrace=itrace,& - & itrs=itrs,istop=istopc) + & itmax=itmax,iter=iter,err=err,itrace=itrace,istop=istopc) call psb_barrier(ctxt) t2 = psb_wtime() - t1 @@ -221,11 +219,11 @@ program psb_dbf_sample write(psb_out_unit,'("Finished algorithm")') write(psb_out_unit,'(" ")') end if - + call psb_geaxpby(done,b_mv,dzero,r_mv,desc_a,info) call psb_spmm(-done,a,x_mv,done,r_mv,desc_a,info) - resmx = psb_genrm2(r_mv,desc_a,info) + resmx = psb_genrm2(r_mv,desc_a,info) resmxp = psb_geamax(r_mv,desc_a,info) amatsize = a%sizeof() @@ -256,18 +254,18 @@ program psb_dbf_sample write(psb_out_unit,'("Time to solve system: ",es12.5)')t2 write(psb_out_unit,'("Time per iteration: ",es12.5)')t2/(iter) write(psb_out_unit,'("Total time: ",es12.5)')t2+tprec - write(psb_out_unit,'("Residual norm 2: ",es12.5)')maxval(resmx) + write(psb_out_unit,'("Residual norm 2: ",es12.5)')sum(resmx)/size(resmx) write(psb_out_unit,'("Residual norm inf: ",es12.5)')resmxp - write(psb_out_unit,'(a8,4(2x,a20))') 'I','X(I)','R(I)','B(I)' if (print_matrix) then + write(psb_out_unit,'(a8,4(2x,a20))') 'I','X(I)','R(I)','B(I)' do i=1,m write(psb_out_unit,993) i, x_mv_glob(i,:), r_mv_glob(i,:), b_mv_glob(i,:) end do end if end if -998 format(i8,4(2x,g20.14)) -993 format(i6,4(1x,e12.6)) +998 format(i8,10(2x,g20.14)) +993 format(i6,10(1x,e12.6)) call psb_gefree(b_mv, desc_a,info) call psb_gefree(x_mv, desc_a,info) @@ -282,4 +280,101 @@ program psb_dbf_sample return +contains + ! + ! get iteration parameters from standard input + ! + subroutine get_parms(ctxt,mtrx_file,rhs_file,filefmt,kmethd,ptype,part,afmt,nrhs,istopc,itmax,itrace,eps) + type(psb_ctxt_type) :: ctxt + character(len=2) :: filefmt + character(len=40) :: kmethd, mtrx_file, rhs_file, ptype + character(len=20) :: part + character(len=5) :: afmt + integer(psb_ipk_) :: nrhs, istopc, itmax, itrace + real(psb_dpk_) :: eps + + integer(psb_ipk_) :: np, iam + integer(psb_ipk_) :: ip, inp_unit + character(len=1024) :: filename + + call psb_info(ctxt, iam, np) + + if (iam == 0) then + if (command_argument_count()>0) then + call get_command_argument(1, filename) + inp_unit = 30 + open(inp_unit, file=filename, action='read', iostat=info) + if (info /= 0) then + write(psb_err_unit,*) 'Could not open file ',filename,' for input' + call psb_abort(ctxt) + stop + else + write(psb_err_unit,*) 'Opened file ',trim(filename),' for input' + end if + else + inp_unit = psb_inp_unit + end if + + ! Read Input Parameters + read(inp_unit,*) mtrx_file + read(inp_unit,*) rhs_file + read(inp_unit,*) filefmt + read(inp_unit,*) kmethd + read(inp_unit,*) ptype + read(inp_unit,*) afmt + read(inp_unit,*) part + read(inp_unit,*) nrhs + read(inp_unit,*) istopc + read(inp_unit,*) itmax + read(inp_unit,*) itrace + read(inp_unit,*) eps + + call psb_bcast(ctxt,mtrx_file) + call psb_bcast(ctxt,rhs_file) + call psb_bcast(ctxt,filefmt) + call psb_bcast(ctxt,kmethd) + call psb_bcast(ctxt,ptype) + call psb_bcast(ctxt,afmt) + call psb_bcast(ctxt,part) + call psb_bcast(ctxt,nrhs) + call psb_bcast(ctxt,istopc) + call psb_bcast(ctxt,itmax) + call psb_bcast(ctxt,itrace) + call psb_bcast(ctxt,eps) + + if (inp_unit /= psb_inp_unit) then + close(inp_unit) + end if + else + ! Receive Parameters + call psb_bcast(ctxt,mtrx_file) + call psb_bcast(ctxt,rhs_file) + call psb_bcast(ctxt,filefmt) + call psb_bcast(ctxt,kmethd) + call psb_bcast(ctxt,ptype) + call psb_bcast(ctxt,afmt) + call psb_bcast(ctxt,part) + call psb_bcast(ctxt,nrhs) + call psb_bcast(ctxt,istopc) + call psb_bcast(ctxt,itmax) + call psb_bcast(ctxt,itrace) + call psb_bcast(ctxt,eps) + end if + + if (iam == 0) then + write(psb_out_unit,'(" ")') + write(psb_out_unit,'("Solving matrix : ",a)') mtrx_file + write(psb_out_unit,'("Data distribution : ",a)') part + write(psb_out_unit,'("Iterative method : ",a)') kmethd + write(psb_out_unit,'("Number of processors : ",i0)') np + write(psb_out_unit,'("Number of RHS : ",i4)') nrhs + write(psb_out_unit,'("Max number of iterations : ",i3)') itmax + write(psb_out_unit,'("Storage format : ",a)') afmt + write(psb_out_unit,'(" ")') + end if + + return + + end subroutine get_parms + end program psb_dbf_sample diff --git a/test/block_krylov/cpu/psb_dpde_gen.F90 b/test/block_krylov/cpu/psb_dpde_gen.F90 new file mode 100644 index 00000000..8f00733b --- /dev/null +++ b/test/block_krylov/cpu/psb_dpde_gen.F90 @@ -0,0 +1,804 @@ +module psb_d_pde3d_mod + + use psb_base_mod, only : psb_dpk_, psb_ipk_, psb_lpk_, psb_desc_type,& + & psb_dspmat_type, psb_d_multivect_type, dzero,& + & psb_d_base_sparse_mat, psb_d_base_multivect_type, & + & psb_i_base_vect_type, psb_l_base_vect_type + + interface + function d_func_3d(x,y,z) result(val) + import :: psb_dpk_ + real(psb_dpk_), intent(in) :: x,y,z + real(psb_dpk_) :: val + end function d_func_3d + end interface + + interface psb_gen_pde3d + module procedure psb_d_gen_pde3d + end interface psb_gen_pde3d + +contains + + function d_null_func_3d(x,y,z) result(val) + + real(psb_dpk_), intent(in) :: x,y,z + real(psb_dpk_) :: val + + val = dzero + + end function d_null_func_3d + ! + ! functions parametrizing the differential equation + ! + + ! + ! Note: b1, b2 and b3 are the coefficients of the first + ! derivative of the unknown function. The default + ! we apply here is to have them zero, so that the resulting + ! matrix is symmetric/hermitian and suitable for + ! testing with CG and FCG. + ! When testing methods for non-hermitian matrices you can + ! change the B1/B2/B3 functions to e.g. done/sqrt((3*done)) + ! + function b1(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: b1 + real(psb_dpk_), intent(in) :: x,y,z + b1=done/sqrt((3*done)) + end function b1 + function b2(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: b2 + real(psb_dpk_), intent(in) :: x,y,z + b2=done/sqrt((3*done)) + end function b2 + function b3(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: b3 + real(psb_dpk_), intent(in) :: x,y,z + b3=done/sqrt((3*done)) + end function b3 + function c(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: c + real(psb_dpk_), intent(in) :: x,y,z + c=dzero + end function c + function a1(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: a1 + real(psb_dpk_), intent(in) :: x,y,z + a1=done/80 + end function a1 + function a2(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: a2 + real(psb_dpk_), intent(in) :: x,y,z + a2=done/80 + end function a2 + function a3(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: a3 + real(psb_dpk_), intent(in) :: x,y,z + a3=done/80 + end function a3 + function g(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: g + real(psb_dpk_), intent(in) :: x,y,z + g = dzero + if (x == done) then + g = done + else if (x == dzero) then + g = exp(y**2-z**2) + end if + end function g + + + ! + ! subroutine to allocate and fill in the coefficient matrix and + ! the rhs. + ! + subroutine psb_d_gen_pde3d(ctxt,idim,a,bmv,xmv,nrhs,desc_a,afmt,info,& + & f,amold,vmold,imold,partition,nrl,iv,tnd) + use psb_base_mod + use psb_util_mod + ! + ! Discretizes the partial differential equation + ! + ! a1 dd(u) a2 dd(u) a3 dd(u) b1 d(u) b2 d(u) b3 d(u) + ! - ------ - ------ - ------ + ----- + ------ + ------ + c u = f + ! dxdx dydy dzdz dx dy dz + ! + ! with Dirichlet boundary conditions + ! u = g + ! + ! on the unit cube 0<=x,y,z<=1. + ! + ! + ! Note that if b1=b2=b3=c=0., the PDE is the Laplace equation. + ! + implicit none + integer(psb_ipk_) :: idim + type(psb_dspmat_type) :: a + type(psb_d_multivect_type) :: xmv,bmv + integer(psb_ipk_) :: nrhs + type(psb_desc_type) :: desc_a + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: info + character(len=*) :: afmt + procedure(d_func_3d), optional :: f + class(psb_d_base_sparse_mat), optional :: amold + class(psb_d_base_multivect_type), optional :: vmold + class(psb_i_base_vect_type), optional :: imold + integer(psb_ipk_), optional :: partition, nrl,iv(:) + logical, optional :: tnd + ! Local variables. + + integer(psb_ipk_), parameter :: nb=20 + type(psb_d_csc_sparse_mat) :: acsc + type(psb_d_coo_sparse_mat) :: acoo + type(psb_d_csr_sparse_mat) :: acsr + real(psb_dpk_) :: zt(nb,nrhs),x,y,z + integer(psb_ipk_) :: nnz,nr,nlr,i,j,ii,ib,k, partition_ + integer(psb_lpk_) :: m,n,glob_row,nt + integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner + ! For 3D partition + ! Note: integer control variables going directly into an MPI call + ! must be 4 bytes, i.e. psb_mpk_ + integer(psb_mpk_) :: npdims(3), npp, minfo + integer(psb_ipk_) :: npx,npy,npz, iamx,iamy,iamz,mynx,myny,mynz + integer(psb_ipk_), allocatable :: bndx(:),bndy(:),bndz(:) + ! Process grid + integer(psb_ipk_) :: np, iam + integer(psb_ipk_) :: icoeff + integer(psb_lpk_), allocatable :: irow(:),icol(:),myidx(:) + real(psb_dpk_), allocatable :: val(:) + ! deltah dimension of each grid cell + ! deltat discretization time + real(psb_dpk_) :: deltah, sqdeltah, deltah2 + real(psb_dpk_), parameter :: rhs=dzero,one=done,zero=dzero + real(psb_dpk_) :: t0, t1, t2, t3, tasb, talc, ttot, tgen, tcdasb + integer(psb_ipk_) :: err_act + procedure(d_func_3d), pointer :: f_ + logical :: tnd_ + character(len=20) :: name, ch_err,tmpfmt + + info = psb_success_ + name = 'create_matrix' + call psb_erractionsave(err_act) + + call psb_info(ctxt, iam, np) + + + if (present(f)) then + f_ => f + else + f_ => d_null_func_3d + end if + + deltah = done/(idim+2) + sqdeltah = deltah*deltah + deltah2 = (2*done)* deltah + + if (present(partition)) then + if ((1<= partition).and.(partition <= 3)) then + partition_ = partition + else + write(*,*) 'Invalid partition choice ',partition,' defaulting to 3' + partition_ = 3 + end if + else + partition_ = 3 + end if + + ! initialize array descriptor and sparse matrix storage. provide an + ! estimate of the number of non zeroes + + m = (1_psb_lpk_*idim)*idim*idim + n = m + nnz = ((n*7)/(np)) + if(iam == psb_root_) write(psb_out_unit,'("Generating Matrix (size=",i0,")...")')n + t0 = psb_wtime() + select case(partition_) + case(1) + ! A BLOCK partition + if (present(nrl)) then + nr = nrl + else + ! + ! Using a simple BLOCK distribution. + ! + nt = (m+np-1)/np + nr = max(0,min(nt,m-(iam*nt))) + end if + + nt = nr + call psb_sum(ctxt,nt) + if (nt /= m) then + write(psb_err_unit,*) iam, 'Initialization error ',nr,nt,m + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + return + end if + + ! + ! First example of use of CDALL: specify for each process a number of + ! contiguous rows + ! + call psb_cdall(ctxt,desc_a,info,nl=nr) + myidx = desc_a%get_global_indices() + nlr = size(myidx) + + case(2) + ! A partition defined by the user through IV + + if (present(iv)) then + if (size(iv) /= m) then + write(psb_err_unit,*) iam, 'Initialization error: wrong IV size',size(iv),m + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + return + end if + else + write(psb_err_unit,*) iam, 'Initialization error: IV not present' + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + return + end if + + ! + ! Second example of use of CDALL: specify for each row the + ! process that owns it + ! + call psb_cdall(ctxt,desc_a,info,vg=iv) + myidx = desc_a%get_global_indices() + nlr = size(myidx) + + case(3) + ! A 3-dimensional partition + + ! A nifty MPI function will split the process list + npdims = 0 + call mpi_dims_create(np,3,npdims,info) + npx = npdims(1) + npy = npdims(2) + npz = npdims(3) + + allocate(bndx(0:npx),bndy(0:npy),bndz(0:npz)) + ! We can reuse idx2ijk for process indices as well. + call idx2ijk(iamx,iamy,iamz,iam,npx,npy,npz,base=0) + ! Now let's split the 3D cube in hexahedra + call dist1Didx(bndx,idim,npx) + mynx = bndx(iamx+1)-bndx(iamx) + call dist1Didx(bndy,idim,npy) + myny = bndy(iamy+1)-bndy(iamy) + call dist1Didx(bndz,idim,npz) + mynz = bndz(iamz+1)-bndz(iamz) + + ! How many indices do I own? + nlr = mynx*myny*mynz + allocate(myidx(nlr)) + ! Now, let's generate the list of indices I own + nr = 0 + do i=bndx(iamx),bndx(iamx+1)-1 + do j=bndy(iamy),bndy(iamy+1)-1 + do k=bndz(iamz),bndz(iamz+1)-1 + nr = nr + 1 + call ijk2idx(myidx(nr),i,j,k,idim,idim,idim) + end do + end do + end do + if (nr /= nlr) then + write(psb_err_unit,*) iam,iamx,iamy,iamz, 'Initialization error: NR vs NLR ',& + & nr,nlr,mynx,myny,mynz + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + end if + + ! + ! Third example of use of CDALL: specify for each process + ! the set of global indices it owns. + ! + call psb_cdall(ctxt,desc_a,info,vl=myidx) + + case default + write(psb_err_unit,*) iam, 'Initialization error: should not get here' + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + return + end select + + + if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz,& + & dupl=psb_dupl_err_) + ! define rhs from boundary conditions; also build initial guess + if (info == psb_success_) call psb_geall(xmv,desc_a,info,n=nrhs) + if (info == psb_success_) call psb_geall(bmv,desc_a,info,n=nrhs) + + call psb_barrier(ctxt) + talc = psb_wtime()-t0 + + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='allocation rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + ! we build an auxiliary matrix consisting of one row at a + ! time; just a small matrix. might be extended to generate + ! a bunch of rows per call. + ! + allocate(val(20*nb),irow(20*nb),& + &icol(20*nb),stat=info) + if (info /= psb_success_ ) then + info=psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + endif + + + ! loop over rows belonging to current process in a block + ! distribution. + + call psb_barrier(ctxt) + t1 = psb_wtime() + do ii=1, nlr,nb + ib = min(nb,nlr-ii+1) + icoeff = 1 + do k=1,ib + i=ii+k-1 + ! local matrix pointer + glob_row=myidx(i) + ! compute gridpoint coordinates + call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim) + ! x, y, z coordinates + x = (ix-1)*deltah + y = (iy-1)*deltah + z = (iz-1)*deltah + zt(k,:) = f_(x,y,z) + ! internal point: build discretization + ! + ! term depending on (x-1,y,z) + ! + val(icoeff) = -a1(x,y,z)/sqdeltah-b1(x,y,z)/deltah2 + if (ix == 1) then + zt(k,:) = g(dzero,y,z)*(-val(icoeff)) + zt(k,:) + else + call ijk2idx(icol(icoeff),ix-1,iy,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + ! term depending on (x,y-1,z) + val(icoeff) = -a2(x,y,z)/sqdeltah-b2(x,y,z)/deltah2 + if (iy == 1) then + zt(k,:) = g(x,dzero,z)*(-val(icoeff)) + zt(k,:) + else + call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + ! term depending on (x,y,z-1) + val(icoeff)=-a3(x,y,z)/sqdeltah-b3(x,y,z)/deltah2 + if (iz == 1) then + zt(k,:) = g(x,y,dzero)*(-val(icoeff)) + zt(k,:) + else + call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + + ! term depending on (x,y,z) + val(icoeff)=(2*done)*(a1(x,y,z)+a2(x,y,z)+a3(x,y,z))/sqdeltah & + & + c(x,y,z) + call ijk2idx(icol(icoeff),ix,iy,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + ! term depending on (x,y,z+1) + val(icoeff)=-a3(x,y,z)/sqdeltah+b3(x,y,z)/deltah2 + if (iz == idim) then + zt(k,:) = g(x,y,done)*(-val(icoeff)) + zt(k,:) + else + call ijk2idx(icol(icoeff),ix,iy,iz+1,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + ! term depending on (x,y+1,z) + val(icoeff)=-a2(x,y,z)/sqdeltah+b2(x,y,z)/deltah2 + if (iy == idim) then + zt(k,:) = g(x,done,z)*(-val(icoeff)) + zt(k,:) + else + call ijk2idx(icol(icoeff),ix,iy+1,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + ! term depending on (x+1,y,z) + val(icoeff)=-a1(x,y,z)/sqdeltah+b1(x,y,z)/deltah2 + if (ix==idim) then + zt(k,:) = g(done,y,z)*(-val(icoeff)) + zt(k,:) + else + call ijk2idx(icol(icoeff),ix+1,iy,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + + end do + call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info) + if(info /= psb_success_) exit + call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib,:),bmv,desc_a,info) + if(info /= psb_success_) exit + zt(:,:)=dzero + call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib,:),xmv,desc_a,info) + if(info /= psb_success_) exit + end do + + tgen = psb_wtime()-t1 + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='insert rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + deallocate(val,irow,icol) + + call psb_barrier(ctxt) + t1 = psb_wtime() + call psb_cdasb(desc_a,info,mold=imold) + tcdasb = psb_wtime()-t1 + call psb_barrier(ctxt) + t1 = psb_wtime() + if (info == psb_success_) then + if (present(amold)) then + call psb_spasb(a,desc_a,info,mold=amold,bld_and=tnd) + else + call psb_spasb(a,desc_a,info,afmt=afmt,bld_and=tnd) + end if + end if + call psb_barrier(ctxt) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='asb rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + if (info == psb_success_) call psb_geasb(xmv,desc_a,info,mold=vmold) + if (info == psb_success_) call psb_geasb(bmv,desc_a,info,mold=vmold) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='asb rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + tasb = psb_wtime()-t1 + call psb_barrier(ctxt) + ttot = psb_wtime() - t0 + + call psb_amx(ctxt,talc) + call psb_amx(ctxt,tgen) + call psb_amx(ctxt,tasb) + call psb_amx(ctxt,ttot) + if(iam == psb_root_) then + tmpfmt = a%get_fmt() + write(psb_out_unit,'("The matrix has been generated and assembled in ",a3," format.")')& + & tmpfmt + write(psb_out_unit,'("-allocation time : ",es12.5)') talc + write(psb_out_unit,'("-coeff. gen. time : ",es12.5)') tgen + write(psb_out_unit,'("-desc asbly time : ",es12.5)') tcdasb + write(psb_out_unit,'("- mat asbly time : ",es12.5)') tasb + write(psb_out_unit,'("-total time : ",es12.5)') ttot + + end if + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ctxt,err_act) + + return + end subroutine psb_d_gen_pde3d + +end module psb_d_pde3d_mod + +program dpdegen + use psb_base_mod + use psb_util_mod + use psb_prec_mod + use psb_krylov_mod + use psb_d_pde3d_mod + + implicit none + + ! input parameters + character(len=40) :: kmethd, ptype + character(len=5) :: afmt + integer(psb_ipk_) :: idim, nrhs, istopc, itmax, itrace + real(psb_dpk_) :: eps + + ! sparse matrix + type(psb_dspmat_type) :: a + + ! preconditioner data + type(psb_dprec_type) :: prec + + ! dense matrices + real(psb_dpk_), allocatable :: b_mv_glob(:,:), x_mv_glob(:,:), r_mv_glob(:,:) + type(psb_d_multivect_type) :: b_mv, x_mv, r_mv + integer(psb_ipk_) :: m + + ! blacs parameters + type(psb_desc_type) :: desc_a + type(psb_ctxt_type) :: ctxt + integer :: iam, np + + ! solver parameters + real(psb_dpk_) :: err, cond + integer(psb_epk_) :: amatsize, precsize, descsize + integer(psb_ipk_) :: iter, ierr, ircode + + ! other variables + integer(psb_ipk_) :: info, i, j, rep + real(psb_dpk_) :: t1, t2, tprec + character(len=20) :: name, ch_err + real(psb_dpk_) :: random_value + real(psb_dpk_) :: resmxp + real(psb_dpk_), allocatable :: resmx(:) + logical :: print_matrix = .false. + + ! Init environment + info=psb_success_ + call psb_init(ctxt) + call psb_info(ctxt,iam,np) + if (iam < 0) then + ! This should not happen, but just in case + call psb_exit(ctxt) + stop + endif + if(psb_get_errstatus() /= 0) goto 9999 + name='pdegenmm_block' + ! + ! Hello world + ! + if (iam == psb_root_) then + write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_ + write(*,*) 'This is the ',trim(name),' sample program' + write(psb_out_unit,'("Number of processors: ",i8)')np + end if + ! + ! get parameters + ! + call get_parms(ctxt,kmethd,ptype,idim,afmt,nrhs,istopc,itmax,itrace,eps) + ! + ! allocate and fill in the coefficient matrix and initial vectors + ! + call psb_barrier(ctxt) + t1 = psb_wtime() + call psb_gen_pde3d(ctxt,idim,a,b_mv,x_mv,nrhs,desc_a,afmt,info,partition=3) + call psb_barrier(ctxt) + t2 = psb_wtime() - t1 + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='create_matrix' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + if (iam == psb_root_) write(psb_out_unit,'("Overall matrix creation time : ",es12.5)')t2 + if (iam == psb_root_) write(psb_out_unit,'(" ")') + + call psb_geall(r_mv,desc_a,info,nrhs) + call r_mv%zero() + call psb_geasb(r_mv,desc_a,info) + + t2 = psb_wtime() - t1 + call psb_amx(ctxt, t2) + + if (iam == psb_root_) then + write(psb_out_unit,'(" ")') + write(psb_out_unit,'("Time to read and partition matrix : ",es12.5)')t2 + write(psb_out_unit,'(" ")') + end if + + ! building the preconditioner + call prec%init(ctxt,ptype,info) + t1 = psb_wtime() + call prec%build(a,desc_a,info) + tprec = psb_wtime()-t1 + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='psb_precbld') + goto 9999 + end if + + call psb_amx(ctxt,tprec) + + if(iam == psb_root_) then + write(psb_out_unit,'("Preconditioner time: ",es12.5)')tprec + write(psb_out_unit,'(" ")') + write(psb_out_unit,'("Starting algorithm")') + write(psb_out_unit,'(" ")') + end if + + call psb_barrier(ctxt) + t1 = psb_wtime() + + call psb_krylov(kmethd,a,prec,b_mv,x_mv,eps,desc_a,info,& + & itmax=itmax,iter=iter,err=err,itrace=itrace,istop=istopc) + + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='solver routine' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + call psb_barrier(ctxt) + t2 = psb_wtime() - t1 + call psb_amx(ctxt,t2) + + if(iam == psb_root_) then + write(psb_out_unit,'("Finished algorithm")') + write(psb_out_unit,'(" ")') + end if + + call psb_geaxpby(done,b_mv,dzero,r_mv,desc_a,info) + call psb_spmm(-done,a,x_mv,done,r_mv,desc_a,info) + + resmx = psb_genrm2(r_mv,desc_a,info) + resmxp = psb_geamax(r_mv,desc_a,info) + + amatsize = a%sizeof() + descsize = desc_a%sizeof() + precsize = prec%sizeof() + + call psb_sum(ctxt,amatsize) + call psb_sum(ctxt,descsize) + call psb_sum(ctxt,precsize) + + call psb_gather(x_mv_glob,x_mv,desc_a,info,root=psb_root_) + if (info == psb_success_) call psb_gather(b_mv_glob,b_mv,desc_a,info,root=psb_root_) + if (info == psb_success_) call psb_gather(r_mv_glob,r_mv,desc_a,info,root=psb_root_) + if (info /= psb_success_) goto 9999 + + if (iam == psb_root_) then + call prec%descr(info) + write(psb_out_unit,'(" ")') + write(psb_out_unit,'("Computed solution on: ",i8," processors")')np + write(psb_out_unit,'("Storage format for A: ",a)')a%get_fmt() + write(psb_out_unit,'("Storage format for DESC_A: ",a)')desc_a%get_fmt() + write(psb_out_unit,'("Total memory occupation for A: ",i12)')amatsize + write(psb_out_unit,'("Total memory occupation for PREC: ",i12)')precsize + write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize + write(psb_out_unit,'("Iterations to convergence: ",i12)')iter + write(psb_out_unit,'("Error estimate on exit: ",es12.5)')err + write(psb_out_unit,'("Time to buil prec.: ",es12.5)')tprec + write(psb_out_unit,'("Time to solve system: ",es12.5)')t2 + write(psb_out_unit,'("Time per iteration: ",es12.5)')t2/(iter) + write(psb_out_unit,'("Total time: ",es12.5)')t2+tprec + write(psb_out_unit,'("Residual norm 2: ",es12.5)')sum(resmx)/size(resmx) + write(psb_out_unit,'("Residual norm inf: ",es12.5)')resmxp + if (print_matrix) then + write(psb_out_unit,'(a8,4(2x,a20))') 'I','X(I)','R(I)','B(I)' + do i=1,m + write(psb_out_unit,993) i, x_mv_glob(i,:), r_mv_glob(i,:), b_mv_glob(i,:) + end do + end if + write(psb_out_unit,'(" ")') + end if + +998 format(i8,20(2x,g20.14)) +993 format(i6,20(1x,e12.6)) + + ! + ! cleanup storage and exit + ! + call psb_gefree(b_mv,desc_a,info) + call psb_gefree(x_mv,desc_a,info) + call psb_spfree(a,desc_a,info) + call prec%free(info) + call psb_cdfree(desc_a,info) + call psb_exit(ctxt) + + return + +9999 call psb_error(ctxt) + + return + +contains + ! + ! get iteration parameters from standard input + ! + subroutine get_parms(ctxt,kmethd,ptype,idim,afmt,nrhs,istopc,itmax,itrace,eps) + type(psb_ctxt_type) :: ctxt + character(len=40) :: kmethd, ptype + character(len=5) :: afmt + integer(psb_ipk_) :: idim, nrhs, istopc, itmax, itrace + real(psb_dpk_) :: eps + + integer(psb_ipk_) :: np, iam + integer(psb_ipk_) :: ip, inp_unit + character(len=1024) :: filename + + call psb_info(ctxt, iam, np) + + if (iam == 0) then + if (command_argument_count()>0) then + call get_command_argument(1, filename) + inp_unit = 30 + open(inp_unit, file=filename, action='read', iostat=info) + if (info /= 0) then + write(psb_err_unit,*) 'Could not open file ',filename,' for input' + call psb_abort(ctxt) + stop + else + write(psb_err_unit,*) 'Opened file ',trim(filename),' for input' + end if + else + inp_unit = psb_inp_unit + end if + + ! Read Input Parameters + read(inp_unit,*) kmethd + read(inp_unit,*) ptype + read(inp_unit,*) idim + read(inp_unit,*) afmt + read(inp_unit,*) nrhs + read(inp_unit,*) istopc + read(inp_unit,*) itmax + read(inp_unit,*) itrace + read(inp_unit,*) eps + + call psb_bcast(ctxt,kmethd) + call psb_bcast(ctxt,ptype) + call psb_bcast(ctxt,idim) + call psb_bcast(ctxt,afmt) + call psb_bcast(ctxt,nrhs) + call psb_bcast(ctxt,istopc) + call psb_bcast(ctxt,itmax) + call psb_bcast(ctxt,itrace) + call psb_bcast(ctxt,eps) + + if (inp_unit /= psb_inp_unit) then + close(inp_unit) + end if + else + ! Receive Parameters + call psb_bcast(ctxt,kmethd) + call psb_bcast(ctxt,ptype) + call psb_bcast(ctxt,idim) + call psb_bcast(ctxt,afmt) + call psb_bcast(ctxt,nrhs) + call psb_bcast(ctxt,istopc) + call psb_bcast(ctxt,itmax) + call psb_bcast(ctxt,itrace) + call psb_bcast(ctxt,eps) + end if + + if (iam == 0) then + write(psb_out_unit,'(" ")') + write(psb_out_unit,'("Grid dimensions : ",i4,"x",i4,"x",i4)') idim,idim,idim + write(psb_out_unit,'("Iterative method : ",a)') kmethd + write(psb_out_unit,'("Number of processors : ",i0)') np + write(psb_out_unit,'("Number of RHS : ",i4)') nrhs + write(psb_out_unit,'("Number of iterations : ",i3)') itmax + write(psb_out_unit,'("Storage format : ",a)') afmt + write(psb_out_unit,'(" ")') + end if + + return + + end subroutine get_parms + +end program dpdegen diff --git a/test/block_krylov/cpu/psb_perf_test.f90 b/test/block_krylov/cpu/psb_perf_test.f90 deleted file mode 100644 index ecc3b55a..00000000 --- a/test/block_krylov/cpu/psb_perf_test.f90 +++ /dev/null @@ -1,296 +0,0 @@ -program psb_perf_test - - use psb_base_mod - use psb_prec_mod - use psb_krylov_mod - use psb_util_mod - - implicit none - - ! input parameters - character(len=40) :: mtrx_file = "pde80.mtx" - character(len=2) :: filefmt = "MM" - character(len=40) :: kmethd = "GMRES" - character(len=40) :: ptype = "NONE" - character(len=20) :: part = "GRAPH" - character(len=5) :: afmt = "CSR" - integer(psb_ipk_) :: nrhs = 4 - integer(psb_ipk_) :: istopbg = 1 - integer(psb_ipk_) :: istoprg = 2 - integer(psb_ipk_) :: itmax = 500 - integer(psb_ipk_) :: itrace = -1 - integer(psb_ipk_) :: itrs = 10 - real(psb_dpk_) :: eps = 1.d-7 - - integer(psb_ipk_) :: status - - ! sparse matrices - type(psb_dspmat_type) :: a - type(psb_ldspmat_type) :: aux_a - integer(psb_ipk_), parameter :: iunit=12 - - ! preconditioner data - type(psb_dprec_type) :: prec - - ! dense matrices - real(psb_dpk_), allocatable, target :: aux_b(:,:) - real(psb_dpk_), allocatable, save :: x_mv_glob(:,:), r_mv_glob(:,:) - real(psb_dpk_), allocatable, save :: x_col_glob(:), r_col_glob(:) - real(psb_dpk_), pointer :: b_mv_glob(:,:) - real(psb_dpk_), pointer :: b_col_glob(:) - type(psb_d_multivect_type) :: b_mv, x_mv, r_mv - type(psb_d_vect_type) :: b_col, x_col, r_col - integer(psb_ipk_) :: m - real(psb_dpk_) :: random_value - - ! communications data structure - type(psb_desc_type) :: desc_a - type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: iam, np - integer(psb_lpk_) :: lnp - - ! solver paramters - integer(psb_ipk_) :: iter, ierr, ircode, methd - integer(psb_epk_) :: amatsize, precsize, descsize - real(psb_dpk_) :: err, cond - - ! other variables - character(len=20) :: name - integer(psb_ipk_) :: i, j, info, rep, reps = 10 - real(psb_dpk_) :: tb1, tb2, tr1, tr2 - real(psb_dpk_), allocatable :: resmx(:), tb(:), tr(:) - real(psb_dpk_) :: resmxp - integer(psb_ipk_), allocatable :: ivg(:) - - call psb_init(ctxt) - call psb_info(ctxt,iam,np) - if (iam < 0) then - ! This should not happen, but just in case - call psb_exit(ctxt) - stop - endif - - name='psb_perf_test' - if(psb_errstatus_fatal()) goto 9999 - info=psb_success_ - call psb_set_errverbosity(itwo) - - call psb_barrier(ctxt) - - if (iam == psb_root_) then - select case(psb_toupper(filefmt)) - case('MM') - call mm_mat_read(aux_a,info,iunit=iunit,filename=mtrx_file) - case ('HB') - call hb_read(aux_a,info,iunit=iunit,b=aux_b,filename=mtrx_file) - case default - info = -1 - write(psb_err_unit,*) 'Wrong choice for fileformat ', filefmt - end select - if (info /= psb_success_) then - write(psb_err_unit,*) 'Error while reading input matrix ' - call psb_abort(ctxt) - end if - - m = aux_a%get_nrows() - call psb_bcast(ctxt,m) - - ! At this point aux_b may still be unallocated - if (size(aux_b) == m*nrhs) then - ! if any rhs were present, broadcast the first one - write(psb_err_unit,'("Ok, got an rhs ")') - b_mv_glob =>aux_b(:,:) - else - write(psb_out_unit,'("Generating an rhs...")') - write(psb_out_unit,'("Number of RHS: ",i3)') nrhs - write(psb_out_unit,'(" ")') - call psb_realloc(m,nrhs,aux_b,ircode) - if (ircode /= 0) then - call psb_errpush(psb_err_alloc_dealloc_,name) - goto 9999 - endif - - b_mv_glob => aux_b(:,:) - do i=1, m - do j=1, nrhs - b_mv_glob(i,j) = done - !call random_number(random_value) - !b_mv_glob(i,j) = random_value - enddo - enddo - endif - else - call psb_bcast(ctxt,m) - end if - - ! switch over different partition types - select case(psb_toupper(part)) - case('BLOCK') - if (iam == psb_root_) write(psb_out_unit,'("Partition type: block")') - call psb_matdist(aux_a,a,ctxt,desc_a,info,fmt=afmt,parts=part_block) - case('GRAPH') - if (iam == psb_root_) then - write(psb_out_unit,'("Partition type: graph vector")') - write(psb_out_unit,'(" ")') - call aux_a%cscnv(info,type='csr') - lnp = np - call build_mtpart(aux_a,lnp) - endif - call psb_barrier(ctxt) - call distr_mtpart(psb_root_,ctxt) - call getv_mtpart(ivg) - call psb_matdist(aux_a,a,ctxt,desc_a,info,fmt=afmt,vg=ivg) - case default - if (iam == psb_root_) write(psb_out_unit,'("Partition type: block")') - call psb_matdist(aux_a,a,ctxt,desc_a,info,fmt=afmt,parts=part_block) - end select - - ! building the preconditioner - call prec%init(ctxt,ptype,info) - call prec%build(a,desc_a,info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='psb_precbld') - goto 9999 - end if - - if (iam == psb_root_) then - allocate(tb(reps),tr(reps)) - tb = dzero - tr = dzero - end if - - do rep=1,reps - - call psb_scatter(b_mv_glob,b_mv,desc_a,info,root=psb_root_) - call psb_geall(x_mv,desc_a,info,nrhs) - call x_mv%zero() - call psb_geasb(x_mv,desc_a,info) - call psb_geall(r_mv,desc_a,info,nrhs) - call r_mv%zero() - call psb_geasb(r_mv,desc_a,info) - - if(iam == psb_root_) then - write(psb_out_unit,'(" ")') - write(psb_out_unit,'("Starting BGMRES")') - write(psb_out_unit,'(" ")') - end if - - call psb_barrier(ctxt) - tb1 = psb_wtime() - - call psb_krylov(kmethd,a,prec,b_mv,x_mv,eps,desc_a,info,& - & itmax=itmax,iter=iter,err=err,itrace=itrace,& - & itrs=itrs,istop=istopbg) - - call psb_barrier(ctxt) - tb2 = psb_wtime() - tb1 - call psb_amx(ctxt,tb2) - - if (iam == psb_root_) then - tb(rep) = tb2 - end if - call psb_barrier(ctxt) - - call psb_geaxpby(done,b_mv,dzero,r_mv,desc_a,info) - call psb_spmm(-done,a,x_mv,done,r_mv,desc_a,info) - resmx = psb_genrm2(r_mv,desc_a,info) - resmxp = psb_geamax(r_mv,desc_a,info) - call psb_gather(x_mv_glob,x_mv,desc_a,info,root=psb_root_) - if (info == psb_success_) call psb_gather(r_mv_glob,r_mv,desc_a,info,root=psb_root_) - if (info /= psb_success_) goto 9999 - - if(iam == psb_root_) then - write(psb_out_unit,'(" ")') - write(psb_out_unit,'("Finished BGMRES")') - write(psb_out_unit,'(" ")') - end if - - call psb_gefree(b_mv, desc_a,info) - call psb_gefree(x_mv, desc_a,info) - - if(iam == psb_root_) then - write(psb_out_unit,'(" ")') - write(psb_out_unit,'("Starting sGMRES")') - write(psb_out_unit,'(" ")') - end if - - call psb_barrier(ctxt) - - do i=1,nrhs - if(iam == psb_root_) then - write(psb_out_unit,'(" ")') - write(psb_out_unit,'("Step ",i6)')i - write(psb_out_unit,'(" ")') - end if - - b_col_glob => aux_b(:,i) - call psb_scatter(b_col_glob,b_col,desc_a,info,root=psb_root_) - call psb_geall(x_col,desc_a,info) - call x_col%zero() - call psb_geasb(x_col,desc_a,info) - call psb_geall(r_col,desc_a,info) - call r_col%zero() - call psb_geasb(r_col,desc_a,info) - - cond = dzero - - call psb_barrier(ctxt) - tr1 = psb_wtime() - - call psb_krylov(kmethd,a,prec,b_col,x_col,eps,desc_a,info,& - & itmax=itmax,iter=iter,err=err,itrace=itrace,& - & istop=2,irst=itrs,cond=cond) - - call psb_barrier(ctxt) - tr2 = psb_wtime() - tr1 - call psb_amx(ctxt,tr2) - - if (iam == psb_root_) then - tr(rep) = tr(rep) + tr2 - end if - call psb_barrier(ctxt) - - call psb_geaxpby(done,b_col,dzero,r_col,desc_a,info) - call psb_spmm(-done,a,x_col,done,r_col,desc_a,info) - resmx = psb_genrm2(r_col,desc_a,info) - resmxp = psb_geamax(r_col,desc_a,info) - call psb_gather(x_col_glob,x_col,desc_a,info,root=psb_root_) - if (info == psb_success_) call psb_gather(r_col_glob,r_col,desc_a,info,root=psb_root_) - if (info /= psb_success_) goto 9999 - - call psb_gefree(b_col, desc_a,info) - call psb_gefree(x_col, desc_a,info) - end do - - end do - - call psb_barrier(ctxt) - - if(iam == psb_root_) then - write(psb_out_unit,'(" ")') - write(psb_out_unit,'("Finished sGMRES")') - write(psb_out_unit,'(" ")') - write(*,*) 'TB ', sum(tb)/size(tb), ' TR ', sum(tr)/size(tr) - end if - - call psb_spfree(a, desc_a,info) - call prec%free(info) - call psb_cdfree(desc_a,info) - call psb_exit(ctxt) - -! open(unit=10,file='test.csv',status='replace',action='write',iostat=status) - -! do i=1,size(x_mv_glob,1) -! write(10, 993) x_mv_glob(i,:) -! end do - -! 993 format(1x,*(g0, "; ")) - -! return - -9999 call psb_error(ctxt) - - return - -end program psb_perf_test - diff --git a/test/block_krylov/gpu/Makefile b/test/block_krylov/gpu/Makefile index 5979bf56..7395fe3f 100644 --- a/test/block_krylov/gpu/Makefile +++ b/test/block_krylov/gpu/Makefile @@ -17,26 +17,20 @@ LDLIBSG=$(PSBGPULDLIBS) FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FMFLAG). $(FMFLAG)$(PSBMODDIR) $(FMFLAG)$(PSBINCDIR) $(LIBRSB_DEFINES) -DBFOBJS=getp.o psb_dbf_sample.o -DCOMP=psb_dbf_compare.o +DBFOBJS=psb_dbf_sample.o DPDEGEN=psb_dpde_gen.o + EXEDIR=./runs -all: dir psb_dbf_sample psb_dbf_compare psb_dpde_gen +all: dir psb_dbf_sample psb_dpde_gen dir: (if test ! -d $(EXEDIR); then mkdir $(EXEDIR); fi) -psb_dbf_sample.o: getp.o - psb_dbf_sample: dir $(DBFOBJS) $(FLINK) $(LOPT) $(DBFOBJS) -fopenmp -o psb_dbf_sample $(FINCLUDES) $(PSBLAS_LIB) $(LDLIBSG) /bin/mv psb_dbf_sample $(EXEDIR) -psb_dbf_compare: $(DCOMP) - $(FLINK) $(LOPT) $(DCOMP) -fopenmp -o psb_dbf_compare $(FINCLUDES) $(PSBLAS_LIB) $(LDLIBSG) - /bin/mv psb_dbf_compare $(EXEDIR) - psb_dpde_gen: dir $(DPDEGEN) $(FLINK) $(LOPT) $(DPDEGEN) -fopenmp -o psb_dpde_gen $(FINCLUDES) $(PSBLAS_LIB) $(LDLIBSG) /bin/mv psb_dpde_gen $(EXEDIR) diff --git a/test/block_krylov/gpu/getp.f90 b/test/block_krylov/gpu/getp.f90 deleted file mode 100644 index 197cb646..00000000 --- a/test/block_krylov/gpu/getp.f90 +++ /dev/null @@ -1,169 +0,0 @@ -! -! Parallel Sparse BLAS version 3.5 -! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! -! -Module getp - interface get_parms - module procedure get_dparms - end interface - -contains - ! - ! Get iteration parameters from the command line - ! - subroutine get_dparms(ctxt,mtrx_file,rhs_file,filefmt,kmethd,ptype,part,& - & afmt,nrhs,istopc,itmax,itrace,itrs,eps) - use psb_base_mod - type(psb_ctxt_type) :: ctxt - character(len=2) :: filefmt - character(len=40) :: kmethd, mtrx_file, rhs_file, ptype - character(len=20) :: part - integer(psb_ipk_) :: nrhs,iret,istopc,itmax,itrace,itrs - character(len=40) :: charbuf - real(psb_dpk_) :: eps - character :: afmt*5 - integer(psb_ipk_) :: np, iam - integer(psb_ipk_) :: inparms(40), ip, inp_unit - character(len=1024) :: filename - - call psb_info(ctxt,iam,np) - if (iam == 0) then - if (command_argument_count()>0) then - call get_command_argument(1,filename) - inp_unit = 30 - open(inp_unit,file=filename,action='read',iostat=info) - if (info /= 0) then - write(psb_err_unit,*) 'Could not open file ',filename,' for input' - call psb_abort(ctxt) - stop - else - write(psb_err_unit,*) 'Opened file ',trim(filename),' for input' - end if - else - inp_unit=psb_inp_unit - end if - ! Read Input Parameters - read(inp_unit,*) ip - if (ip >= 5) then - read(inp_unit,*) mtrx_file - read(inp_unit,*) rhs_file - read(inp_unit,*) filefmt - read(inp_unit,*) kmethd - read(inp_unit,*) ptype - read(inp_unit,*) afmt - read(inp_unit,*) part - - call psb_bcast(ctxt,mtrx_file) - call psb_bcast(ctxt,rhs_file) - call psb_bcast(ctxt,filefmt) - call psb_bcast(ctxt,kmethd) - call psb_bcast(ctxt,ptype) - call psb_bcast(ctxt,afmt) - call psb_bcast(ctxt,part) - - if (ip >= 7) then - read(inp_unit,*) nrhs - else - nrhs=4 - endif - if (ip >= 8) then - read(inp_unit,*) istopc - else - istopc=1 - endif - if (ip >= 9) then - read(inp_unit,*) itmax - else - itmax=500 - endif - if (ip >= 10) then - read(inp_unit,*) itrace - else - itrace=-1 - endif - if (ip >= 11) then - read(inp_unit,*) itrs - else - itrs = 1 - endif - if (ip >= 12) then - read(inp_unit,*) eps - else - eps=1.d-6 - endif - inparms(1) = nrhs - inparms(2) = istopc - inparms(3) = itmax - inparms(4) = itrace - inparms(5) = itrs - call psb_bcast(ctxt,inparms(1:5)) - call psb_bcast(ctxt,eps) - - write(psb_out_unit,'(" ")') - write(psb_out_unit,'("Solving matrix : ",a)') mtrx_file - write(psb_out_unit,'("Number of processors : ",i3)') np - write(psb_out_unit,'("Data distribution : ",a)') part - write(psb_out_unit,'("Iterative method : ",a)') kmethd - write(psb_out_unit,'("Preconditioner : ",a)') ptype - write(psb_out_unit,'("Number of RHS : ",i3)') nrhs - write(psb_out_unit,'("Number of iterations : ",i3)') itrs - write(psb_out_unit,'("Storage format : ",a)') afmt(1:3) - write(psb_out_unit,'(" ")') - else - write(psb_err_unit,*) 'Wrong format for input file' - call psb_abort(ctxt) - stop 1 - end if - if (inp_unit /= psb_inp_unit) then - close(inp_unit) - end if - else - ! Receive Parameters - call psb_bcast(ctxt,mtrx_file) - call psb_bcast(ctxt,rhs_file) - call psb_bcast(ctxt,filefmt) - call psb_bcast(ctxt,kmethd) - call psb_bcast(ctxt,ptype) - call psb_bcast(ctxt,afmt) - call psb_bcast(ctxt,part) - - call psb_bcast(ctxt,inparms(1:5)) - nrhs = inparms(1) - istopc = inparms(2) - itmax = inparms(3) - itrace = inparms(4) - itrs = inparms(5) - call psb_bcast(ctxt,eps) - - end if - - end subroutine get_dparms - -end module getp diff --git a/test/block_krylov/gpu/psb_dbf_sample.F90 b/test/block_krylov/gpu/psb_dbf_sample.F90 index f1e65bc7..a69599bd 100644 --- a/test/block_krylov/gpu/psb_dbf_sample.F90 +++ b/test/block_krylov/gpu/psb_dbf_sample.F90 @@ -4,10 +4,7 @@ program psb_dbf_sample use psb_prec_mod use psb_krylov_mod use psb_util_mod -#ifdef HAVE_CUDA use psb_cuda_mod -#endif - use getp implicit none @@ -35,7 +32,7 @@ program psb_dbf_sample type(psb_d_cuda_csrg_sparse_mat), target :: acsrg type(psb_d_cuda_hlg_sparse_mat), target :: ahlg type(psb_d_cuda_elg_sparse_mat), target :: aelg - class(psb_d_base_sparse_mat), pointer :: agmold, acmold + class(psb_d_base_sparse_mat), pointer :: amold ! communications data structure type(psb_desc_type) :: desc_a @@ -44,12 +41,12 @@ program psb_dbf_sample integer(psb_lpk_) :: lnp ! solver paramters - integer(psb_ipk_) :: iter, itmax, ierr, itrace, ircode, methd, istopc, itrs + integer(psb_ipk_) :: iter, itmax, ierr, itrace, ircode, methd, istopc integer(psb_epk_) :: amatsize, precsize, descsize real(psb_dpk_) :: err, eps ! input parameters - character(len=5) :: afmt, agfmt = "HLG" + character(len=5) :: afmt character(len=20) :: name, part character(len=2) :: filefmt integer(psb_ipk_), parameter :: iunit=12 @@ -57,10 +54,10 @@ program psb_dbf_sample ! other variables integer(psb_ipk_) :: i, j, info real(psb_dpk_) :: t1, t2, tprec - real(psb_dpk_), allocatable :: resmx(:), res(:,:) + real(psb_dpk_), allocatable :: resmx(:), res(:,:), temp(:,:), ttt(:,:) real(psb_dpk_) :: resmxp integer(psb_ipk_), allocatable :: ivg(:) - logical :: print_matrix = .false. + logical :: print_matrix = .true. call psb_init(ctxt) call psb_info(ctxt,iam,np) @@ -85,26 +82,24 @@ program psb_dbf_sample write(psb_out_unit,'("This is the ",a," sample program")') trim(name) write(psb_out_unit,'(" ")') end if -#ifdef HAVE_CUDA - write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount() - write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_cuda_DeviceName()) -#endif + write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount() + write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_cuda_DeviceName()) ! ! get parameters ! call get_parms(ctxt,mtrx_file,rhs_file,filefmt,kmethd,ptype,& - & part,afmt,nrhs,istopc,itmax,itrace,itrs,eps) - - select case(psb_toupper(agfmt)) - case('ELG') - agmold => aelg - case('HLG') - agmold => ahlg - case('CSRG') - agmold => acsrg - case default - write(*,*) 'Unknown format defaulting to CSRG' - agmold => acsrg + & part,afmt,nrhs,istopc,itmax,itrace,eps) + + select case(psb_toupper(afmt)) + case('ELG') + amold => aelg + case('HLG') + amold => ahlg + case('CSRG') + amold => acsrg + case default + write(*,*) 'Unknown format defaulting to CSRG' + amold => acsrg end select call psb_barrier(ctxt) @@ -174,7 +169,7 @@ program psb_dbf_sample select case(psb_toupper(part)) case('BLOCK') if (iam == psb_root_) write(psb_out_unit,'("Partition type: block")') - call psb_matdist(aux_a,a,ctxt,desc_a,info,fmt=afmt,parts=part_block) + call psb_matdist(aux_a,a,ctxt,desc_a,info,fmt='CSR ',parts=part_block) case('GRAPH') if (iam == psb_root_) then @@ -188,18 +183,18 @@ program psb_dbf_sample call psb_barrier(ctxt) call distr_mtpart(psb_root_,ctxt) call getv_mtpart(ivg) - call psb_matdist(aux_a,a,ctxt,desc_a,info,fmt=afmt,vg=ivg) + call psb_matdist(aux_a,a,ctxt,desc_a,info,fmt='CSR ',vg=ivg) case default if (iam == psb_root_) write(psb_out_unit,'("Partition type: block")') - call psb_matdist(aux_a,a,ctxt,desc_a,info,fmt=afmt,parts=part_block) + call psb_matdist(aux_a,a,ctxt,desc_a,info,fmt='CSR ',parts=part_block) end select - call a%cscnv(agpu,info,mold=agmold) - if ((info /= 0).or.(psb_get_errstatus()/=0)) then - write(0,*) 'From cscnv ',info - call psb_error() - stop + call a%cscnv(agpu,info,mold=amold) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From cscnv ',info + call psb_error() + stop end if call desc_a%cnv(mold=imold) @@ -243,8 +238,7 @@ program psb_dbf_sample t1 = psb_wtime() call psb_krylov(kmethd,agpu,prec,b_mv,x_mv,eps,desc_a,info,& - & itmax=itmax,iter=iter,err=err,itrace=itrace,& - & itrs=itrs,istop=istopc) + & itmax=itmax,iter=iter,err=err,itrace=itrace,istop=istopc) call psb_barrier(ctxt) t2 = psb_wtime() - t1 @@ -254,11 +248,11 @@ program psb_dbf_sample write(psb_out_unit,'("Finished algorithm")') write(psb_out_unit,'(" ")') end if - + call psb_geaxpby(done,b_mv,dzero,r_mv,desc_a,info) call psb_spmm(-done,a,x_mv,done,r_mv,desc_a,info) - resmx = psb_genrm2(r_mv,desc_a,info) + resmx = psb_genrm2(r_mv,desc_a,info) resmxp = psb_geamax(r_mv,desc_a,info) amatsize = a%sizeof() @@ -289,18 +283,18 @@ program psb_dbf_sample write(psb_out_unit,'("Time to solve system: ",es12.5)')t2 write(psb_out_unit,'("Time per iteration: ",es12.5)')t2/(iter) write(psb_out_unit,'("Total time: ",es12.5)')t2+tprec - write(psb_out_unit,'("Residual norm 2: ",es12.5)')maxval(resmx) + write(psb_out_unit,'("Residual norm 2: ",es12.5)')sum(resmx)/size(resmx) write(psb_out_unit,'("Residual norm inf: ",es12.5)')resmxp - write(psb_out_unit,'(a8,4(2x,a20))') 'I','X(I)','R(I)','B(I)' if (print_matrix) then + write(psb_out_unit,'(a8,4(2x,a20))') 'I','X(I)','R(I)','B(I)' do i=1,m write(psb_out_unit,993) i, x_mv_glob(i,:), r_mv_glob(i,:), b_mv_glob(i,:) end do end if end if -998 format(i8,4(2x,g20.14)) -993 format(i6,4(1x,e12.6)) +998 format(i8,10(2x,g20.14)) +993 format(i6,10(1x,e12.6)) call psb_gefree(b_mv,desc_a,info) call psb_gefree(x_mv,desc_a,info) @@ -316,4 +310,101 @@ program psb_dbf_sample return +contains + ! + ! get iteration parameters from standard input + ! + subroutine get_parms(ctxt,mtrx_file,rhs_file,filefmt,kmethd,ptype,part,afmt,nrhs,istopc,itmax,itrace,eps) + type(psb_ctxt_type) :: ctxt + character(len=2) :: filefmt + character(len=40) :: kmethd, mtrx_file, rhs_file, ptype + character(len=20) :: part + character(len=5) :: afmt + integer(psb_ipk_) :: nrhs, istopc, itmax, itrace + real(psb_dpk_) :: eps + + integer(psb_ipk_) :: np, iam + integer(psb_ipk_) :: ip, inp_unit + character(len=1024) :: filename + + call psb_info(ctxt, iam, np) + + if (iam == 0) then + if (command_argument_count()>0) then + call get_command_argument(1, filename) + inp_unit = 30 + open(inp_unit, file=filename, action='read', iostat=info) + if (info /= 0) then + write(psb_err_unit,*) 'Could not open file ',filename,' for input' + call psb_abort(ctxt) + stop + else + write(psb_err_unit,*) 'Opened file ',trim(filename),' for input' + end if + else + inp_unit = psb_inp_unit + end if + + ! Read Input Parameters + read(inp_unit,*) mtrx_file + read(inp_unit,*) rhs_file + read(inp_unit,*) filefmt + read(inp_unit,*) kmethd + read(inp_unit,*) ptype + read(inp_unit,*) afmt + read(inp_unit,*) part + read(inp_unit,*) nrhs + read(inp_unit,*) istopc + read(inp_unit,*) itmax + read(inp_unit,*) itrace + read(inp_unit,*) eps + + call psb_bcast(ctxt,mtrx_file) + call psb_bcast(ctxt,rhs_file) + call psb_bcast(ctxt,filefmt) + call psb_bcast(ctxt,kmethd) + call psb_bcast(ctxt,ptype) + call psb_bcast(ctxt,afmt) + call psb_bcast(ctxt,part) + call psb_bcast(ctxt,nrhs) + call psb_bcast(ctxt,istopc) + call psb_bcast(ctxt,itmax) + call psb_bcast(ctxt,itrace) + call psb_bcast(ctxt,eps) + + if (inp_unit /= psb_inp_unit) then + close(inp_unit) + end if + else + ! Receive Parameters + call psb_bcast(ctxt,mtrx_file) + call psb_bcast(ctxt,rhs_file) + call psb_bcast(ctxt,filefmt) + call psb_bcast(ctxt,kmethd) + call psb_bcast(ctxt,ptype) + call psb_bcast(ctxt,afmt) + call psb_bcast(ctxt,part) + call psb_bcast(ctxt,nrhs) + call psb_bcast(ctxt,istopc) + call psb_bcast(ctxt,itmax) + call psb_bcast(ctxt,itrace) + call psb_bcast(ctxt,eps) + end if + + if (iam == 0) then + write(psb_out_unit,'(" ")') + write(psb_out_unit,'("Solving matrix : ",a)') mtrx_file + write(psb_out_unit,'("Data distribution : ",a)') part + write(psb_out_unit,'("Iterative method : ",a)') kmethd + write(psb_out_unit,'("Number of processors : ",i0)') np + write(psb_out_unit,'("Number of RHS : ",i4)') nrhs + write(psb_out_unit,'("Max number of iterations : ",i3)') itmax + write(psb_out_unit,'("Storage format : ",a)') afmt + write(psb_out_unit,'(" ")') + end if + + return + + end subroutine get_parms + end program psb_dbf_sample diff --git a/test/block_krylov/gpu/psb_dpde_gen.F90 b/test/block_krylov/gpu/psb_dpde_gen.F90 index 73d6930b..72c9c43f 100644 --- a/test/block_krylov/gpu/psb_dpde_gen.F90 +++ b/test/block_krylov/gpu/psb_dpde_gen.F90 @@ -525,7 +525,7 @@ program dpdegen ! input parameters character(len=40) :: kmethd, ptype character(len=5) :: afmt - integer(psb_ipk_) :: idim, nrhs, istopc, itmax, itrace, itrs + integer(psb_ipk_) :: idim, nrhs, istopc, itmax, itrace real(psb_dpk_) :: eps ! sparse matrix @@ -535,8 +535,7 @@ program dpdegen type(psb_dprec_type) :: prec ! dense matrices - real(psb_dpk_), allocatable :: x_mv_glob(:,:), r_mv_glob(:,:) - real(psb_dpk_), allocatable :: b_mv_glob(:,:) + real(psb_dpk_), allocatable :: b_mv_glob(:,:), x_mv_glob(:,:), r_mv_glob(:,:) type(psb_d_multivect_type) :: b_mv, x_mv, r_mv type(psb_d_multivect_cuda) :: gpumold type(psb_i_vect_cuda) :: imold @@ -549,7 +548,6 @@ program dpdegen ! solver parameters real(psb_dpk_) :: err, cond - integer(psb_ipk_) :: reps = 2 integer(psb_epk_) :: amatsize, precsize, descsize integer(psb_ipk_) :: iter, ierr, ircode @@ -590,12 +588,12 @@ program dpdegen write(*,*) 'This is the ',trim(name),' sample program' write(psb_out_unit,'("Number of processors: ",i8)')np end if - write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount() - write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_cuda_DeviceName()) + write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice()+1,' out of', psb_cuda_getDeviceCount() + write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice()+1,' is a: ', trim(psb_cuda_DeviceName()) ! ! get parameters ! - call get_parms(ctxt,kmethd,ptype,idim,afmt,nrhs,istopc,itmax,itrace,itrs,eps) + call get_parms(ctxt,kmethd,ptype,idim,afmt,nrhs,istopc,itmax,itrace,eps) ! ! allocate and fill in the coefficient matrix and initial vectors ! @@ -642,19 +640,6 @@ program dpdegen stop end if - ! Set RHS - call psb_geall(b_mv_glob,desc_a,info,n=nrhs) - do i=1,b_mv%get_nrows() - do j=1,b_mv%get_ncols() - call random_number(random_value) - b_mv_glob(i,j) = random_value - end do - end do - - !call psb_scatter(b_mv_glob,b_mv,desc_a,info,root=psb_root_,mold=gpumold) - call psb_geall(x_mv,desc_a,info,nrhs) - call x_mv%zero() - call psb_geasb(x_mv,desc_a,info,mold=gpumold) call psb_geall(r_mv,desc_a,info,nrhs) call r_mv%zero() call psb_geasb(r_mv,desc_a,info) @@ -691,8 +676,7 @@ program dpdegen t1 = psb_wtime() call psb_krylov(kmethd,a,prec,b_mv,x_mv,eps,desc_a,info,& - & itmax=itmax,iter=iter,err=err,itrace=itrace,& - & itrs=itrs,istop=istopc) + & itmax=itmax,iter=iter,err=err,itrace=itrace,istop=istopc) call psb_barrier(ctxt) t2 = psb_wtime() - t1 @@ -718,6 +702,7 @@ program dpdegen call psb_sum(ctxt,precsize) call psb_gather(x_mv_glob,x_mv,desc_a,info,root=psb_root_) + if (info == psb_success_) call psb_gather(b_mv_glob,b_mv,desc_a,info,root=psb_root_) if (info == psb_success_) call psb_gather(r_mv_glob,r_mv,desc_a,info,root=psb_root_) if (info /= psb_success_) goto 9999 @@ -736,18 +721,18 @@ program dpdegen write(psb_out_unit,'("Time to solve system: ",es12.5)')t2 write(psb_out_unit,'("Time per iteration: ",es12.5)')t2/(iter) write(psb_out_unit,'("Total time: ",es12.5)')t2+tprec - write(psb_out_unit,'("Residual norm 2: ",es12.5)')maxval(resmx) + write(psb_out_unit,'("Residual norm 2: ",es12.5)')sum(resmx)/size(resmx) write(psb_out_unit,'("Residual norm inf: ",es12.5)')resmxp - write(psb_out_unit,'(a8,4(2x,a20))') 'I','X(I)','R(I)','B(I)' if (print_matrix) then + write(psb_out_unit,'(a8,4(2x,a20))') 'I','X(I)','R(I)','B(I)' do i=1,m write(psb_out_unit,993) i, x_mv_glob(i,:), r_mv_glob(i,:), b_mv_glob(i,:) end do end if end if -998 format(i8,4(2x,g20.14)) -993 format(i6,4(1x,e12.6)) +998 format(i8,20(2x,g20.14)) +993 format(i6,20(1x,e12.6)) ! ! cleanup storage and exit @@ -770,11 +755,11 @@ contains ! ! get iteration parameters from standard input ! - subroutine get_parms(ctxt,kmethd,ptype,idim,afmt,nrhs,istopc,itmax,itrace,itrs,eps) + subroutine get_parms(ctxt,kmethd,ptype,idim,afmt,nrhs,istopc,itmax,itrace,eps) type(psb_ctxt_type) :: ctxt character(len=40) :: kmethd, ptype character(len=5) :: afmt - integer(psb_ipk_) :: idim, nrhs, istopc, itmax, itrace, itrs + integer(psb_ipk_) :: idim, nrhs, istopc, itmax, itrace real(psb_dpk_) :: eps integer(psb_ipk_) :: np, iam @@ -808,7 +793,6 @@ contains read(inp_unit,*) istopc read(inp_unit,*) itmax read(inp_unit,*) itrace - read(inp_unit,*) itrs read(inp_unit,*) eps call psb_bcast(ctxt,kmethd) @@ -819,7 +803,6 @@ contains call psb_bcast(ctxt,istopc) call psb_bcast(ctxt,itmax) call psb_bcast(ctxt,itrace) - call psb_bcast(ctxt,itrs) call psb_bcast(ctxt,eps) if (inp_unit /= psb_inp_unit) then @@ -835,7 +818,6 @@ contains call psb_bcast(ctxt,istopc) call psb_bcast(ctxt,itmax) call psb_bcast(ctxt,itrace) - call psb_bcast(ctxt,itrs) call psb_bcast(ctxt,eps) end if @@ -845,7 +827,7 @@ contains write(psb_out_unit,'("Iterative method : ",a)') kmethd write(psb_out_unit,'("Number of processors : ",i0)') np write(psb_out_unit,'("Number of RHS : ",i4)') nrhs - write(psb_out_unit,'("Number of iterations : ",i3)') itrs + write(psb_out_unit,'("Number of iterations : ",i3)') itmax write(psb_out_unit,'("Storage format : ",a)') afmt write(psb_out_unit,'(" ")') end if diff --git a/test/block_krylov/kernel/dpdegenmm.F90 b/test/block_krylov/kernel/dpdegenmm.F90 index 2f16db6d..a942a88f 100644 --- a/test/block_krylov/kernel/dpdegenmm.F90 +++ b/test/block_krylov/kernel/dpdegenmm.F90 @@ -588,7 +588,7 @@ program pdegenmm type(psb_d_vect_cuda) :: tmold type(psb_i_vect_cuda) :: imold #endif - real(psb_dpk_), allocatable :: x1(:,:), x2(:,:), x0(:,:) + real(psb_dpk_), allocatable :: x1(:,:), x2(:,:), x0(:,:), test(:) ! blacs parameters type(psb_ctxt_type) :: ctxt integer :: iam, np @@ -659,11 +659,6 @@ program pdegenmm ! get parameters ! call get_parms(ctxt,nrhs,acfmt,agfmt,idim,tnd) - !nrhs=8 - !acfmt='CSR' - !agfmt='CSRG' - !idim=100 - !tnd=.false. call psb_init_timers() ! ! allocate and fill in the coefficient matrix and initial vectors @@ -800,43 +795,6 @@ program pdegenmm call x_mv_g%set(x0) ! FIXME: cache flush needed here - x1 = b_mv%get_vect() - x2 = b_mv_g%get_vect() - -! ! TODO test AXPBY -! call psb_geall(xg,desc_a,info) -! call psb_geasb(xg,desc_a,info,mold=tmold) -! call xg%set(done) -! call xg%sync() -! call psb_geall(bg,desc_a,info) -! call psb_geasb(bg,desc_a,info,mold=tmold) -! !call bg%set(done+done) - -! ! ! TODO: Non funziona spgpuDaxpby (axpbyMultiVecDeviceDouble) -! call psb_geaxpby(done,xg,dzero,bg,desc_a,info) -! call psb_cuda_DeviceSync() - -! write(*,*) 'BG ', bg%is_dev(), bg%is_host(), bg%is_sync() -! call bg%sync() -! write(*,*) 'BG ', bg%is_dev(), bg%is_host(), bg%is_sync() -! do i=1,8 -! write(*,*) bg%v%v(i) -! end do - -! return - -! call x_mv_g%set(done) -! call x_mv_g%sync() - -! call psb_geaxpby(done,x_mv_g,dzero,b_mv_g,desc_a,info) - -! call b_mv_g%sync() -! do i=1,size(b_mv_g%v%v,1) -! write(*,*) b_mv_g%v%v(i,:) -! end do - -! return - call psb_barrier(ctxt) tt1 = psb_wtime() do i=1,ntests