Enable OpenMP in test/pargen

omp-threadsafe
sfilippone 2 years ago
parent 05b684ddbb
commit 08ff37332a

@ -452,9 +452,6 @@ contains
call psb_barrier(ctxt) call psb_barrier(ctxt)
t1 = psb_wtime() t1 = psb_wtime()
!$omp parallel shared(deltah,myidx,a,desc_a) !$omp parallel shared(deltah,myidx,a,desc_a)
! we build an auxiliary matrix consisting of one row at a
! time; just a small matrix. might be extended to generate
! a bunch of rows per call.
! !
block block
integer(psb_ipk_) :: i,j,ii,ib,icoeff, ix,iy,iz, ith,nth integer(psb_ipk_) :: i,j,ii,ib,icoeff, ix,iy,iz, ith,nth
@ -639,16 +636,7 @@ contains
write(psb_out_unit,'("-total time : ",es12.5)') ttot write(psb_out_unit,'("-total time : ",es12.5)') ttot
end if end if
!!$ !$omp parallel
!!$ !$omp master
!!$ block
!!$ character(len=1024) :: fname
!!$ write(fname,'(a,i4.4,a,i4.4,a)') 'a-',iam,'-',np,'.mtx'
!!$ write(0,*) iam,' Size of A ',a%get_nrows(),a%get_ncols(),a%get_nzeros()
!!$ call a%print(fname,head='Test')
!!$ end block
!!$ !$omp end master
!!$ !$omp end parallel
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
return return

@ -156,6 +156,9 @@ contains
& f,amold,vmold,imold,partition,nrl,iv) & f,amold,vmold,imold,partition,nrl,iv)
use psb_base_mod use psb_base_mod
use psb_util_mod use psb_util_mod
#if defined(OPENMP)
use omp_lib
#endif
! !
! Discretizes the partial differential equation ! Discretizes the partial differential equation
! !
@ -192,7 +195,7 @@ contains
type(psb_d_coo_sparse_mat) :: acoo type(psb_d_coo_sparse_mat) :: acoo
type(psb_d_csr_sparse_mat) :: acsr type(psb_d_csr_sparse_mat) :: acsr
real(psb_dpk_) :: zt(nb),x,y,z real(psb_dpk_) :: zt(nb),x,y,z
integer(psb_ipk_) :: nnz,nr,nlr,i,j,ii,ib,k, partition_ integer(psb_ipk_) :: nnz,nr,nlr,i,j,ii,ib,k, partition_, mysz
integer(psb_lpk_) :: m,n,glob_row,nt integer(psb_lpk_) :: m,n,glob_row,nt
integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner
! For 2D partition ! For 2D partition
@ -204,8 +207,7 @@ contains
! Process grid ! Process grid
integer(psb_ipk_) :: np, iam integer(psb_ipk_) :: np, iam
integer(psb_ipk_) :: icoeff integer(psb_ipk_) :: icoeff
integer(psb_lpk_), allocatable :: irow(:),icol(:),myidx(:) integer(psb_lpk_), allocatable :: myidx(:)
real(psb_dpk_), allocatable :: val(:)
! deltah dimension of each grid cell ! deltah dimension of each grid cell
! deltat discretization time ! deltat discretization time
real(psb_dpk_) :: deltah, sqdeltah, deltah2 real(psb_dpk_) :: deltah, sqdeltah, deltah2
@ -391,7 +393,6 @@ contains
end if end if
end block end block
case default case default
write(psb_err_unit,*) iam, 'Initialization error: should not get here' write(psb_err_unit,*) iam, 'Initialization error: should not get here'
info = -1 info = -1
@ -418,94 +419,114 @@ contains
goto 9999 goto 9999
end if end if
! we build an auxiliary matrix consisting of one row at a
! time; just a small matrix. might be extended to generate
! a bunch of rows per call.
!
allocate(val(20*nb),irow(20*nb),&
&icol(20*nb),stat=info)
if (info /= psb_success_ ) then
info=psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
endif
! loop over rows belonging to current process in a block
! distribution.
call psb_barrier(ctxt) call psb_barrier(ctxt)
t1 = psb_wtime() t1 = psb_wtime()
do ii=1, nlr,nb !$omp parallel shared(deltah,myidx,a,desc_a)
ib = min(nb,nlr-ii+1) !
icoeff = 1 block
do k=1,ib integer(psb_ipk_) :: i,j,k,ii,ib,icoeff, ix,iy, ith,nth
i=ii+k-1 integer(psb_lpk_) :: glob_row
! local matrix pointer integer(psb_lpk_), allocatable :: irow(:),icol(:)
glob_row=myidx(i) real(psb_dpk_), allocatable :: val(:)
! compute gridpoint coordinates real(psb_dpk_) :: x,y, zt(nb)
call idx2ijk(ix,iy,glob_row,idim,idim) #if defined(OPENMP)
! x, y coordinates nth = omp_get_num_threads()
x = (ix-1)*deltah ith = omp_get_thread_num()
y = (iy-1)*deltah #else
nth = 1
zt(k) = f_(x,y) ith = 0
! internal point: build discretization #endif
! allocate(val(20*nb),irow(20*nb),&
! term depending on (x-1,y) &icol(20*nb),stat=info)
! if (info /= psb_success_ ) then
val(icoeff) = -a1(x,y)/sqdeltah-b1(x,y)/deltah2 info=psb_err_alloc_dealloc_
if (ix == 1) then call psb_errpush(info,name)
zt(k) = g(dzero,y)*(-val(icoeff)) + zt(k) !goto 9999
else endif
call ijk2idx(icol(icoeff),ix-1,iy,idim,idim)
irow(icoeff) = glob_row !$omp do schedule(dynamic)
icoeff = icoeff+1 !
endif do ii=1, nlr,nb
! term depending on (x,y-1) if(info /= psb_success_) cycle
val(icoeff) = -a2(x,y)/sqdeltah-b2(x,y)/deltah2 ib = min(nb,nlr-ii+1)
if (iy == 1) then icoeff = 1
zt(k) = g(x,dzero)*(-val(icoeff)) + zt(k) do k=1,ib
else i=ii+k-1
call ijk2idx(icol(icoeff),ix,iy-1,idim,idim) ! local matrix pointer
irow(icoeff) = glob_row glob_row=myidx(i)
icoeff = icoeff+1 ! compute gridpoint coordinates
endif call idx2ijk(ix,iy,glob_row,idim,idim)
! x, y coordinates
! term depending on (x,y) x = (ix-1)*deltah
val(icoeff)=(2*done)*(a1(x,y) + a2(x,y))/sqdeltah + c(x,y) y = (iy-1)*deltah
call ijk2idx(icol(icoeff),ix,iy,idim,idim)
irow(icoeff) = glob_row zt(k) = f_(x,y)
icoeff = icoeff+1 ! internal point: build discretization
! term depending on (x,y+1) !
val(icoeff)=-a2(x,y)/sqdeltah+b2(x,y)/deltah2 ! term depending on (x-1,y)
if (iy == idim) then !
zt(k) = g(x,done)*(-val(icoeff)) + zt(k) val(icoeff) = -a1(x,y)/sqdeltah-b1(x,y)/deltah2
else if (ix == 1) then
call ijk2idx(icol(icoeff),ix,iy+1,idim,idim) zt(k) = g(dzero,y)*(-val(icoeff)) + zt(k)
irow(icoeff) = glob_row else
icoeff = icoeff+1 call ijk2idx(icol(icoeff),ix-1,iy,idim,idim)
endif irow(icoeff) = glob_row
! term depending on (x+1,y) icoeff = icoeff+1
val(icoeff)=-a1(x,y)/sqdeltah+b1(x,y)/deltah2 endif
if (ix==idim) then ! term depending on (x,y-1)
zt(k) = g(done,y)*(-val(icoeff)) + zt(k) val(icoeff) = -a2(x,y)/sqdeltah-b2(x,y)/deltah2
else if (iy == 1) then
call ijk2idx(icol(icoeff),ix+1,iy,idim,idim) zt(k) = g(x,dzero)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy-1,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y)
val(icoeff)=(2*done)*(a1(x,y) + a2(x,y))/sqdeltah + c(x,y)
call ijk2idx(icol(icoeff),ix,iy,idim,idim)
irow(icoeff) = glob_row irow(icoeff) = glob_row
icoeff = icoeff+1 icoeff = icoeff+1
endif ! term depending on (x,y+1)
val(icoeff)=-a2(x,y)/sqdeltah+b2(x,y)/deltah2
if (iy == idim) then
zt(k) = g(x,done)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy+1,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x+1,y)
val(icoeff)=-a1(x,y)/sqdeltah+b1(x,y)/deltah2
if (ix==idim) then
zt(k) = g(done,y)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix+1,iy,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
end do
#if defined(OPENMP)
!!$ write(0,*) omp_get_thread_num(),' Check insertion ',&
!!$ & irow(1:icoeff-1),':',icol(1:icoeff-1)
#endif
call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info)
if(info /= psb_success_) cycle
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info)
if(info /= psb_success_) cycle
zt(:)=dzero
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info)
if(info /= psb_success_) cycle
end do end do
call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info) !$omp end do
if(info /= psb_success_) exit deallocate(val,irow,icol)
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info) end block
if(info /= psb_success_) exit !$omp end parallel
zt(:)=dzero
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info)
if(info /= psb_success_) exit
end do
tgen = psb_wtime()-t1 tgen = psb_wtime()-t1
if(info /= psb_success_) then if(info /= psb_success_) then
info=psb_err_from_subroutine_ info=psb_err_from_subroutine_
@ -514,8 +535,6 @@ contains
goto 9999 goto 9999
end if end if
deallocate(val,irow,icol)
call psb_barrier(ctxt) call psb_barrier(ctxt)
t1 = psb_wtime() t1 = psb_wtime()
call psb_cdasb(desc_a,info,mold=imold) call psb_cdasb(desc_a,info,mold=imold)
@ -579,6 +598,9 @@ program psb_d_pde2d
use psb_krylov_mod use psb_krylov_mod
use psb_util_mod use psb_util_mod
use psb_d_pde2d_mod use psb_d_pde2d_mod
#if defined(OPENMP)
use omp_lib
#endif
implicit none implicit none
! input parameters ! input parameters
@ -600,7 +622,7 @@ program psb_d_pde2d
type(psb_d_vect_type) :: xxv,bv type(psb_d_vect_type) :: xxv,bv
! parallel environment ! parallel environment
type(psb_ctxt_type) :: ctxt type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: iam, np integer(psb_ipk_) :: iam, np, nth
! solver parameters ! solver parameters
integer(psb_ipk_) :: iter, itmax,itrace, istopc, irst, ipart integer(psb_ipk_) :: iter, itmax,itrace, istopc, irst, ipart
@ -625,7 +647,16 @@ program psb_d_pde2d
call psb_init(ctxt) call psb_init(ctxt)
call psb_info(ctxt,iam,np) call psb_info(ctxt,iam,np)
#if defined(OPENMP)
!$OMP parallel shared(nth)
!$OMP master
nth = omp_get_num_threads()
!$OMP end master
!$OMP end parallel
#else
nth = 1
#endif
if (iam < 0) then if (iam < 0) then
! This should not happen, but just in case ! This should not happen, but just in case
call psb_exit(ctxt) call psb_exit(ctxt)
@ -750,6 +781,8 @@ program psb_d_pde2d
if (iam == psb_root_) then if (iam == psb_root_) then
write(psb_out_unit,'(" ")') write(psb_out_unit,'(" ")')
write(psb_out_unit,'("Number of processes : ",i12)')np write(psb_out_unit,'("Number of processes : ",i12)')np
write(psb_out_unit,'("Number of threads : ",i12)')nth
write(psb_out_unit,'("Total number of tasks : ",i12)')nth*np
write(psb_out_unit,'("Linear system size : ",i12)') system_size write(psb_out_unit,'("Linear system size : ",i12)') system_size
write(psb_out_unit,'("Time to solve system : ",es12.5)')t2 write(psb_out_unit,'("Time to solve system : ",es12.5)')t2
write(psb_out_unit,'("Time per iteration : ",es12.5)')t2/iter write(psb_out_unit,'("Time per iteration : ",es12.5)')t2/iter
@ -790,7 +823,8 @@ contains
! !
! get iteration parameters from standard input ! get iteration parameters from standard input
! !
subroutine get_parms(ctxt,kmethd,ptype,afmt,idim,istopc,itmax,itrace,irst,ipart,parms) subroutine get_parms(ctxt,kmethd,ptype,afmt,idim,istopc,&
& itmax,itrace,irst,ipart,parms)
type(psb_ctxt_type) :: ctxt type(psb_ctxt_type) :: ctxt
character(len=*) :: kmethd, ptype, afmt character(len=*) :: kmethd, ptype, afmt
integer(psb_ipk_) :: idim, istopc,itmax,itrace,irst,ipart integer(psb_ipk_) :: idim, istopc,itmax,itrace,irst,ipart

@ -172,6 +172,9 @@ contains
& f,amold,vmold,imold,partition,nrl,iv) & f,amold,vmold,imold,partition,nrl,iv)
use psb_base_mod use psb_base_mod
use psb_util_mod use psb_util_mod
#if defined(OPENMP)
use omp_lib
#endif
! !
! Discretizes the partial differential equation ! Discretizes the partial differential equation
! !
@ -220,8 +223,7 @@ contains
! Process grid ! Process grid
integer(psb_ipk_) :: np, iam integer(psb_ipk_) :: np, iam
integer(psb_ipk_) :: icoeff integer(psb_ipk_) :: icoeff
integer(psb_lpk_), allocatable :: irow(:),icol(:),myidx(:) integer(psb_lpk_), allocatable :: myidx(:)
real(psb_dpk_), allocatable :: val(:)
! deltah dimension of each grid cell ! deltah dimension of each grid cell
! deltat discretization time ! deltat discretization time
real(psb_dpk_) :: deltah, sqdeltah, deltah2 real(psb_dpk_) :: deltah, sqdeltah, deltah2
@ -377,7 +379,6 @@ contains
! !
call psb_cdall(ctxt,desc_a,info,vl=myidx) call psb_cdall(ctxt,desc_a,info,vl=myidx)
! !
! Specify process topology ! Specify process topology
! !
@ -447,69 +448,79 @@ contains
goto 9999 goto 9999
end if end if
! we build an auxiliary matrix consisting of one row at a
! time; just a small matrix. might be extended to generate
! a bunch of rows per call.
!
allocate(val(20*nb),irow(20*nb),&
&icol(20*nb),stat=info)
if (info /= psb_success_ ) then
info=psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
endif
! loop over rows belonging to current process in a block
! distribution.
call psb_barrier(ctxt) call psb_barrier(ctxt)
t1 = psb_wtime() t1 = psb_wtime()
do ii=1, nlr, nb !$omp parallel shared(deltah,myidx,a,desc_a)
ib = min(nb,nlr-ii+1) !
!ib = min(nb,mysz-ii+1) block
icoeff = 1 integer(psb_ipk_) :: i,j,k,ii,ib,icoeff, ix,iy,iz, ith,nth
do k=1,ib integer(psb_lpk_) :: glob_row
i=ii+k-1 integer(psb_lpk_), allocatable :: irow(:),icol(:)
! local matrix pointer real(psb_dpk_), allocatable :: val(:)
glob_row=myidx(i) real(psb_dpk_) :: x,y,z, zt(nb)
! compute gridpoint coordinates #if defined(OPENMP)
call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim) nth = omp_get_num_threads()
! x, y, z coordinates ith = omp_get_thread_num()
x = (ix-1)*deltah #else
y = (iy-1)*deltah nth = 1
z = (iz-1)*deltah ith = 0
zt(k) = f_(x,y,z) #endif
! internal point: build discretization allocate(val(20*nb),irow(20*nb),&
! &icol(20*nb),stat=info)
! term depending on (x-1,y,z) if (info /= psb_success_ ) then
! info=psb_err_alloc_dealloc_
val(icoeff) = -a1(x,y,z)/sqdeltah-b1(x,y,z)/deltah2 call psb_errpush(info,name)
if (ix == 1) then !goto 9999
zt(k) = g(dzero,y,z)*(-val(icoeff)) + zt(k) endif
else
call ijk2idx(icol(icoeff),ix-1,iy,iz,idim,idim,idim) !$omp do schedule(dynamic)
irow(icoeff) = glob_row !
icoeff = icoeff+1 do ii=1, nlr, nb
endif if(info /= psb_success_) cycle
! term depending on (x,y-1,z) ib = min(nb,nlr-ii+1)
val(icoeff) = -a2(x,y,z)/sqdeltah-b2(x,y,z)/deltah2 !ib = min(nb,mysz-ii+1)
if (iy == 1) then icoeff = 1
zt(k) = g(x,dzero,z)*(-val(icoeff)) + zt(k) do k=1,ib
else i=ii+k-1
call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim) ! local matrix pointer
irow(icoeff) = glob_row glob_row=myidx(i)
icoeff = icoeff+1 ! compute gridpoint coordinates
endif call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim)
! term depending on (x,y,z-1) ! x, y, z coordinates
val(icoeff)=-a3(x,y,z)/sqdeltah-b3(x,y,z)/deltah2 x = (ix-1)*deltah
if (iz == 1) then y = (iy-1)*deltah
zt(k) = g(x,y,dzero)*(-val(icoeff)) + zt(k) z = (iz-1)*deltah
else zt(k) = f_(x,y,z)
call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim) ! internal point: build discretization
irow(icoeff) = glob_row !
icoeff = icoeff+1 ! term depending on (x-1,y,z)
endif !
val(icoeff) = -a1(x,y,z)/sqdeltah-b1(x,y,z)/deltah2
if (ix == 1) then
zt(k) = g(dzero,y,z)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix-1,iy,iz,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y-1,z)
val(icoeff) = -a2(x,y,z)/sqdeltah-b2(x,y,z)/deltah2
if (iy == 1) then
zt(k) = g(x,dzero,z)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y,z-1)
val(icoeff)=-a3(x,y,z)/sqdeltah-b3(x,y,z)/deltah2
if (iz == 1) then
zt(k) = g(x,y,dzero)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y,z) ! term depending on (x,y,z)
val(icoeff)=(2*done)*(a1(x,y,z)+a2(x,y,z)+a3(x,y,z))/sqdeltah & val(icoeff)=(2*done)*(a1(x,y,z)+a2(x,y,z)+a3(x,y,z))/sqdeltah &
@ -546,14 +557,22 @@ contains
endif endif
end do end do
call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info) #if defined(OPENMP)
if(info /= psb_success_) exit !!$ write(0,*) omp_get_thread_num(),' Check insertion ',&
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info) !!$ & irow(1:icoeff-1),':',icol(1:icoeff-1)
if(info /= psb_success_) exit #endif
zt(:)=dzero call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info)
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info) if(info /= psb_success_) cycle
if(info /= psb_success_) exit call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info)
end do if(info /= psb_success_) cycle
zt(:)=dzero
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info)
if(info /= psb_success_) cycle
end do
!$omp end do
deallocate(val,irow,icol)
end block
!$omp end parallel
tgen = psb_wtime()-t1 tgen = psb_wtime()-t1
if(info /= psb_success_) then if(info /= psb_success_) then
@ -569,113 +588,6 @@ contains
call psb_cdasb(desc_a,info,mold=imold) call psb_cdasb(desc_a,info,mold=imold)
tcdasb = psb_wtime()-t1 tcdasb = psb_wtime()-t1
if (.false.) then
!
! Add extra rows to test remote build.
!
block
integer(psb_ipk_) :: ks, i
ks = desc_a%get_local_cols()-desc_a%get_local_rows()
if (ks > 0) ks = max(1,ks / 10)
mysz = nlr+ks
call psb_realloc(mysz,myidx,info)
do i=nlr+1, mysz
myidx(i) = i
end do
call desc_a%l2gv1(myidx(nlr+1:mysz),info)
!write(0,*) iam,' Check on extra nodes ',nlr,mysz,':',myidx(nlr+1:mysz)
do ii= nlr+1, mysz, nb
ib = min(nb,mysz-ii+1)
icoeff = 1
do k=1,ib
i=ii+k-1
! local matrix pointer
glob_row=myidx(i)
! compute gridpoint coordinates
call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim)
! x, y, z coordinates
x = (ix-1)*deltah
y = (iy-1)*deltah
z = (iz-1)*deltah
zt(k) = f_(x,y,z)
! internal point: build discretization
!
! term depending on (x-1,y,z)
!
val(icoeff) = -a1(x,y,z)/sqdeltah-b1(x,y,z)/deltah2
if (ix == 1) then
zt(k) = g(dzero,y,z)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix-1,iy,iz,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y-1,z)
val(icoeff) = -a2(x,y,z)/sqdeltah-b2(x,y,z)/deltah2
if (iy == 1) then
zt(k) = g(x,dzero,z)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y,z-1)
val(icoeff)=-a3(x,y,z)/sqdeltah-b3(x,y,z)/deltah2
if (iz == 1) then
zt(k) = g(x,y,dzero)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y,z)
val(icoeff)=(2*done)*(a1(x,y,z)+a2(x,y,z)+a3(x,y,z))/sqdeltah &
& + c(x,y,z)
call ijk2idx(icol(icoeff),ix,iy,iz,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
! term depending on (x,y,z+1)
val(icoeff)=-a3(x,y,z)/sqdeltah+b3(x,y,z)/deltah2
if (iz == idim) then
zt(k) = g(x,y,done)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy,iz+1,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y+1,z)
val(icoeff)=-a2(x,y,z)/sqdeltah+b2(x,y,z)/deltah2
if (iy == idim) then
zt(k) = g(x,done,z)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy+1,iz,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x+1,y,z)
val(icoeff)=-a1(x,y,z)/sqdeltah+b1(x,y,z)/deltah2
if (ix==idim) then
zt(k) = g(done,y,z)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix+1,iy,iz,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
end do
call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info)
if(info /= psb_success_) exit
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info)
if(info /= psb_success_) exit
zt(:)=dzero
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info)
if(info /= psb_success_) exit
end do
end block
end if
call psb_barrier(ctxt) call psb_barrier(ctxt)
t1 = psb_wtime() t1 = psb_wtime()
if (info == psb_success_) then if (info == psb_success_) then
@ -719,7 +631,7 @@ contains
write(psb_out_unit,'("-total time : ",es12.5)') ttot write(psb_out_unit,'("-total time : ",es12.5)') ttot
end if end if
deallocate(val,irow,icol)
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
return return
@ -744,6 +656,9 @@ program psb_d_pde3d
use psb_krylov_mod use psb_krylov_mod
use psb_util_mod use psb_util_mod
use psb_d_pde3d_mod use psb_d_pde3d_mod
#if defined(OPENMP)
use omp_lib
#endif
implicit none implicit none
! input parameters ! input parameters
@ -765,7 +680,7 @@ program psb_d_pde3d
type(psb_d_vect_type) :: xxv,bv type(psb_d_vect_type) :: xxv,bv
! parallel environment ! parallel environment
type(psb_ctxt_type) :: ctxt type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: iam, np integer(psb_ipk_) :: iam, np, nth
! solver parameters ! solver parameters
integer(psb_ipk_) :: iter, itmax,itrace, istopc, irst, ipart integer(psb_ipk_) :: iter, itmax,itrace, istopc, irst, ipart
@ -790,7 +705,16 @@ program psb_d_pde3d
call psb_init(ctxt) call psb_init(ctxt)
call psb_info(ctxt,iam,np) call psb_info(ctxt,iam,np)
#if defined(OPENMP)
!$OMP parallel shared(nth)
!$OMP master
nth = omp_get_num_threads()
!$OMP end master
!$OMP end parallel
#else
nth = 1
#endif
if (iam < 0) then if (iam < 0) then
! This should not happen, but just in case ! This should not happen, but just in case
call psb_exit(ctxt) call psb_exit(ctxt)
@ -914,6 +838,8 @@ program psb_d_pde3d
if (iam == psb_root_) then if (iam == psb_root_) then
write(psb_out_unit,'(" ")') write(psb_out_unit,'(" ")')
write(psb_out_unit,'("Number of processes : ",i12)')np write(psb_out_unit,'("Number of processes : ",i12)')np
write(psb_out_unit,'("Number of threads : ",i12)')nth
write(psb_out_unit,'("Total number of tasks : ",i12)')nth*np
write(psb_out_unit,'("Linear system size : ",i12)') system_size write(psb_out_unit,'("Linear system size : ",i12)') system_size
write(psb_out_unit,'("Time to solve system : ",es12.5)')t2 write(psb_out_unit,'("Time to solve system : ",es12.5)')t2
write(psb_out_unit,'("Time per iteration : ",es12.5)')t2/iter write(psb_out_unit,'("Time per iteration : ",es12.5)')t2/iter

@ -156,6 +156,9 @@ contains
& f,amold,vmold,imold,partition,nrl,iv) & f,amold,vmold,imold,partition,nrl,iv)
use psb_base_mod use psb_base_mod
use psb_util_mod use psb_util_mod
#if defined(OPENMP)
use omp_lib
#endif
! !
! Discretizes the partial differential equation ! Discretizes the partial differential equation
! !
@ -192,7 +195,7 @@ contains
type(psb_s_coo_sparse_mat) :: acoo type(psb_s_coo_sparse_mat) :: acoo
type(psb_s_csr_sparse_mat) :: acsr type(psb_s_csr_sparse_mat) :: acsr
real(psb_spk_) :: zt(nb),x,y,z real(psb_spk_) :: zt(nb),x,y,z
integer(psb_ipk_) :: nnz,nr,nlr,i,j,ii,ib,k, partition_ integer(psb_ipk_) :: nnz,nr,nlr,i,j,ii,ib,k, partition_, mysz
integer(psb_lpk_) :: m,n,glob_row,nt integer(psb_lpk_) :: m,n,glob_row,nt
integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner
! For 2D partition ! For 2D partition
@ -204,8 +207,7 @@ contains
! Process grid ! Process grid
integer(psb_ipk_) :: np, iam integer(psb_ipk_) :: np, iam
integer(psb_ipk_) :: icoeff integer(psb_ipk_) :: icoeff
integer(psb_lpk_), allocatable :: irow(:),icol(:),myidx(:) integer(psb_lpk_), allocatable :: myidx(:)
real(psb_spk_), allocatable :: val(:)
! deltah dimension of each grid cell ! deltah dimension of each grid cell
! deltat discretization time ! deltat discretization time
real(psb_spk_) :: deltah, sqdeltah, deltah2 real(psb_spk_) :: deltah, sqdeltah, deltah2
@ -391,7 +393,6 @@ contains
end if end if
end block end block
case default case default
write(psb_err_unit,*) iam, 'Initialization error: should not get here' write(psb_err_unit,*) iam, 'Initialization error: should not get here'
info = -1 info = -1
@ -418,94 +419,114 @@ contains
goto 9999 goto 9999
end if end if
! we build an auxiliary matrix consisting of one row at a
! time; just a small matrix. might be extended to generate
! a bunch of rows per call.
!
allocate(val(20*nb),irow(20*nb),&
&icol(20*nb),stat=info)
if (info /= psb_success_ ) then
info=psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
endif
! loop over rows belonging to current process in a block
! distribution.
call psb_barrier(ctxt) call psb_barrier(ctxt)
t1 = psb_wtime() t1 = psb_wtime()
do ii=1, nlr,nb !$omp parallel shared(deltah,myidx,a,desc_a)
ib = min(nb,nlr-ii+1) !
icoeff = 1 block
do k=1,ib integer(psb_ipk_) :: i,j,k,ii,ib,icoeff, ix,iy, ith,nth
i=ii+k-1 integer(psb_lpk_) :: glob_row
! local matrix pointer integer(psb_lpk_), allocatable :: irow(:),icol(:)
glob_row=myidx(i) real(psb_spk_), allocatable :: val(:)
! compute gridpoint coordinates real(psb_spk_) :: x,y, zt(nb)
call idx2ijk(ix,iy,glob_row,idim,idim) #if defined(OPENMP)
! x, y coordinates nth = omp_get_num_threads()
x = (ix-1)*deltah ith = omp_get_thread_num()
y = (iy-1)*deltah #else
nth = 1
zt(k) = f_(x,y) ith = 0
! internal point: build discretization #endif
! allocate(val(20*nb),irow(20*nb),&
! term depending on (x-1,y) &icol(20*nb),stat=info)
! if (info /= psb_success_ ) then
val(icoeff) = -a1(x,y)/sqdeltah-b1(x,y)/deltah2 info=psb_err_alloc_dealloc_
if (ix == 1) then call psb_errpush(info,name)
zt(k) = g(szero,y)*(-val(icoeff)) + zt(k) !goto 9999
else endif
call ijk2idx(icol(icoeff),ix-1,iy,idim,idim)
irow(icoeff) = glob_row !$omp do schedule(dynamic)
icoeff = icoeff+1 !
endif do ii=1, nlr,nb
! term depending on (x,y-1) if(info /= psb_success_) cycle
val(icoeff) = -a2(x,y)/sqdeltah-b2(x,y)/deltah2 ib = min(nb,nlr-ii+1)
if (iy == 1) then icoeff = 1
zt(k) = g(x,szero)*(-val(icoeff)) + zt(k) do k=1,ib
else i=ii+k-1
call ijk2idx(icol(icoeff),ix,iy-1,idim,idim) ! local matrix pointer
irow(icoeff) = glob_row glob_row=myidx(i)
icoeff = icoeff+1 ! compute gridpoint coordinates
endif call idx2ijk(ix,iy,glob_row,idim,idim)
! x, y coordinates
! term depending on (x,y) x = (ix-1)*deltah
val(icoeff)=(2*sone)*(a1(x,y) + a2(x,y))/sqdeltah + c(x,y) y = (iy-1)*deltah
call ijk2idx(icol(icoeff),ix,iy,idim,idim)
irow(icoeff) = glob_row zt(k) = f_(x,y)
icoeff = icoeff+1 ! internal point: build discretization
! term depending on (x,y+1) !
val(icoeff)=-a2(x,y)/sqdeltah+b2(x,y)/deltah2 ! term depending on (x-1,y)
if (iy == idim) then !
zt(k) = g(x,sone)*(-val(icoeff)) + zt(k) val(icoeff) = -a1(x,y)/sqdeltah-b1(x,y)/deltah2
else if (ix == 1) then
call ijk2idx(icol(icoeff),ix,iy+1,idim,idim) zt(k) = g(szero,y)*(-val(icoeff)) + zt(k)
irow(icoeff) = glob_row else
icoeff = icoeff+1 call ijk2idx(icol(icoeff),ix-1,iy,idim,idim)
endif irow(icoeff) = glob_row
! term depending on (x+1,y) icoeff = icoeff+1
val(icoeff)=-a1(x,y)/sqdeltah+b1(x,y)/deltah2 endif
if (ix==idim) then ! term depending on (x,y-1)
zt(k) = g(sone,y)*(-val(icoeff)) + zt(k) val(icoeff) = -a2(x,y)/sqdeltah-b2(x,y)/deltah2
else if (iy == 1) then
call ijk2idx(icol(icoeff),ix+1,iy,idim,idim) zt(k) = g(x,szero)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy-1,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y)
val(icoeff)=(2*sone)*(a1(x,y) + a2(x,y))/sqdeltah + c(x,y)
call ijk2idx(icol(icoeff),ix,iy,idim,idim)
irow(icoeff) = glob_row irow(icoeff) = glob_row
icoeff = icoeff+1 icoeff = icoeff+1
endif ! term depending on (x,y+1)
val(icoeff)=-a2(x,y)/sqdeltah+b2(x,y)/deltah2
if (iy == idim) then
zt(k) = g(x,sone)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy+1,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x+1,y)
val(icoeff)=-a1(x,y)/sqdeltah+b1(x,y)/deltah2
if (ix==idim) then
zt(k) = g(sone,y)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix+1,iy,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
end do
#if defined(OPENMP)
!!$ write(0,*) omp_get_thread_num(),' Check insertion ',&
!!$ & irow(1:icoeff-1),':',icol(1:icoeff-1)
#endif
call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info)
if(info /= psb_success_) cycle
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info)
if(info /= psb_success_) cycle
zt(:)=szero
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info)
if(info /= psb_success_) cycle
end do end do
call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info) !$omp end do
if(info /= psb_success_) exit deallocate(val,irow,icol)
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info) end block
if(info /= psb_success_) exit !$omp end parallel
zt(:)=szero
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info)
if(info /= psb_success_) exit
end do
tgen = psb_wtime()-t1 tgen = psb_wtime()-t1
if(info /= psb_success_) then if(info /= psb_success_) then
info=psb_err_from_subroutine_ info=psb_err_from_subroutine_
@ -514,8 +535,6 @@ contains
goto 9999 goto 9999
end if end if
deallocate(val,irow,icol)
call psb_barrier(ctxt) call psb_barrier(ctxt)
t1 = psb_wtime() t1 = psb_wtime()
call psb_cdasb(desc_a,info,mold=imold) call psb_cdasb(desc_a,info,mold=imold)
@ -579,6 +598,9 @@ program psb_s_pde2d
use psb_krylov_mod use psb_krylov_mod
use psb_util_mod use psb_util_mod
use psb_s_pde2d_mod use psb_s_pde2d_mod
#if defined(OPENMP)
use omp_lib
#endif
implicit none implicit none
! input parameters ! input parameters
@ -600,7 +622,7 @@ program psb_s_pde2d
type(psb_s_vect_type) :: xxv,bv type(psb_s_vect_type) :: xxv,bv
! parallel environment ! parallel environment
type(psb_ctxt_type) :: ctxt type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: iam, np integer(psb_ipk_) :: iam, np, nth
! solver parameters ! solver parameters
integer(psb_ipk_) :: iter, itmax,itrace, istopc, irst, ipart integer(psb_ipk_) :: iter, itmax,itrace, istopc, irst, ipart
@ -625,7 +647,16 @@ program psb_s_pde2d
call psb_init(ctxt) call psb_init(ctxt)
call psb_info(ctxt,iam,np) call psb_info(ctxt,iam,np)
#if defined(OPENMP)
!$OMP parallel shared(nth)
!$OMP master
nth = omp_get_num_threads()
!$OMP end master
!$OMP end parallel
#else
nth = 1
#endif
if (iam < 0) then if (iam < 0) then
! This should not happen, but just in case ! This should not happen, but just in case
call psb_exit(ctxt) call psb_exit(ctxt)
@ -750,6 +781,8 @@ program psb_s_pde2d
if (iam == psb_root_) then if (iam == psb_root_) then
write(psb_out_unit,'(" ")') write(psb_out_unit,'(" ")')
write(psb_out_unit,'("Number of processes : ",i12)')np write(psb_out_unit,'("Number of processes : ",i12)')np
write(psb_out_unit,'("Number of threads : ",i12)')nth
write(psb_out_unit,'("Total number of tasks : ",i12)')nth*np
write(psb_out_unit,'("Linear system size : ",i12)') system_size write(psb_out_unit,'("Linear system size : ",i12)') system_size
write(psb_out_unit,'("Time to solve system : ",es12.5)')t2 write(psb_out_unit,'("Time to solve system : ",es12.5)')t2
write(psb_out_unit,'("Time per iteration : ",es12.5)')t2/iter write(psb_out_unit,'("Time per iteration : ",es12.5)')t2/iter
@ -790,7 +823,8 @@ contains
! !
! get iteration parameters from standard input ! get iteration parameters from standard input
! !
subroutine get_parms(ctxt,kmethd,ptype,afmt,idim,istopc,itmax,itrace,irst,ipart,parms) subroutine get_parms(ctxt,kmethd,ptype,afmt,idim,istopc,&
& itmax,itrace,irst,ipart,parms)
type(psb_ctxt_type) :: ctxt type(psb_ctxt_type) :: ctxt
character(len=*) :: kmethd, ptype, afmt character(len=*) :: kmethd, ptype, afmt
integer(psb_ipk_) :: idim, istopc,itmax,itrace,irst,ipart integer(psb_ipk_) :: idim, istopc,itmax,itrace,irst,ipart

@ -172,6 +172,9 @@ contains
& f,amold,vmold,imold,partition,nrl,iv) & f,amold,vmold,imold,partition,nrl,iv)
use psb_base_mod use psb_base_mod
use psb_util_mod use psb_util_mod
#if defined(OPENMP)
use omp_lib
#endif
! !
! Discretizes the partial differential equation ! Discretizes the partial differential equation
! !
@ -220,8 +223,7 @@ contains
! Process grid ! Process grid
integer(psb_ipk_) :: np, iam integer(psb_ipk_) :: np, iam
integer(psb_ipk_) :: icoeff integer(psb_ipk_) :: icoeff
integer(psb_lpk_), allocatable :: irow(:),icol(:),myidx(:) integer(psb_lpk_), allocatable :: myidx(:)
real(psb_spk_), allocatable :: val(:)
! deltah dimension of each grid cell ! deltah dimension of each grid cell
! deltat discretization time ! deltat discretization time
real(psb_spk_) :: deltah, sqdeltah, deltah2 real(psb_spk_) :: deltah, sqdeltah, deltah2
@ -377,7 +379,6 @@ contains
! !
call psb_cdall(ctxt,desc_a,info,vl=myidx) call psb_cdall(ctxt,desc_a,info,vl=myidx)
! !
! Specify process topology ! Specify process topology
! !
@ -447,69 +448,79 @@ contains
goto 9999 goto 9999
end if end if
! we build an auxiliary matrix consisting of one row at a
! time; just a small matrix. might be extended to generate
! a bunch of rows per call.
!
allocate(val(20*nb),irow(20*nb),&
&icol(20*nb),stat=info)
if (info /= psb_success_ ) then
info=psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
endif
! loop over rows belonging to current process in a block
! distribution.
call psb_barrier(ctxt) call psb_barrier(ctxt)
t1 = psb_wtime() t1 = psb_wtime()
do ii=1, nlr, nb !$omp parallel shared(deltah,myidx,a,desc_a)
ib = min(nb,nlr-ii+1) !
!ib = min(nb,mysz-ii+1) block
icoeff = 1 integer(psb_ipk_) :: i,j,k,ii,ib,icoeff, ix,iy,iz, ith,nth
do k=1,ib integer(psb_lpk_) :: glob_row
i=ii+k-1 integer(psb_lpk_), allocatable :: irow(:),icol(:)
! local matrix pointer real(psb_spk_), allocatable :: val(:)
glob_row=myidx(i) real(psb_spk_) :: x,y,z, zt(nb)
! compute gridpoint coordinates #if defined(OPENMP)
call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim) nth = omp_get_num_threads()
! x, y, z coordinates ith = omp_get_thread_num()
x = (ix-1)*deltah #else
y = (iy-1)*deltah nth = 1
z = (iz-1)*deltah ith = 0
zt(k) = f_(x,y,z) #endif
! internal point: build discretization allocate(val(20*nb),irow(20*nb),&
! &icol(20*nb),stat=info)
! term depending on (x-1,y,z) if (info /= psb_success_ ) then
! info=psb_err_alloc_dealloc_
val(icoeff) = -a1(x,y,z)/sqdeltah-b1(x,y,z)/deltah2 call psb_errpush(info,name)
if (ix == 1) then !goto 9999
zt(k) = g(szero,y,z)*(-val(icoeff)) + zt(k) endif
else
call ijk2idx(icol(icoeff),ix-1,iy,iz,idim,idim,idim) !$omp do schedule(dynamic)
irow(icoeff) = glob_row !
icoeff = icoeff+1 do ii=1, nlr, nb
endif if(info /= psb_success_) cycle
! term depending on (x,y-1,z) ib = min(nb,nlr-ii+1)
val(icoeff) = -a2(x,y,z)/sqdeltah-b2(x,y,z)/deltah2 !ib = min(nb,mysz-ii+1)
if (iy == 1) then icoeff = 1
zt(k) = g(x,szero,z)*(-val(icoeff)) + zt(k) do k=1,ib
else i=ii+k-1
call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim) ! local matrix pointer
irow(icoeff) = glob_row glob_row=myidx(i)
icoeff = icoeff+1 ! compute gridpoint coordinates
endif call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim)
! term depending on (x,y,z-1) ! x, y, z coordinates
val(icoeff)=-a3(x,y,z)/sqdeltah-b3(x,y,z)/deltah2 x = (ix-1)*deltah
if (iz == 1) then y = (iy-1)*deltah
zt(k) = g(x,y,szero)*(-val(icoeff)) + zt(k) z = (iz-1)*deltah
else zt(k) = f_(x,y,z)
call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim) ! internal point: build discretization
irow(icoeff) = glob_row !
icoeff = icoeff+1 ! term depending on (x-1,y,z)
endif !
val(icoeff) = -a1(x,y,z)/sqdeltah-b1(x,y,z)/deltah2
if (ix == 1) then
zt(k) = g(szero,y,z)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix-1,iy,iz,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y-1,z)
val(icoeff) = -a2(x,y,z)/sqdeltah-b2(x,y,z)/deltah2
if (iy == 1) then
zt(k) = g(x,szero,z)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y,z-1)
val(icoeff)=-a3(x,y,z)/sqdeltah-b3(x,y,z)/deltah2
if (iz == 1) then
zt(k) = g(x,y,szero)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y,z) ! term depending on (x,y,z)
val(icoeff)=(2*sone)*(a1(x,y,z)+a2(x,y,z)+a3(x,y,z))/sqdeltah & val(icoeff)=(2*sone)*(a1(x,y,z)+a2(x,y,z)+a3(x,y,z))/sqdeltah &
@ -546,14 +557,22 @@ contains
endif endif
end do end do
call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info) #if defined(OPENMP)
if(info /= psb_success_) exit !!$ write(0,*) omp_get_thread_num(),' Check insertion ',&
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info) !!$ & irow(1:icoeff-1),':',icol(1:icoeff-1)
if(info /= psb_success_) exit #endif
zt(:)=szero call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info)
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info) if(info /= psb_success_) cycle
if(info /= psb_success_) exit call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info)
end do if(info /= psb_success_) cycle
zt(:)=szero
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info)
if(info /= psb_success_) cycle
end do
!$omp end do
deallocate(val,irow,icol)
end block
!$omp end parallel
tgen = psb_wtime()-t1 tgen = psb_wtime()-t1
if(info /= psb_success_) then if(info /= psb_success_) then
@ -569,113 +588,6 @@ contains
call psb_cdasb(desc_a,info,mold=imold) call psb_cdasb(desc_a,info,mold=imold)
tcdasb = psb_wtime()-t1 tcdasb = psb_wtime()-t1
if (.false.) then
!
! Add extra rows to test remote build.
!
block
integer(psb_ipk_) :: ks, i
ks = desc_a%get_local_cols()-desc_a%get_local_rows()
if (ks > 0) ks = max(1,ks / 10)
mysz = nlr+ks
call psb_realloc(mysz,myidx,info)
do i=nlr+1, mysz
myidx(i) = i
end do
call desc_a%l2gv1(myidx(nlr+1:mysz),info)
!write(0,*) iam,' Check on extra nodes ',nlr,mysz,':',myidx(nlr+1:mysz)
do ii= nlr+1, mysz, nb
ib = min(nb,mysz-ii+1)
icoeff = 1
do k=1,ib
i=ii+k-1
! local matrix pointer
glob_row=myidx(i)
! compute gridpoint coordinates
call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim)
! x, y, z coordinates
x = (ix-1)*deltah
y = (iy-1)*deltah
z = (iz-1)*deltah
zt(k) = f_(x,y,z)
! internal point: build discretization
!
! term depending on (x-1,y,z)
!
val(icoeff) = -a1(x,y,z)/sqdeltah-b1(x,y,z)/deltah2
if (ix == 1) then
zt(k) = g(szero,y,z)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix-1,iy,iz,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y-1,z)
val(icoeff) = -a2(x,y,z)/sqdeltah-b2(x,y,z)/deltah2
if (iy == 1) then
zt(k) = g(x,szero,z)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y,z-1)
val(icoeff)=-a3(x,y,z)/sqdeltah-b3(x,y,z)/deltah2
if (iz == 1) then
zt(k) = g(x,y,szero)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y,z)
val(icoeff)=(2*sone)*(a1(x,y,z)+a2(x,y,z)+a3(x,y,z))/sqdeltah &
& + c(x,y,z)
call ijk2idx(icol(icoeff),ix,iy,iz,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
! term depending on (x,y,z+1)
val(icoeff)=-a3(x,y,z)/sqdeltah+b3(x,y,z)/deltah2
if (iz == idim) then
zt(k) = g(x,y,sone)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy,iz+1,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y+1,z)
val(icoeff)=-a2(x,y,z)/sqdeltah+b2(x,y,z)/deltah2
if (iy == idim) then
zt(k) = g(x,sone,z)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy+1,iz,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x+1,y,z)
val(icoeff)=-a1(x,y,z)/sqdeltah+b1(x,y,z)/deltah2
if (ix==idim) then
zt(k) = g(sone,y,z)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix+1,iy,iz,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
end do
call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info)
if(info /= psb_success_) exit
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info)
if(info /= psb_success_) exit
zt(:)=szero
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info)
if(info /= psb_success_) exit
end do
end block
end if
call psb_barrier(ctxt) call psb_barrier(ctxt)
t1 = psb_wtime() t1 = psb_wtime()
if (info == psb_success_) then if (info == psb_success_) then
@ -719,7 +631,7 @@ contains
write(psb_out_unit,'("-total time : ",es12.5)') ttot write(psb_out_unit,'("-total time : ",es12.5)') ttot
end if end if
deallocate(val,irow,icol)
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
return return
@ -744,6 +656,9 @@ program psb_s_pde3d
use psb_krylov_mod use psb_krylov_mod
use psb_util_mod use psb_util_mod
use psb_s_pde3d_mod use psb_s_pde3d_mod
#if defined(OPENMP)
use omp_lib
#endif
implicit none implicit none
! input parameters ! input parameters
@ -765,7 +680,7 @@ program psb_s_pde3d
type(psb_s_vect_type) :: xxv,bv type(psb_s_vect_type) :: xxv,bv
! parallel environment ! parallel environment
type(psb_ctxt_type) :: ctxt type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: iam, np integer(psb_ipk_) :: iam, np, nth
! solver parameters ! solver parameters
integer(psb_ipk_) :: iter, itmax,itrace, istopc, irst, ipart integer(psb_ipk_) :: iter, itmax,itrace, istopc, irst, ipart
@ -790,7 +705,16 @@ program psb_s_pde3d
call psb_init(ctxt) call psb_init(ctxt)
call psb_info(ctxt,iam,np) call psb_info(ctxt,iam,np)
#if defined(OPENMP)
!$OMP parallel shared(nth)
!$OMP master
nth = omp_get_num_threads()
!$OMP end master
!$OMP end parallel
#else
nth = 1
#endif
if (iam < 0) then if (iam < 0) then
! This should not happen, but just in case ! This should not happen, but just in case
call psb_exit(ctxt) call psb_exit(ctxt)
@ -914,6 +838,8 @@ program psb_s_pde3d
if (iam == psb_root_) then if (iam == psb_root_) then
write(psb_out_unit,'(" ")') write(psb_out_unit,'(" ")')
write(psb_out_unit,'("Number of processes : ",i12)')np write(psb_out_unit,'("Number of processes : ",i12)')np
write(psb_out_unit,'("Number of threads : ",i12)')nth
write(psb_out_unit,'("Total number of tasks : ",i12)')nth*np
write(psb_out_unit,'("Linear system size : ",i12)') system_size write(psb_out_unit,'("Linear system size : ",i12)') system_size
write(psb_out_unit,'("Time to solve system : ",es12.5)')t2 write(psb_out_unit,'("Time to solve system : ",es12.5)')t2
write(psb_out_unit,'("Time per iteration : ",es12.5)')t2/iter write(psb_out_unit,'("Time per iteration : ",es12.5)')t2/iter

Loading…
Cancel
Save