|
|
|
@ -172,6 +172,9 @@ contains
|
|
|
|
|
& f,amold,vmold,imold,partition,nrl,iv)
|
|
|
|
|
use psb_base_mod
|
|
|
|
|
use psb_util_mod
|
|
|
|
|
#if defined(OPENMP)
|
|
|
|
|
use omp_lib
|
|
|
|
|
#endif
|
|
|
|
|
!
|
|
|
|
|
! Discretizes the partial differential equation
|
|
|
|
|
!
|
|
|
|
@ -220,8 +223,7 @@ contains
|
|
|
|
|
! Process grid
|
|
|
|
|
integer(psb_ipk_) :: np, iam
|
|
|
|
|
integer(psb_ipk_) :: icoeff
|
|
|
|
|
integer(psb_lpk_), allocatable :: irow(:),icol(:),myidx(:)
|
|
|
|
|
real(psb_dpk_), allocatable :: val(:)
|
|
|
|
|
integer(psb_lpk_), allocatable :: myidx(:)
|
|
|
|
|
! deltah dimension of each grid cell
|
|
|
|
|
! deltat discretization time
|
|
|
|
|
real(psb_dpk_) :: deltah, sqdeltah, deltah2
|
|
|
|
@ -377,7 +379,6 @@ contains
|
|
|
|
|
!
|
|
|
|
|
call psb_cdall(ctxt,desc_a,info,vl=myidx)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
!
|
|
|
|
|
! Specify process topology
|
|
|
|
|
!
|
|
|
|
@ -447,69 +448,79 @@ contains
|
|
|
|
|
goto 9999
|
|
|
|
|
end if
|
|
|
|
|
|
|
|
|
|
! we build an auxiliary matrix consisting of one row at a
|
|
|
|
|
! time; just a small matrix. might be extended to generate
|
|
|
|
|
! a bunch of rows per call.
|
|
|
|
|
!
|
|
|
|
|
allocate(val(20*nb),irow(20*nb),&
|
|
|
|
|
&icol(20*nb),stat=info)
|
|
|
|
|
if (info /= psb_success_ ) then
|
|
|
|
|
info=psb_err_alloc_dealloc_
|
|
|
|
|
call psb_errpush(info,name)
|
|
|
|
|
goto 9999
|
|
|
|
|
endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
! loop over rows belonging to current process in a block
|
|
|
|
|
! distribution.
|
|
|
|
|
|
|
|
|
|
call psb_barrier(ctxt)
|
|
|
|
|
t1 = psb_wtime()
|
|
|
|
|
do ii=1, nlr, nb
|
|
|
|
|
ib = min(nb,nlr-ii+1)
|
|
|
|
|
!ib = min(nb,mysz-ii+1)
|
|
|
|
|
icoeff = 1
|
|
|
|
|
do k=1,ib
|
|
|
|
|
i=ii+k-1
|
|
|
|
|
! local matrix pointer
|
|
|
|
|
glob_row=myidx(i)
|
|
|
|
|
! compute gridpoint coordinates
|
|
|
|
|
call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim)
|
|
|
|
|
! x, y, z coordinates
|
|
|
|
|
x = (ix-1)*deltah
|
|
|
|
|
y = (iy-1)*deltah
|
|
|
|
|
z = (iz-1)*deltah
|
|
|
|
|
zt(k) = f_(x,y,z)
|
|
|
|
|
! internal point: build discretization
|
|
|
|
|
!
|
|
|
|
|
! term depending on (x-1,y,z)
|
|
|
|
|
!
|
|
|
|
|
val(icoeff) = -a1(x,y,z)/sqdeltah-b1(x,y,z)/deltah2
|
|
|
|
|
if (ix == 1) then
|
|
|
|
|
zt(k) = g(dzero,y,z)*(-val(icoeff)) + zt(k)
|
|
|
|
|
else
|
|
|
|
|
call ijk2idx(icol(icoeff),ix-1,iy,iz,idim,idim,idim)
|
|
|
|
|
irow(icoeff) = glob_row
|
|
|
|
|
icoeff = icoeff+1
|
|
|
|
|
endif
|
|
|
|
|
! term depending on (x,y-1,z)
|
|
|
|
|
val(icoeff) = -a2(x,y,z)/sqdeltah-b2(x,y,z)/deltah2
|
|
|
|
|
if (iy == 1) then
|
|
|
|
|
zt(k) = g(x,dzero,z)*(-val(icoeff)) + zt(k)
|
|
|
|
|
else
|
|
|
|
|
call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim)
|
|
|
|
|
irow(icoeff) = glob_row
|
|
|
|
|
icoeff = icoeff+1
|
|
|
|
|
endif
|
|
|
|
|
! term depending on (x,y,z-1)
|
|
|
|
|
val(icoeff)=-a3(x,y,z)/sqdeltah-b3(x,y,z)/deltah2
|
|
|
|
|
if (iz == 1) then
|
|
|
|
|
zt(k) = g(x,y,dzero)*(-val(icoeff)) + zt(k)
|
|
|
|
|
else
|
|
|
|
|
call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim)
|
|
|
|
|
irow(icoeff) = glob_row
|
|
|
|
|
icoeff = icoeff+1
|
|
|
|
|
endif
|
|
|
|
|
!$omp parallel shared(deltah,myidx,a,desc_a)
|
|
|
|
|
!
|
|
|
|
|
block
|
|
|
|
|
integer(psb_ipk_) :: i,j,k,ii,ib,icoeff, ix,iy,iz, ith,nth
|
|
|
|
|
integer(psb_lpk_) :: glob_row
|
|
|
|
|
integer(psb_lpk_), allocatable :: irow(:),icol(:)
|
|
|
|
|
real(psb_dpk_), allocatable :: val(:)
|
|
|
|
|
real(psb_dpk_) :: x,y,z, zt(nb)
|
|
|
|
|
#if defined(OPENMP)
|
|
|
|
|
nth = omp_get_num_threads()
|
|
|
|
|
ith = omp_get_thread_num()
|
|
|
|
|
#else
|
|
|
|
|
nth = 1
|
|
|
|
|
ith = 0
|
|
|
|
|
#endif
|
|
|
|
|
allocate(val(20*nb),irow(20*nb),&
|
|
|
|
|
&icol(20*nb),stat=info)
|
|
|
|
|
if (info /= psb_success_ ) then
|
|
|
|
|
info=psb_err_alloc_dealloc_
|
|
|
|
|
call psb_errpush(info,name)
|
|
|
|
|
!goto 9999
|
|
|
|
|
endif
|
|
|
|
|
|
|
|
|
|
!$omp do schedule(dynamic)
|
|
|
|
|
!
|
|
|
|
|
do ii=1, nlr, nb
|
|
|
|
|
if(info /= psb_success_) cycle
|
|
|
|
|
ib = min(nb,nlr-ii+1)
|
|
|
|
|
!ib = min(nb,mysz-ii+1)
|
|
|
|
|
icoeff = 1
|
|
|
|
|
do k=1,ib
|
|
|
|
|
i=ii+k-1
|
|
|
|
|
! local matrix pointer
|
|
|
|
|
glob_row=myidx(i)
|
|
|
|
|
! compute gridpoint coordinates
|
|
|
|
|
call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim)
|
|
|
|
|
! x, y, z coordinates
|
|
|
|
|
x = (ix-1)*deltah
|
|
|
|
|
y = (iy-1)*deltah
|
|
|
|
|
z = (iz-1)*deltah
|
|
|
|
|
zt(k) = f_(x,y,z)
|
|
|
|
|
! internal point: build discretization
|
|
|
|
|
!
|
|
|
|
|
! term depending on (x-1,y,z)
|
|
|
|
|
!
|
|
|
|
|
val(icoeff) = -a1(x,y,z)/sqdeltah-b1(x,y,z)/deltah2
|
|
|
|
|
if (ix == 1) then
|
|
|
|
|
zt(k) = g(dzero,y,z)*(-val(icoeff)) + zt(k)
|
|
|
|
|
else
|
|
|
|
|
call ijk2idx(icol(icoeff),ix-1,iy,iz,idim,idim,idim)
|
|
|
|
|
irow(icoeff) = glob_row
|
|
|
|
|
icoeff = icoeff+1
|
|
|
|
|
endif
|
|
|
|
|
! term depending on (x,y-1,z)
|
|
|
|
|
val(icoeff) = -a2(x,y,z)/sqdeltah-b2(x,y,z)/deltah2
|
|
|
|
|
if (iy == 1) then
|
|
|
|
|
zt(k) = g(x,dzero,z)*(-val(icoeff)) + zt(k)
|
|
|
|
|
else
|
|
|
|
|
call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim)
|
|
|
|
|
irow(icoeff) = glob_row
|
|
|
|
|
icoeff = icoeff+1
|
|
|
|
|
endif
|
|
|
|
|
! term depending on (x,y,z-1)
|
|
|
|
|
val(icoeff)=-a3(x,y,z)/sqdeltah-b3(x,y,z)/deltah2
|
|
|
|
|
if (iz == 1) then
|
|
|
|
|
zt(k) = g(x,y,dzero)*(-val(icoeff)) + zt(k)
|
|
|
|
|
else
|
|
|
|
|
call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim)
|
|
|
|
|
irow(icoeff) = glob_row
|
|
|
|
|
icoeff = icoeff+1
|
|
|
|
|
endif
|
|
|
|
|
|
|
|
|
|
! term depending on (x,y,z)
|
|
|
|
|
val(icoeff)=(2*done)*(a1(x,y,z)+a2(x,y,z)+a3(x,y,z))/sqdeltah &
|
|
|
|
@ -546,14 +557,22 @@ contains
|
|
|
|
|
endif
|
|
|
|
|
|
|
|
|
|
end do
|
|
|
|
|
call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info)
|
|
|
|
|
if(info /= psb_success_) exit
|
|
|
|
|
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info)
|
|
|
|
|
if(info /= psb_success_) exit
|
|
|
|
|
zt(:)=dzero
|
|
|
|
|
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info)
|
|
|
|
|
if(info /= psb_success_) exit
|
|
|
|
|
end do
|
|
|
|
|
#if defined(OPENMP)
|
|
|
|
|
!!$ write(0,*) omp_get_thread_num(),' Check insertion ',&
|
|
|
|
|
!!$ & irow(1:icoeff-1),':',icol(1:icoeff-1)
|
|
|
|
|
#endif
|
|
|
|
|
call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info)
|
|
|
|
|
if(info /= psb_success_) cycle
|
|
|
|
|
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info)
|
|
|
|
|
if(info /= psb_success_) cycle
|
|
|
|
|
zt(:)=dzero
|
|
|
|
|
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info)
|
|
|
|
|
if(info /= psb_success_) cycle
|
|
|
|
|
end do
|
|
|
|
|
!$omp end do
|
|
|
|
|
deallocate(val,irow,icol)
|
|
|
|
|
end block
|
|
|
|
|
!$omp end parallel
|
|
|
|
|
|
|
|
|
|
tgen = psb_wtime()-t1
|
|
|
|
|
if(info /= psb_success_) then
|
|
|
|
@ -569,113 +588,6 @@ contains
|
|
|
|
|
call psb_cdasb(desc_a,info,mold=imold)
|
|
|
|
|
tcdasb = psb_wtime()-t1
|
|
|
|
|
|
|
|
|
|
if (.false.) then
|
|
|
|
|
!
|
|
|
|
|
! Add extra rows to test remote build.
|
|
|
|
|
!
|
|
|
|
|
block
|
|
|
|
|
integer(psb_ipk_) :: ks, i
|
|
|
|
|
ks = desc_a%get_local_cols()-desc_a%get_local_rows()
|
|
|
|
|
if (ks > 0) ks = max(1,ks / 10)
|
|
|
|
|
mysz = nlr+ks
|
|
|
|
|
call psb_realloc(mysz,myidx,info)
|
|
|
|
|
do i=nlr+1, mysz
|
|
|
|
|
myidx(i) = i
|
|
|
|
|
end do
|
|
|
|
|
call desc_a%l2gv1(myidx(nlr+1:mysz),info)
|
|
|
|
|
!write(0,*) iam,' Check on extra nodes ',nlr,mysz,':',myidx(nlr+1:mysz)
|
|
|
|
|
do ii= nlr+1, mysz, nb
|
|
|
|
|
ib = min(nb,mysz-ii+1)
|
|
|
|
|
icoeff = 1
|
|
|
|
|
do k=1,ib
|
|
|
|
|
i=ii+k-1
|
|
|
|
|
! local matrix pointer
|
|
|
|
|
glob_row=myidx(i)
|
|
|
|
|
! compute gridpoint coordinates
|
|
|
|
|
call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim)
|
|
|
|
|
! x, y, z coordinates
|
|
|
|
|
x = (ix-1)*deltah
|
|
|
|
|
y = (iy-1)*deltah
|
|
|
|
|
z = (iz-1)*deltah
|
|
|
|
|
zt(k) = f_(x,y,z)
|
|
|
|
|
! internal point: build discretization
|
|
|
|
|
!
|
|
|
|
|
! term depending on (x-1,y,z)
|
|
|
|
|
!
|
|
|
|
|
val(icoeff) = -a1(x,y,z)/sqdeltah-b1(x,y,z)/deltah2
|
|
|
|
|
if (ix == 1) then
|
|
|
|
|
zt(k) = g(dzero,y,z)*(-val(icoeff)) + zt(k)
|
|
|
|
|
else
|
|
|
|
|
call ijk2idx(icol(icoeff),ix-1,iy,iz,idim,idim,idim)
|
|
|
|
|
irow(icoeff) = glob_row
|
|
|
|
|
icoeff = icoeff+1
|
|
|
|
|
endif
|
|
|
|
|
! term depending on (x,y-1,z)
|
|
|
|
|
val(icoeff) = -a2(x,y,z)/sqdeltah-b2(x,y,z)/deltah2
|
|
|
|
|
if (iy == 1) then
|
|
|
|
|
zt(k) = g(x,dzero,z)*(-val(icoeff)) + zt(k)
|
|
|
|
|
else
|
|
|
|
|
call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim)
|
|
|
|
|
irow(icoeff) = glob_row
|
|
|
|
|
icoeff = icoeff+1
|
|
|
|
|
endif
|
|
|
|
|
! term depending on (x,y,z-1)
|
|
|
|
|
val(icoeff)=-a3(x,y,z)/sqdeltah-b3(x,y,z)/deltah2
|
|
|
|
|
if (iz == 1) then
|
|
|
|
|
zt(k) = g(x,y,dzero)*(-val(icoeff)) + zt(k)
|
|
|
|
|
else
|
|
|
|
|
call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim)
|
|
|
|
|
irow(icoeff) = glob_row
|
|
|
|
|
icoeff = icoeff+1
|
|
|
|
|
endif
|
|
|
|
|
|
|
|
|
|
! term depending on (x,y,z)
|
|
|
|
|
val(icoeff)=(2*done)*(a1(x,y,z)+a2(x,y,z)+a3(x,y,z))/sqdeltah &
|
|
|
|
|
& + c(x,y,z)
|
|
|
|
|
call ijk2idx(icol(icoeff),ix,iy,iz,idim,idim,idim)
|
|
|
|
|
irow(icoeff) = glob_row
|
|
|
|
|
icoeff = icoeff+1
|
|
|
|
|
! term depending on (x,y,z+1)
|
|
|
|
|
val(icoeff)=-a3(x,y,z)/sqdeltah+b3(x,y,z)/deltah2
|
|
|
|
|
if (iz == idim) then
|
|
|
|
|
zt(k) = g(x,y,done)*(-val(icoeff)) + zt(k)
|
|
|
|
|
else
|
|
|
|
|
call ijk2idx(icol(icoeff),ix,iy,iz+1,idim,idim,idim)
|
|
|
|
|
irow(icoeff) = glob_row
|
|
|
|
|
icoeff = icoeff+1
|
|
|
|
|
endif
|
|
|
|
|
! term depending on (x,y+1,z)
|
|
|
|
|
val(icoeff)=-a2(x,y,z)/sqdeltah+b2(x,y,z)/deltah2
|
|
|
|
|
if (iy == idim) then
|
|
|
|
|
zt(k) = g(x,done,z)*(-val(icoeff)) + zt(k)
|
|
|
|
|
else
|
|
|
|
|
call ijk2idx(icol(icoeff),ix,iy+1,iz,idim,idim,idim)
|
|
|
|
|
irow(icoeff) = glob_row
|
|
|
|
|
icoeff = icoeff+1
|
|
|
|
|
endif
|
|
|
|
|
! term depending on (x+1,y,z)
|
|
|
|
|
val(icoeff)=-a1(x,y,z)/sqdeltah+b1(x,y,z)/deltah2
|
|
|
|
|
if (ix==idim) then
|
|
|
|
|
zt(k) = g(done,y,z)*(-val(icoeff)) + zt(k)
|
|
|
|
|
else
|
|
|
|
|
call ijk2idx(icol(icoeff),ix+1,iy,iz,idim,idim,idim)
|
|
|
|
|
irow(icoeff) = glob_row
|
|
|
|
|
icoeff = icoeff+1
|
|
|
|
|
endif
|
|
|
|
|
|
|
|
|
|
end do
|
|
|
|
|
call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info)
|
|
|
|
|
if(info /= psb_success_) exit
|
|
|
|
|
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info)
|
|
|
|
|
if(info /= psb_success_) exit
|
|
|
|
|
zt(:)=dzero
|
|
|
|
|
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info)
|
|
|
|
|
if(info /= psb_success_) exit
|
|
|
|
|
end do
|
|
|
|
|
|
|
|
|
|
end block
|
|
|
|
|
end if
|
|
|
|
|
|
|
|
|
|
call psb_barrier(ctxt)
|
|
|
|
|
t1 = psb_wtime()
|
|
|
|
|
if (info == psb_success_) then
|
|
|
|
@ -719,7 +631,7 @@ contains
|
|
|
|
|
write(psb_out_unit,'("-total time : ",es12.5)') ttot
|
|
|
|
|
|
|
|
|
|
end if
|
|
|
|
|
deallocate(val,irow,icol)
|
|
|
|
|
|
|
|
|
|
call psb_erractionrestore(err_act)
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
@ -744,6 +656,9 @@ program psb_d_pde3d
|
|
|
|
|
use psb_krylov_mod
|
|
|
|
|
use psb_util_mod
|
|
|
|
|
use psb_d_pde3d_mod
|
|
|
|
|
#if defined(OPENMP)
|
|
|
|
|
use omp_lib
|
|
|
|
|
#endif
|
|
|
|
|
implicit none
|
|
|
|
|
|
|
|
|
|
! input parameters
|
|
|
|
@ -765,7 +680,7 @@ program psb_d_pde3d
|
|
|
|
|
type(psb_d_vect_type) :: xxv,bv
|
|
|
|
|
! parallel environment
|
|
|
|
|
type(psb_ctxt_type) :: ctxt
|
|
|
|
|
integer(psb_ipk_) :: iam, np
|
|
|
|
|
integer(psb_ipk_) :: iam, np, nth
|
|
|
|
|
|
|
|
|
|
! solver parameters
|
|
|
|
|
integer(psb_ipk_) :: iter, itmax,itrace, istopc, irst, ipart
|
|
|
|
@ -790,7 +705,16 @@ program psb_d_pde3d
|
|
|
|
|
|
|
|
|
|
call psb_init(ctxt)
|
|
|
|
|
call psb_info(ctxt,iam,np)
|
|
|
|
|
|
|
|
|
|
#if defined(OPENMP)
|
|
|
|
|
!$OMP parallel shared(nth)
|
|
|
|
|
!$OMP master
|
|
|
|
|
nth = omp_get_num_threads()
|
|
|
|
|
!$OMP end master
|
|
|
|
|
!$OMP end parallel
|
|
|
|
|
#else
|
|
|
|
|
nth = 1
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
if (iam < 0) then
|
|
|
|
|
! This should not happen, but just in case
|
|
|
|
|
call psb_exit(ctxt)
|
|
|
|
@ -914,6 +838,8 @@ program psb_d_pde3d
|
|
|
|
|
if (iam == psb_root_) then
|
|
|
|
|
write(psb_out_unit,'(" ")')
|
|
|
|
|
write(psb_out_unit,'("Number of processes : ",i12)')np
|
|
|
|
|
write(psb_out_unit,'("Number of threads : ",i12)')nth
|
|
|
|
|
write(psb_out_unit,'("Total number of tasks : ",i12)')nth*np
|
|
|
|
|
write(psb_out_unit,'("Linear system size : ",i12)') system_size
|
|
|
|
|
write(psb_out_unit,'("Time to solve system : ",es12.5)')t2
|
|
|
|
|
write(psb_out_unit,'("Time per iteration : ",es12.5)')t2/iter
|
|
|
|
|