OpenMP loop in samples data generation

omp-walther
sfilippone 1 year ago
parent 73e5d49913
commit 494b8b925f

@ -93,6 +93,9 @@ contains
& a1,a2,a3,b1,b2,b3,c,g,info,f,amold,vmold,partition, nrl,iv) & a1,a2,a3,b1,b2,b3,c,g,info,f,amold,vmold,partition, nrl,iv)
use psb_base_mod use psb_base_mod
use psb_util_mod use psb_util_mod
#if defined(OPENMP)
use omp_lib
#endif
! !
! Discretizes the partial differential equation ! Discretizes the partial differential equation
! !
@ -128,7 +131,6 @@ contains
type(psb_d_csc_sparse_mat) :: acsc type(psb_d_csc_sparse_mat) :: acsc
type(psb_d_coo_sparse_mat) :: acoo type(psb_d_coo_sparse_mat) :: acoo
type(psb_d_csr_sparse_mat) :: acsr type(psb_d_csr_sparse_mat) :: acsr
real(psb_dpk_) :: zt(nb),x,y,z,xph,xmh,yph,ymh,zph,zmh
integer(psb_ipk_) :: nnz,nr,nlr,i,j,ii,ib,k, partition_ integer(psb_ipk_) :: nnz,nr,nlr,i,j,ii,ib,k, partition_
integer(psb_lpk_) :: m,n,glob_row,nt integer(psb_lpk_) :: m,n,glob_row,nt
integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner
@ -141,8 +143,7 @@ contains
! Process grid ! Process grid
integer(psb_ipk_) :: np, iam integer(psb_ipk_) :: np, iam
integer(psb_ipk_) :: icoeff integer(psb_ipk_) :: icoeff
integer(psb_lpk_), allocatable :: irow(:),icol(:),myidx(:) integer(psb_lpk_), allocatable :: myidx(:)
real(psb_dpk_), allocatable :: val(:)
! deltah dimension of each grid cell ! deltah dimension of each grid cell
! deltat discretization time ! deltat discretization time
real(psb_dpk_) :: deltah, sqdeltah, deltah2 real(psb_dpk_) :: deltah, sqdeltah, deltah2
@ -368,119 +369,128 @@ contains
call psb_barrier(ctxt) call psb_barrier(ctxt)
talc = psb_wtime()-t0 talc = psb_wtime()-t0
if (info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='allocation rout.'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
! we build an auxiliary matrix consisting of one row at a
! time; just a small matrix. might be extended to generate
! a bunch of rows per call.
!
allocate(val(20*nb),irow(20*nb),&
&icol(20*nb),stat=info)
if (info /= psb_success_ ) then
info=psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
endif
! loop over rows belonging to current process in a block
! distribution.
call psb_barrier(ctxt) call psb_barrier(ctxt)
t1 = psb_wtime() t1 = psb_wtime()
do ii=1, nlr,nb !$omp parallel shared(deltah,myidx,a,desc_a)
ib = min(nb,nlr-ii+1) !
icoeff = 1 block
do k=1,ib integer(psb_ipk_) :: i,j,k,ii,ib,icoeff, ix,iy,iz, ith,nth
i=ii+k-1 integer(psb_lpk_) :: glob_row
! local matrix pointer integer(psb_lpk_), allocatable :: irow(:),icol(:)
glob_row=myidx(i) real(psb_dpk_), allocatable :: val(:)
! compute gridpoint coordinates real(psb_dpk_) :: x,y,z, zt(nb)
call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim) #if defined(OPENMP)
! x, y, z coordinates nth = omp_get_num_threads()
x = (ix-1)*deltah ith = omp_get_thread_num()
y = (iy-1)*deltah #else
z = (iz-1)*deltah nth = 1
zt(k) = f_(x,y,z) ith = 0
! internal point: build discretization #endif
! allocate(val(20*nb),irow(20*nb),&
! term depending on (x-1,y,z) &icol(20*nb),stat=info)
! if (info /= psb_success_ ) then
val(icoeff) = -a1(x,y,z)/sqdeltah-b1(x,y,z)/deltah2 info=psb_err_alloc_dealloc_
if (ix == 1) then call psb_errpush(info,name)
zt(k) = g(dzero,y,z)*(-val(icoeff)) + zt(k) !goto 9999
else endif
call ijk2idx(icol(icoeff),ix-1,iy,iz,idim,idim,idim)
irow(icoeff) = glob_row !$omp do schedule(dynamic)
icoeff = icoeff+1 !
endif do ii=1, nlr, nb
! term depending on (x,y-1,z) if (info /= psb_success_) cycle
val(icoeff) = -a2(x,y,z)/sqdeltah-b2(x,y,z)/deltah2 ib = min(nb,nlr-ii+1)
if (iy == 1) then icoeff = 1
zt(k) = g(x,dzero,z)*(-val(icoeff)) + zt(k) do k=1,ib
else i=ii+k-1
call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim) ! local matrix pointer
irow(icoeff) = glob_row glob_row=myidx(i)
icoeff = icoeff+1 ! compute gridpoint coordinates
endif call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim)
! term depending on (x,y,z-1) ! x, y, z coordinates
val(icoeff)=-a3(x,y,z)/sqdeltah-b3(x,y,z)/deltah2 x = (ix-1)*deltah
if (iz == 1) then y = (iy-1)*deltah
zt(k) = g(x,y,dzero)*(-val(icoeff)) + zt(k) z = (iz-1)*deltah
else zt(k) = f_(x,y,z)
call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim) ! internal point: build discretization
irow(icoeff) = glob_row !
icoeff = icoeff+1 ! term depending on (x-1,y,z)
endif !
val(icoeff) = -a1(x,y,z)/sqdeltah-b1(x,y,z)/deltah2
! term depending on (x,y,z) if (ix == 1) then
val(icoeff)=(2*done)*(a1(x,y,z)+a2(x,y,z)+a3(x,y,z))/sqdeltah & zt(k) = g(dzero,y,z)*(-val(icoeff)) + zt(k)
& + c(x,y,z) else
call ijk2idx(icol(icoeff),ix,iy,iz,idim,idim,idim) call ijk2idx(icol(icoeff),ix-1,iy,iz,idim,idim,idim)
irow(icoeff) = glob_row irow(icoeff) = glob_row
icoeff = icoeff+1 icoeff = icoeff+1
! term depending on (x,y,z+1) endif
val(icoeff)=-a3(x,y,z)/sqdeltah+b3(x,y,z)/deltah2 ! term depending on (x,y-1,z)
if (iz == idim) then val(icoeff) = -a2(x,y,z)/sqdeltah-b2(x,y,z)/deltah2
zt(k) = g(x,y,done)*(-val(icoeff)) + zt(k) if (iy == 1) then
else zt(k) = g(x,dzero,z)*(-val(icoeff)) + zt(k)
call ijk2idx(icol(icoeff),ix,iy,iz+1,idim,idim,idim) else
irow(icoeff) = glob_row call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim)
icoeff = icoeff+1 irow(icoeff) = glob_row
endif icoeff = icoeff+1
! term depending on (x,y+1,z) endif
val(icoeff)=-a2(x,y,z)/sqdeltah+b2(x,y,z)/deltah2 ! term depending on (x,y,z-1)
if (iy == idim) then val(icoeff)=-a3(x,y,z)/sqdeltah-b3(x,y,z)/deltah2
zt(k) = g(x,done,z)*(-val(icoeff)) + zt(k) if (iz == 1) then
else zt(k) = g(x,y,dzero)*(-val(icoeff)) + zt(k)
call ijk2idx(icol(icoeff),ix,iy+1,iz,idim,idim,idim) else
irow(icoeff) = glob_row call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim)
icoeff = icoeff+1 irow(icoeff) = glob_row
endif icoeff = icoeff+1
! term depending on (x+1,y,z) endif
val(icoeff)=-a1(x,y,z)/sqdeltah+b1(x,y,z)/deltah2
if (ix==idim) then ! term depending on (x,y,z)
zt(k) = g(done,y,z)*(-val(icoeff)) + zt(k) val(icoeff)=(2*done)*(a1(x,y,z)+a2(x,y,z)+a3(x,y,z))/sqdeltah &
else & + c(x,y,z)
call ijk2idx(icol(icoeff),ix+1,iy,iz,idim,idim,idim) call ijk2idx(icol(icoeff),ix,iy,iz,idim,idim,idim)
irow(icoeff) = glob_row irow(icoeff) = glob_row
icoeff = icoeff+1 icoeff = icoeff+1
endif ! term depending on (x,y,z+1)
val(icoeff)=-a3(x,y,z)/sqdeltah+b3(x,y,z)/deltah2
if (iz == idim) then
zt(k) = g(x,y,done)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy,iz+1,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y+1,z)
val(icoeff)=-a2(x,y,z)/sqdeltah+b2(x,y,z)/deltah2
if (iy == idim) then
zt(k) = g(x,done,z)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy+1,iz,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x+1,y,z)
val(icoeff)=-a1(x,y,z)/sqdeltah+b1(x,y,z)/deltah2
if (ix==idim) then
zt(k) = g(done,y,z)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix+1,iy,iz,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
end do
!write(0,*) ' Outer in_parallel ',omp_in_parallel()
call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info)
if(info /= psb_success_) cycle
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info)
if(info /= psb_success_) cycle
zt(:)=dzero
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info)
if(info /= psb_success_) cycle
end do end do
call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info) !$omp end do
if(info /= psb_success_) exit
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info) deallocate(val,irow,icol)
if(info /= psb_success_) exit end block
zt(:)=dzero !$omp end parallel
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info)
if(info /= psb_success_) exit
end do
tgen = psb_wtime()-t1 tgen = psb_wtime()-t1
if(info /= psb_success_) then if(info /= psb_success_) then
@ -490,7 +500,6 @@ contains
goto 9999 goto 9999
end if end if
deallocate(val,irow,icol)
call psb_barrier(ctxt) call psb_barrier(ctxt)
t1 = psb_wtime() t1 = psb_wtime()
@ -557,6 +566,9 @@ contains
& a1,a2,b1,b2,c,g,info,f,amold,vmold,partition, nrl,iv) & a1,a2,b1,b2,c,g,info,f,amold,vmold,partition, nrl,iv)
use psb_base_mod use psb_base_mod
use psb_util_mod use psb_util_mod
#if defined(OPENMP)
use omp_lib
#endif
! !
! Discretizes the partial differential equation ! Discretizes the partial differential equation
! !
@ -591,7 +603,6 @@ contains
type(psb_d_csc_sparse_mat) :: acsc type(psb_d_csc_sparse_mat) :: acsc
type(psb_d_coo_sparse_mat) :: acoo type(psb_d_coo_sparse_mat) :: acoo
type(psb_d_csr_sparse_mat) :: acsr type(psb_d_csr_sparse_mat) :: acsr
real(psb_dpk_) :: zt(nb),x,y,z,xph,xmh,yph,ymh,zph,zmh
integer(psb_ipk_) :: nnz,nr,nlr,i,j,ii,ib,k, partition_ integer(psb_ipk_) :: nnz,nr,nlr,i,j,ii,ib,k, partition_
integer(psb_lpk_) :: m,n,glob_row,nt integer(psb_lpk_) :: m,n,glob_row,nt
integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner
@ -604,8 +615,7 @@ contains
! Process grid ! Process grid
integer(psb_ipk_) :: np, iam integer(psb_ipk_) :: np, iam
integer(psb_ipk_) :: icoeff integer(psb_ipk_) :: icoeff
integer(psb_lpk_), allocatable :: irow(:),icol(:),myidx(:) integer(psb_lpk_), allocatable :: myidx(:)
real(psb_dpk_), allocatable :: val(:)
! deltah dimension of each grid cell ! deltah dimension of each grid cell
! deltat discretization time ! deltat discretization time
real(psb_dpk_) :: deltah, sqdeltah, deltah2, dd real(psb_dpk_) :: deltah, sqdeltah, deltah2, dd
@ -816,93 +826,109 @@ contains
goto 9999 goto 9999
end if end if
! we build an auxiliary matrix consisting of one row at a
! time; just a small matrix. might be extended to generate
! a bunch of rows per call.
!
allocate(val(20*nb),irow(20*nb),&
&icol(20*nb),stat=info)
if (info /= psb_success_ ) then
info=psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
endif
! loop over rows belonging to current process in a block
! distribution.
call psb_barrier(ctxt) call psb_barrier(ctxt)
t1 = psb_wtime() t1 = psb_wtime()
do ii=1, nlr,nb !$omp parallel shared(deltah,myidx,a,desc_a)
ib = min(nb,nlr-ii+1) !
icoeff = 1 block
do k=1,ib integer(psb_ipk_) :: i,j,k,ii,ib,icoeff, ix,iy,iz, ith,nth
i=ii+k-1 integer(psb_lpk_) :: glob_row
! local matrix pointer integer(psb_lpk_), allocatable :: irow(:),icol(:)
glob_row=myidx(i) real(psb_dpk_), allocatable :: val(:)
! compute gridpoint coordinates real(psb_dpk_) :: x,y,z, zt(nb)
call idx2ijk(ix,iy,glob_row,idim,idim) #if defined(OPENMP)
! x, y coordinates nth = omp_get_num_threads()
x = (ix-1)*deltah ith = omp_get_thread_num()
y = (iy-1)*deltah #else
nth = 1
zt(k) = f_(x,y) ith = 0
! internal point: build discretization #endif
! allocate(val(20*nb),irow(20*nb),&
! term depending on (x-1,y) &icol(20*nb),stat=info)
! if (info /= psb_success_ ) then
val(icoeff) = -a1(x,y)/sqdeltah-b1(x,y)/deltah2 info=psb_err_alloc_dealloc_
if (ix == 1) then call psb_errpush(info,name)
zt(k) = g(dzero,y)*(-val(icoeff)) + zt(k) !goto 9999
else endif
call ijk2idx(icol(icoeff),ix-1,iy,idim,idim)
irow(icoeff) = glob_row ! loop over rows belonging to current process in a block
icoeff = icoeff+1 ! distribution.
endif !$omp do schedule(dynamic)
! term depending on (x,y-1) !
val(icoeff) = -a2(x,y)/sqdeltah-b2(x,y)/deltah2 do ii=1, nlr,nb
if (iy == 1) then ib = min(nb,nlr-ii+1)
zt(k) = g(x,dzero)*(-val(icoeff)) + zt(k) icoeff = 1
else do k=1,ib
call ijk2idx(icol(icoeff),ix,iy-1,idim,idim) i=ii+k-1
irow(icoeff) = glob_row ! local matrix pointer
icoeff = icoeff+1 glob_row=myidx(i)
endif ! compute gridpoint coordinates
call idx2ijk(ix,iy,glob_row,idim,idim)
! term depending on (x,y) ! x, y coordinates
val(icoeff)=(2*done)*(a1(x,y) + a2(x,y))/sqdeltah + c(x,y) x = (ix-1)*deltah
call ijk2idx(icol(icoeff),ix,iy,idim,idim) y = (iy-1)*deltah
irow(icoeff) = glob_row
icoeff = icoeff+1 zt(k) = f_(x,y)
! term depending on (x,y+1) ! internal point: build discretization
val(icoeff)=-a2(x,y)/sqdeltah+b2(x,y)/deltah2 !
if (iy == idim) then ! term depending on (x-1,y)
zt(k) = g(x,done)*(-val(icoeff)) + zt(k) !
else val(icoeff) = -a1(x,y)/sqdeltah-b1(x,y)/deltah2
call ijk2idx(icol(icoeff),ix,iy+1,idim,idim) if (ix == 1) then
irow(icoeff) = glob_row zt(k) = g(dzero,y)*(-val(icoeff)) + zt(k)
icoeff = icoeff+1 else
endif call ijk2idx(icol(icoeff),ix-1,iy,idim,idim)
! term depending on (x+1,y) irow(icoeff) = glob_row
val(icoeff)=-a1(x,y)/sqdeltah+b1(x,y)/deltah2 icoeff = icoeff+1
if (ix==idim) then endif
zt(k) = g(done,y)*(-val(icoeff)) + zt(k) ! term depending on (x,y-1)
else val(icoeff) = -a2(x,y)/sqdeltah-b2(x,y)/deltah2
call ijk2idx(icol(icoeff),ix+1,iy,idim,idim) if (iy == 1) then
zt(k) = g(x,dzero)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy-1,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y)
val(icoeff)=(2*done)*(a1(x,y) + a2(x,y))/sqdeltah + c(x,y)
call ijk2idx(icol(icoeff),ix,iy,idim,idim)
irow(icoeff) = glob_row irow(icoeff) = glob_row
icoeff = icoeff+1 icoeff = icoeff+1
endif ! term depending on (x,y+1)
val(icoeff)=-a2(x,y)/sqdeltah+b2(x,y)/deltah2
if (iy == idim) then
zt(k) = g(x,done)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy+1,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x+1,y)
val(icoeff)=-a1(x,y)/sqdeltah+b1(x,y)/deltah2
if (ix==idim) then
zt(k) = g(done,y)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix+1,iy,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
end do
call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info)
if(info /= psb_success_) cycle
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info)
if(info /= psb_success_) cycle
zt(:)=dzero
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info)
if(info /= psb_success_) cycle
end do end do
call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info) !$omp end do
if(info /= psb_success_) exit
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info) deallocate(val,irow,icol)
if(info /= psb_success_) exit end block
zt(:)=dzero !$omp end parallel
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info)
if(info /= psb_success_) exit
end do
tgen = psb_wtime()-t1 tgen = psb_wtime()-t1
if(info /= psb_success_) then if(info /= psb_success_) then
@ -912,8 +938,6 @@ contains
goto 9999 goto 9999
end if end if
deallocate(val,irow,icol)
call psb_barrier(ctxt) call psb_barrier(ctxt)
t1 = psb_wtime() t1 = psb_wtime()
call psb_cdasb(desc_a,info) call psb_cdasb(desc_a,info)

@ -93,6 +93,9 @@ contains
& a1,a2,a3,b1,b2,b3,c,g,info,f,amold,vmold,partition, nrl,iv) & a1,a2,a3,b1,b2,b3,c,g,info,f,amold,vmold,partition, nrl,iv)
use psb_base_mod use psb_base_mod
use psb_util_mod use psb_util_mod
#if defined(OPENMP)
use omp_lib
#endif
! !
! Discretizes the partial differential equation ! Discretizes the partial differential equation
! !
@ -128,7 +131,6 @@ contains
type(psb_s_csc_sparse_mat) :: acsc type(psb_s_csc_sparse_mat) :: acsc
type(psb_s_coo_sparse_mat) :: acoo type(psb_s_coo_sparse_mat) :: acoo
type(psb_s_csr_sparse_mat) :: acsr type(psb_s_csr_sparse_mat) :: acsr
real(psb_spk_) :: zt(nb),x,y,z,xph,xmh,yph,ymh,zph,zmh
integer(psb_ipk_) :: nnz,nr,nlr,i,j,ii,ib,k, partition_ integer(psb_ipk_) :: nnz,nr,nlr,i,j,ii,ib,k, partition_
integer(psb_lpk_) :: m,n,glob_row,nt integer(psb_lpk_) :: m,n,glob_row,nt
integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner
@ -141,8 +143,7 @@ contains
! Process grid ! Process grid
integer(psb_ipk_) :: np, iam integer(psb_ipk_) :: np, iam
integer(psb_ipk_) :: icoeff integer(psb_ipk_) :: icoeff
integer(psb_lpk_), allocatable :: irow(:),icol(:),myidx(:) integer(psb_lpk_), allocatable :: myidx(:)
real(psb_spk_), allocatable :: val(:)
! deltah dimension of each grid cell ! deltah dimension of each grid cell
! deltat discretization time ! deltat discretization time
real(psb_spk_) :: deltah, sqdeltah, deltah2 real(psb_spk_) :: deltah, sqdeltah, deltah2
@ -368,119 +369,128 @@ contains
call psb_barrier(ctxt) call psb_barrier(ctxt)
talc = psb_wtime()-t0 talc = psb_wtime()-t0
if (info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='allocation rout.'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
! we build an auxiliary matrix consisting of one row at a
! time; just a small matrix. might be extended to generate
! a bunch of rows per call.
!
allocate(val(20*nb),irow(20*nb),&
&icol(20*nb),stat=info)
if (info /= psb_success_ ) then
info=psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
endif
! loop over rows belonging to current process in a block
! distribution.
call psb_barrier(ctxt) call psb_barrier(ctxt)
t1 = psb_wtime() t1 = psb_wtime()
do ii=1, nlr,nb !$omp parallel shared(deltah,myidx,a,desc_a)
ib = min(nb,nlr-ii+1) !
icoeff = 1 block
do k=1,ib integer(psb_ipk_) :: i,j,k,ii,ib,icoeff, ix,iy,iz, ith,nth
i=ii+k-1 integer(psb_lpk_) :: glob_row
! local matrix pointer integer(psb_lpk_), allocatable :: irow(:),icol(:)
glob_row=myidx(i) real(psb_spk_), allocatable :: val(:)
! compute gridpoint coordinates real(psb_spk_) :: x,y,z, zt(nb)
call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim) #if defined(OPENMP)
! x, y, z coordinates nth = omp_get_num_threads()
x = (ix-1)*deltah ith = omp_get_thread_num()
y = (iy-1)*deltah #else
z = (iz-1)*deltah nth = 1
zt(k) = f_(x,y,z) ith = 0
! internal point: build discretization #endif
! allocate(val(20*nb),irow(20*nb),&
! term depending on (x-1,y,z) &icol(20*nb),stat=info)
! if (info /= psb_success_ ) then
val(icoeff) = -a1(x,y,z)/sqdeltah-b1(x,y,z)/deltah2 info=psb_err_alloc_dealloc_
if (ix == 1) then call psb_errpush(info,name)
zt(k) = g(szero,y,z)*(-val(icoeff)) + zt(k) !goto 9999
else endif
call ijk2idx(icol(icoeff),ix-1,iy,iz,idim,idim,idim)
irow(icoeff) = glob_row !$omp do schedule(dynamic)
icoeff = icoeff+1 !
endif do ii=1, nlr, nb
! term depending on (x,y-1,z) if (info /= psb_success_) cycle
val(icoeff) = -a2(x,y,z)/sqdeltah-b2(x,y,z)/deltah2 ib = min(nb,nlr-ii+1)
if (iy == 1) then icoeff = 1
zt(k) = g(x,szero,z)*(-val(icoeff)) + zt(k) do k=1,ib
else i=ii+k-1
call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim) ! local matrix pointer
irow(icoeff) = glob_row glob_row=myidx(i)
icoeff = icoeff+1 ! compute gridpoint coordinates
endif call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim)
! term depending on (x,y,z-1) ! x, y, z coordinates
val(icoeff)=-a3(x,y,z)/sqdeltah-b3(x,y,z)/deltah2 x = (ix-1)*deltah
if (iz == 1) then y = (iy-1)*deltah
zt(k) = g(x,y,szero)*(-val(icoeff)) + zt(k) z = (iz-1)*deltah
else zt(k) = f_(x,y,z)
call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim) ! internal point: build discretization
irow(icoeff) = glob_row !
icoeff = icoeff+1 ! term depending on (x-1,y,z)
endif !
val(icoeff) = -a1(x,y,z)/sqdeltah-b1(x,y,z)/deltah2
! term depending on (x,y,z) if (ix == 1) then
val(icoeff)=(2*sone)*(a1(x,y,z)+a2(x,y,z)+a3(x,y,z))/sqdeltah & zt(k) = g(szero,y,z)*(-val(icoeff)) + zt(k)
& + c(x,y,z) else
call ijk2idx(icol(icoeff),ix,iy,iz,idim,idim,idim) call ijk2idx(icol(icoeff),ix-1,iy,iz,idim,idim,idim)
irow(icoeff) = glob_row irow(icoeff) = glob_row
icoeff = icoeff+1 icoeff = icoeff+1
! term depending on (x,y,z+1) endif
val(icoeff)=-a3(x,y,z)/sqdeltah+b3(x,y,z)/deltah2 ! term depending on (x,y-1,z)
if (iz == idim) then val(icoeff) = -a2(x,y,z)/sqdeltah-b2(x,y,z)/deltah2
zt(k) = g(x,y,sone)*(-val(icoeff)) + zt(k) if (iy == 1) then
else zt(k) = g(x,szero,z)*(-val(icoeff)) + zt(k)
call ijk2idx(icol(icoeff),ix,iy,iz+1,idim,idim,idim) else
irow(icoeff) = glob_row call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim)
icoeff = icoeff+1 irow(icoeff) = glob_row
endif icoeff = icoeff+1
! term depending on (x,y+1,z) endif
val(icoeff)=-a2(x,y,z)/sqdeltah+b2(x,y,z)/deltah2 ! term depending on (x,y,z-1)
if (iy == idim) then val(icoeff)=-a3(x,y,z)/sqdeltah-b3(x,y,z)/deltah2
zt(k) = g(x,sone,z)*(-val(icoeff)) + zt(k) if (iz == 1) then
else zt(k) = g(x,y,szero)*(-val(icoeff)) + zt(k)
call ijk2idx(icol(icoeff),ix,iy+1,iz,idim,idim,idim) else
irow(icoeff) = glob_row call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim)
icoeff = icoeff+1 irow(icoeff) = glob_row
endif icoeff = icoeff+1
! term depending on (x+1,y,z) endif
val(icoeff)=-a1(x,y,z)/sqdeltah+b1(x,y,z)/deltah2
if (ix==idim) then ! term depending on (x,y,z)
zt(k) = g(sone,y,z)*(-val(icoeff)) + zt(k) val(icoeff)=(2*sone)*(a1(x,y,z)+a2(x,y,z)+a3(x,y,z))/sqdeltah &
else & + c(x,y,z)
call ijk2idx(icol(icoeff),ix+1,iy,iz,idim,idim,idim) call ijk2idx(icol(icoeff),ix,iy,iz,idim,idim,idim)
irow(icoeff) = glob_row irow(icoeff) = glob_row
icoeff = icoeff+1 icoeff = icoeff+1
endif ! term depending on (x,y,z+1)
val(icoeff)=-a3(x,y,z)/sqdeltah+b3(x,y,z)/deltah2
if (iz == idim) then
zt(k) = g(x,y,sone)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy,iz+1,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y+1,z)
val(icoeff)=-a2(x,y,z)/sqdeltah+b2(x,y,z)/deltah2
if (iy == idim) then
zt(k) = g(x,sone,z)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy+1,iz,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x+1,y,z)
val(icoeff)=-a1(x,y,z)/sqdeltah+b1(x,y,z)/deltah2
if (ix==idim) then
zt(k) = g(sone,y,z)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix+1,iy,iz,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
end do
!write(0,*) ' Outer in_parallel ',omp_in_parallel()
call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info)
if(info /= psb_success_) cycle
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info)
if(info /= psb_success_) cycle
zt(:)=szero
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info)
if(info /= psb_success_) cycle
end do end do
call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info) !$omp end do
if(info /= psb_success_) exit
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info) deallocate(val,irow,icol)
if(info /= psb_success_) exit end block
zt(:)=szero !$omp end parallel
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info)
if(info /= psb_success_) exit
end do
tgen = psb_wtime()-t1 tgen = psb_wtime()-t1
if(info /= psb_success_) then if(info /= psb_success_) then
@ -490,7 +500,6 @@ contains
goto 9999 goto 9999
end if end if
deallocate(val,irow,icol)
call psb_barrier(ctxt) call psb_barrier(ctxt)
t1 = psb_wtime() t1 = psb_wtime()
@ -557,6 +566,9 @@ contains
& a1,a2,b1,b2,c,g,info,f,amold,vmold,partition, nrl,iv) & a1,a2,b1,b2,c,g,info,f,amold,vmold,partition, nrl,iv)
use psb_base_mod use psb_base_mod
use psb_util_mod use psb_util_mod
#if defined(OPENMP)
use omp_lib
#endif
! !
! Discretizes the partial differential equation ! Discretizes the partial differential equation
! !
@ -591,7 +603,6 @@ contains
type(psb_s_csc_sparse_mat) :: acsc type(psb_s_csc_sparse_mat) :: acsc
type(psb_s_coo_sparse_mat) :: acoo type(psb_s_coo_sparse_mat) :: acoo
type(psb_s_csr_sparse_mat) :: acsr type(psb_s_csr_sparse_mat) :: acsr
real(psb_spk_) :: zt(nb),x,y,z,xph,xmh,yph,ymh,zph,zmh
integer(psb_ipk_) :: nnz,nr,nlr,i,j,ii,ib,k, partition_ integer(psb_ipk_) :: nnz,nr,nlr,i,j,ii,ib,k, partition_
integer(psb_lpk_) :: m,n,glob_row,nt integer(psb_lpk_) :: m,n,glob_row,nt
integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner
@ -604,8 +615,7 @@ contains
! Process grid ! Process grid
integer(psb_ipk_) :: np, iam integer(psb_ipk_) :: np, iam
integer(psb_ipk_) :: icoeff integer(psb_ipk_) :: icoeff
integer(psb_lpk_), allocatable :: irow(:),icol(:),myidx(:) integer(psb_lpk_), allocatable :: myidx(:)
real(psb_spk_), allocatable :: val(:)
! deltah dimension of each grid cell ! deltah dimension of each grid cell
! deltat discretization time ! deltat discretization time
real(psb_spk_) :: deltah, sqdeltah, deltah2, dd real(psb_spk_) :: deltah, sqdeltah, deltah2, dd
@ -816,93 +826,109 @@ contains
goto 9999 goto 9999
end if end if
! we build an auxiliary matrix consisting of one row at a
! time; just a small matrix. might be extended to generate
! a bunch of rows per call.
!
allocate(val(20*nb),irow(20*nb),&
&icol(20*nb),stat=info)
if (info /= psb_success_ ) then
info=psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
endif
! loop over rows belonging to current process in a block
! distribution.
call psb_barrier(ctxt) call psb_barrier(ctxt)
t1 = psb_wtime() t1 = psb_wtime()
do ii=1, nlr,nb !$omp parallel shared(deltah,myidx,a,desc_a)
ib = min(nb,nlr-ii+1) !
icoeff = 1 block
do k=1,ib integer(psb_ipk_) :: i,j,k,ii,ib,icoeff, ix,iy,iz, ith,nth
i=ii+k-1 integer(psb_lpk_) :: glob_row
! local matrix pointer integer(psb_lpk_), allocatable :: irow(:),icol(:)
glob_row=myidx(i) real(psb_spk_), allocatable :: val(:)
! compute gridpoint coordinates real(psb_spk_) :: x,y,z, zt(nb)
call idx2ijk(ix,iy,glob_row,idim,idim) #if defined(OPENMP)
! x, y coordinates nth = omp_get_num_threads()
x = (ix-1)*deltah ith = omp_get_thread_num()
y = (iy-1)*deltah #else
nth = 1
zt(k) = f_(x,y) ith = 0
! internal point: build discretization #endif
! allocate(val(20*nb),irow(20*nb),&
! term depending on (x-1,y) &icol(20*nb),stat=info)
! if (info /= psb_success_ ) then
val(icoeff) = -a1(x,y)/sqdeltah-b1(x,y)/deltah2 info=psb_err_alloc_dealloc_
if (ix == 1) then call psb_errpush(info,name)
zt(k) = g(szero,y)*(-val(icoeff)) + zt(k) !goto 9999
else endif
call ijk2idx(icol(icoeff),ix-1,iy,idim,idim)
irow(icoeff) = glob_row ! loop over rows belonging to current process in a block
icoeff = icoeff+1 ! distribution.
endif !$omp do schedule(dynamic)
! term depending on (x,y-1) !
val(icoeff) = -a2(x,y)/sqdeltah-b2(x,y)/deltah2 do ii=1, nlr,nb
if (iy == 1) then ib = min(nb,nlr-ii+1)
zt(k) = g(x,szero)*(-val(icoeff)) + zt(k) icoeff = 1
else do k=1,ib
call ijk2idx(icol(icoeff),ix,iy-1,idim,idim) i=ii+k-1
irow(icoeff) = glob_row ! local matrix pointer
icoeff = icoeff+1 glob_row=myidx(i)
endif ! compute gridpoint coordinates
call idx2ijk(ix,iy,glob_row,idim,idim)
! term depending on (x,y) ! x, y coordinates
val(icoeff)=(2*sone)*(a1(x,y) + a2(x,y))/sqdeltah + c(x,y) x = (ix-1)*deltah
call ijk2idx(icol(icoeff),ix,iy,idim,idim) y = (iy-1)*deltah
irow(icoeff) = glob_row
icoeff = icoeff+1 zt(k) = f_(x,y)
! term depending on (x,y+1) ! internal point: build discretization
val(icoeff)=-a2(x,y)/sqdeltah+b2(x,y)/deltah2 !
if (iy == idim) then ! term depending on (x-1,y)
zt(k) = g(x,sone)*(-val(icoeff)) + zt(k) !
else val(icoeff) = -a1(x,y)/sqdeltah-b1(x,y)/deltah2
call ijk2idx(icol(icoeff),ix,iy+1,idim,idim) if (ix == 1) then
irow(icoeff) = glob_row zt(k) = g(szero,y)*(-val(icoeff)) + zt(k)
icoeff = icoeff+1 else
endif call ijk2idx(icol(icoeff),ix-1,iy,idim,idim)
! term depending on (x+1,y) irow(icoeff) = glob_row
val(icoeff)=-a1(x,y)/sqdeltah+b1(x,y)/deltah2 icoeff = icoeff+1
if (ix==idim) then endif
zt(k) = g(sone,y)*(-val(icoeff)) + zt(k) ! term depending on (x,y-1)
else val(icoeff) = -a2(x,y)/sqdeltah-b2(x,y)/deltah2
call ijk2idx(icol(icoeff),ix+1,iy,idim,idim) if (iy == 1) then
zt(k) = g(x,szero)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy-1,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y)
val(icoeff)=(2*sone)*(a1(x,y) + a2(x,y))/sqdeltah + c(x,y)
call ijk2idx(icol(icoeff),ix,iy,idim,idim)
irow(icoeff) = glob_row irow(icoeff) = glob_row
icoeff = icoeff+1 icoeff = icoeff+1
endif ! term depending on (x,y+1)
val(icoeff)=-a2(x,y)/sqdeltah+b2(x,y)/deltah2
if (iy == idim) then
zt(k) = g(x,sone)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy+1,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x+1,y)
val(icoeff)=-a1(x,y)/sqdeltah+b1(x,y)/deltah2
if (ix==idim) then
zt(k) = g(sone,y)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix+1,iy,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
end do
call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info)
if(info /= psb_success_) cycle
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info)
if(info /= psb_success_) cycle
zt(:)=szero
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info)
if(info /= psb_success_) cycle
end do end do
call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info) !$omp end do
if(info /= psb_success_) exit
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info) deallocate(val,irow,icol)
if(info /= psb_success_) exit end block
zt(:)=szero !$omp end parallel
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info)
if(info /= psb_success_) exit
end do
tgen = psb_wtime()-t1 tgen = psb_wtime()-t1
if(info /= psb_success_) then if(info /= psb_success_) then
@ -912,8 +938,6 @@ contains
goto 9999 goto 9999
end if end if
deallocate(val,irow,icol)
call psb_barrier(ctxt) call psb_barrier(ctxt)
t1 = psb_wtime() t1 = psb_wtime()
call psb_cdasb(desc_a,info) call psb_cdasb(desc_a,info)

Loading…
Cancel
Save