From 68f20c0e7a04467f00fcc2b6febbd891a2d61557 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Mon, 7 Oct 2024 12:44:45 +0200 Subject: [PATCH 1/4] Modify init --- openacc/psb_oacc_env_mod.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openacc/psb_oacc_env_mod.F90 b/openacc/psb_oacc_env_mod.F90 index dc01ad3a..6d810f74 100644 --- a/openacc/psb_oacc_env_mod.F90 +++ b/openacc/psb_oacc_env_mod.F90 @@ -18,7 +18,7 @@ contains subroutine psb_oacc_init(ctxt, dev) type(psb_ctxt_type), intent(in) :: ctxt integer, intent(in), optional :: dev - + oacc_do_maybe_free_buffer = .false. end subroutine psb_oacc_init subroutine psb_oacc_exit() From 740609a4d8527de07a27d647f7c8b973142439aa Mon Sep 17 00:00:00 2001 From: sfilippone Date: Mon, 7 Oct 2024 12:45:18 +0200 Subject: [PATCH 2/4] Fix present() clauses --- openacc/psb_c_oacc_vect_mod.F90 | 80 ++++++++++++++++++++------------- openacc/psb_d_oacc_vect_mod.F90 | 80 ++++++++++++++++++++------------- openacc/psb_i_oacc_vect_mod.F90 | 58 +++++++++++++++--------- openacc/psb_l_oacc_vect_mod.F90 | 58 +++++++++++++++--------- openacc/psb_s_oacc_vect_mod.F90 | 80 ++++++++++++++++++++------------- openacc/psb_z_oacc_vect_mod.F90 | 80 ++++++++++++++++++++------------- 6 files changed, 266 insertions(+), 170 deletions(-) diff --git a/openacc/psb_c_oacc_vect_mod.F90 b/openacc/psb_c_oacc_vect_mod.F90 index e479f58d..40437184 100644 --- a/openacc/psb_c_oacc_vect_mod.F90 +++ b/openacc/psb_c_oacc_vect_mod.F90 @@ -113,7 +113,7 @@ contains complex(psb_spk_), intent(inout) :: x(:) integer(psb_ipk_) :: n integer(psb_ipk_) :: i - !$acc parallel loop + !$acc parallel loop present(x) do i = 1, n x(i) = abs(x(i)) end do @@ -144,7 +144,7 @@ contains complex(psb_spk_), intent(inout) :: x(:),y(:) integer(psb_ipk_) :: n integer(psb_ipk_) :: i - !$acc parallel loop + !$acc parallel loop present(x,y) do i = 1, n y(i) = abs(x(i)) end do @@ -164,7 +164,7 @@ contains complex(psb_spk_), intent(in) :: alpha complex(psb_spk_), intent(inout) :: x(:) integer(psb_ipk_) :: i - !$acc parallel loop + !$acc parallel loop present(x) do i = 1, size(x) x(i) = alpha * x(i) end do @@ -189,7 +189,7 @@ contains real(psb_spk_) :: sum, mx integer(psb_ipk_) :: i mx = szero - !$acc parallel loop reduction(max:mx) + !$acc parallel loop reduction(max:mx) present(x) do i = 1, n if (abs(x(i)) > mx) mx = abs(x(i)) end do @@ -197,7 +197,7 @@ contains res = mx else sum = szero - !$acc parallel loop reduction(+:sum) + !$acc parallel loop reduction(+:sum) present(x) do i = 1, n sum = sum + abs(x(i)/mx)**2 end do @@ -223,7 +223,7 @@ contains real(psb_spk_) :: max_val integer(psb_ipk_) :: i max_val = szero - !$acc parallel loop reduction(max:max_val) + !$acc parallel loop reduction(max:max_val) present(x) do i = 1, n if (abs(x(i)) > max_val) max_val = abs(x(i)) end do @@ -248,7 +248,7 @@ contains real(psb_spk_) :: res integer(psb_ipk_) :: i res = szero - !$acc parallel loop reduction(+:res) + !$acc parallel loop reduction(+:res) present(x) do i = 1, n res = res + abs(x(i)) end do @@ -265,7 +265,7 @@ contains info = 0 if (y%is_dev()) call y%sync() - !$acc parallel loop + !$acc parallel loop present(x,y) do i = 1, size(x) y%v(i) = y%v(i) * x(i) end do @@ -283,7 +283,7 @@ contains info = 0 if (z%is_dev()) call z%sync() - !$acc parallel loop + !$acc parallel loop present(x,y,z%v) do i = 1, size(x) z%v(i) = alpha * x(i) * y(i) + beta * z%v(i) end do @@ -327,7 +327,7 @@ contains complex(psb_spk_), intent(inout) :: y(:) complex(psb_spk_), intent(in) :: alpha, beta integer(psb_ipk_), intent(out) :: info - !$acc parallel + !$acc parallel present(x,y) !$acc loop do i = 1, m y(i) = alpha * x(i) + beta * y(i) @@ -384,7 +384,7 @@ contains if ((nx < m) .or. (ny < m) .or. (nz < m)) then info = psb_err_internal_error_ else - !$acc parallel loop + !$acc parallel loop present(xx%v,yy%v,zz%v) do i = 1, m yy%v(i) = alpha * xx%v(i) + beta * yy%v(i) zz%v(i) = gamma * yy%v(i) + delta * zz%v(i) @@ -416,6 +416,7 @@ contains integer(psb_ipk_) :: info, k logical :: acc_done if (.not.allocated(y%combuf)) then + write(0,*) 'allocation error for y%combuf ' call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') return end if @@ -443,8 +444,8 @@ contains integer(psb_ipk_) :: n, idx(:) complex(psb_spk_) :: beta,x(:), y(:) integer(psb_ipk_) :: k - !$acc update device(x(1:n)) async - !$acc parallel loop + !$acc update device(x(1:n)) + !$acc parallel loop present(x,y) do k = 1, n y(idx(k)) = x(k) + beta *y(idx(k)) end do @@ -488,8 +489,8 @@ contains integer(psb_ipk_) :: n, idx(:) complex(psb_spk_) :: beta, x(:), y(:) integer(psb_ipk_) :: k - !$acc update device(x(1:n)) async - !$acc parallel loop + !$acc update device(x(1:n)) + !$acc parallel loop present(x,y) do k = 1, n y(idx(k)) = x(k) + beta *y(idx(k)) end do @@ -531,6 +532,7 @@ contains acc_done = .false. if (.not.allocated(x%combuf)) then + write(0,*) 'oacc allocation error combuf gthzbuf ' call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') return end if @@ -556,13 +558,13 @@ contains integer(psb_ipk_) :: n, idx(:) complex(psb_spk_) :: x(:), y(:) integer(psb_ipk_) :: k - - !$acc parallel loop present(y) + ! + !$acc parallel loop present(x,y) do k = 1, n y(k) = x(idx(k)) end do !$acc end parallel loop - !$acc update self(y(1:n)) async + !$acc update self(y(1:n)) end subroutine inner_gth end subroutine c_oacc_gthzbuf @@ -600,13 +602,13 @@ contains integer(psb_ipk_) :: n, idx(:) complex(psb_spk_) :: x(:), y(:) integer(psb_ipk_) :: k - - !$acc parallel loop present(y) + ! + !$acc parallel loop present(x,y) do k = 1, n y(k) = x(idx(k)) end do !$acc end parallel loop - !$acc update self(y(1:n)) async + !$acc update self(y(1:n)) end subroutine inner_gth end subroutine c_oacc_gthzv_x @@ -633,7 +635,7 @@ contains if (vval%is_host()) call vval%sync() if (virl%is_host()) call virl%sync() if (x%is_host()) call x%sync() - !$acc parallel loop + !$acc parallel loop present(x%v,virl%v,vval%v) do i = 1, n x%v(virl%v(i)) = vval%v(i) end do @@ -757,7 +759,7 @@ contains if (present(first)) first_ = max(1, first) if (present(last)) last_ = min(last, last_) - !$acc parallel loop + !$acc parallel loop present(x%v) do i = first_, last_ x%v(i) = val end do @@ -849,26 +851,36 @@ contains class(psb_c_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(in) :: n integer(psb_ipk_), intent(out) :: info - if (n /= psb_size(x%combuf)) then + + !write(0,*) 'oacc new_buffer',n,psb_size(x%combuf) + if (n > psb_size(x%combuf)) then + !write(0,*) 'oacc new_buffer: reallocating ' + if (allocated(x%combuf)) then + !if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) + !$acc exit data delete(x%combuf) + end if call x%psb_c_base_vect_type%new_buffer(n,info) !$acc enter data copyin(x%combuf) + ! call acc_copyin(x%combuf) end if end subroutine c_oacc_new_buffer subroutine c_oacc_sync_dev_space(x) implicit none class(psb_c_vect_oacc), intent(inout) :: x - if (allocated(x%v)) call acc_create(x%v) +!!$ write(0,*) 'oacc sync_dev_space' + if (psb_size(x%v)>0) call acc_copyin(x%v) end subroutine c_oacc_sync_dev_space subroutine c_oacc_sync(x) implicit none class(psb_c_vect_oacc), intent(inout) :: x if (x%is_dev()) then - call acc_update_self(x%v) + if (psb_size(x%v)>0) call acc_update_self(x%v) end if if (x%is_host()) then - call acc_update_device(x%v) + if (.not.acc_is_present(x%v)) call c_oacc_sync_dev_space(x) + if (psb_size(x%v)>0) call acc_update_device(x%v) end if call x%set_sync() end subroutine c_oacc_sync @@ -941,6 +953,8 @@ contains type(psb_c_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: info info = 0 +!!$ write(0,*) 'oacc final_vect_free' + call x%free_buffer(info) if (allocated(x%v)) then if (acc_is_present(x%v)) call acc_delete_finalize(x%v) deallocate(x%v, stat=info) @@ -953,8 +967,9 @@ contains class(psb_c_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 +!!$ write(0,*) 'oacc vect_free' + call x%free_buffer(info) if (acc_is_present(x%v)) call acc_delete_finalize(x%v) - if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) call x%psb_c_base_vect_type%free(info) end subroutine c_oacc_vect_free @@ -964,8 +979,10 @@ contains integer(psb_ipk_), intent(out) :: info info = 0 - if (psb_oacc_get_maybe_free_buffer())& - & call x%free_buffer(info) + if (psb_oacc_get_maybe_free_buffer()) then + !write(0,*) 'psb_oacc_get_maybe_free_buffer() ',psb_oacc_get_maybe_free_buffer() + call x%free_buffer(info) + end if end subroutine c_oacc_vect_maybe_free_buffer @@ -973,7 +990,7 @@ contains implicit none class(psb_c_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(out) :: info - +! write(0,*) 'oacc free_buffer' info = 0 if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) call x%psb_c_base_vect_type%free_buffer(info) @@ -985,7 +1002,6 @@ contains class(psb_c_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: res - if (x%is_dev()) call x%sync() res = size(x%v) end function c_oacc_get_size diff --git a/openacc/psb_d_oacc_vect_mod.F90 b/openacc/psb_d_oacc_vect_mod.F90 index 7fd2a441..84441c8a 100644 --- a/openacc/psb_d_oacc_vect_mod.F90 +++ b/openacc/psb_d_oacc_vect_mod.F90 @@ -113,7 +113,7 @@ contains real(psb_dpk_), intent(inout) :: x(:) integer(psb_ipk_) :: n integer(psb_ipk_) :: i - !$acc parallel loop + !$acc parallel loop present(x) do i = 1, n x(i) = abs(x(i)) end do @@ -144,7 +144,7 @@ contains real(psb_dpk_), intent(inout) :: x(:),y(:) integer(psb_ipk_) :: n integer(psb_ipk_) :: i - !$acc parallel loop + !$acc parallel loop present(x,y) do i = 1, n y(i) = abs(x(i)) end do @@ -164,7 +164,7 @@ contains real(psb_dpk_), intent(in) :: alpha real(psb_dpk_), intent(inout) :: x(:) integer(psb_ipk_) :: i - !$acc parallel loop + !$acc parallel loop present(x) do i = 1, size(x) x(i) = alpha * x(i) end do @@ -189,7 +189,7 @@ contains real(psb_dpk_) :: sum, mx integer(psb_ipk_) :: i mx = dzero - !$acc parallel loop reduction(max:mx) + !$acc parallel loop reduction(max:mx) present(x) do i = 1, n if (abs(x(i)) > mx) mx = abs(x(i)) end do @@ -197,7 +197,7 @@ contains res = mx else sum = dzero - !$acc parallel loop reduction(+:sum) + !$acc parallel loop reduction(+:sum) present(x) do i = 1, n sum = sum + abs(x(i)/mx)**2 end do @@ -223,7 +223,7 @@ contains real(psb_dpk_) :: max_val integer(psb_ipk_) :: i max_val = dzero - !$acc parallel loop reduction(max:max_val) + !$acc parallel loop reduction(max:max_val) present(x) do i = 1, n if (abs(x(i)) > max_val) max_val = abs(x(i)) end do @@ -248,7 +248,7 @@ contains real(psb_dpk_) :: res integer(psb_ipk_) :: i res = dzero - !$acc parallel loop reduction(+:res) + !$acc parallel loop reduction(+:res) present(x) do i = 1, n res = res + abs(x(i)) end do @@ -265,7 +265,7 @@ contains info = 0 if (y%is_dev()) call y%sync() - !$acc parallel loop + !$acc parallel loop present(x,y) do i = 1, size(x) y%v(i) = y%v(i) * x(i) end do @@ -283,7 +283,7 @@ contains info = 0 if (z%is_dev()) call z%sync() - !$acc parallel loop + !$acc parallel loop present(x,y,z%v) do i = 1, size(x) z%v(i) = alpha * x(i) * y(i) + beta * z%v(i) end do @@ -327,7 +327,7 @@ contains real(psb_dpk_), intent(inout) :: y(:) real(psb_dpk_), intent(in) :: alpha, beta integer(psb_ipk_), intent(out) :: info - !$acc parallel + !$acc parallel present(x,y) !$acc loop do i = 1, m y(i) = alpha * x(i) + beta * y(i) @@ -384,7 +384,7 @@ contains if ((nx < m) .or. (ny < m) .or. (nz < m)) then info = psb_err_internal_error_ else - !$acc parallel loop + !$acc parallel loop present(xx%v,yy%v,zz%v) do i = 1, m yy%v(i) = alpha * xx%v(i) + beta * yy%v(i) zz%v(i) = gamma * yy%v(i) + delta * zz%v(i) @@ -416,6 +416,7 @@ contains integer(psb_ipk_) :: info, k logical :: acc_done if (.not.allocated(y%combuf)) then + write(0,*) 'allocation error for y%combuf ' call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') return end if @@ -443,8 +444,8 @@ contains integer(psb_ipk_) :: n, idx(:) real(psb_dpk_) :: beta,x(:), y(:) integer(psb_ipk_) :: k - !$acc update device(x(1:n)) async - !$acc parallel loop + !$acc update device(x(1:n)) + !$acc parallel loop present(x,y) do k = 1, n y(idx(k)) = x(k) + beta *y(idx(k)) end do @@ -488,8 +489,8 @@ contains integer(psb_ipk_) :: n, idx(:) real(psb_dpk_) :: beta, x(:), y(:) integer(psb_ipk_) :: k - !$acc update device(x(1:n)) async - !$acc parallel loop + !$acc update device(x(1:n)) + !$acc parallel loop present(x,y) do k = 1, n y(idx(k)) = x(k) + beta *y(idx(k)) end do @@ -531,6 +532,7 @@ contains acc_done = .false. if (.not.allocated(x%combuf)) then + write(0,*) 'oacc allocation error combuf gthzbuf ' call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') return end if @@ -556,13 +558,13 @@ contains integer(psb_ipk_) :: n, idx(:) real(psb_dpk_) :: x(:), y(:) integer(psb_ipk_) :: k - - !$acc parallel loop present(y) + ! + !$acc parallel loop present(x,y) do k = 1, n y(k) = x(idx(k)) end do !$acc end parallel loop - !$acc update self(y(1:n)) async + !$acc update self(y(1:n)) end subroutine inner_gth end subroutine d_oacc_gthzbuf @@ -600,13 +602,13 @@ contains integer(psb_ipk_) :: n, idx(:) real(psb_dpk_) :: x(:), y(:) integer(psb_ipk_) :: k - - !$acc parallel loop present(y) + ! + !$acc parallel loop present(x,y) do k = 1, n y(k) = x(idx(k)) end do !$acc end parallel loop - !$acc update self(y(1:n)) async + !$acc update self(y(1:n)) end subroutine inner_gth end subroutine d_oacc_gthzv_x @@ -633,7 +635,7 @@ contains if (vval%is_host()) call vval%sync() if (virl%is_host()) call virl%sync() if (x%is_host()) call x%sync() - !$acc parallel loop + !$acc parallel loop present(x%v,virl%v,vval%v) do i = 1, n x%v(virl%v(i)) = vval%v(i) end do @@ -757,7 +759,7 @@ contains if (present(first)) first_ = max(1, first) if (present(last)) last_ = min(last, last_) - !$acc parallel loop + !$acc parallel loop present(x%v) do i = first_, last_ x%v(i) = val end do @@ -849,26 +851,36 @@ contains class(psb_d_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(in) :: n integer(psb_ipk_), intent(out) :: info - if (n /= psb_size(x%combuf)) then + + !write(0,*) 'oacc new_buffer',n,psb_size(x%combuf) + if (n > psb_size(x%combuf)) then + !write(0,*) 'oacc new_buffer: reallocating ' + if (allocated(x%combuf)) then + !if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) + !$acc exit data delete(x%combuf) + end if call x%psb_d_base_vect_type%new_buffer(n,info) !$acc enter data copyin(x%combuf) + ! call acc_copyin(x%combuf) end if end subroutine d_oacc_new_buffer subroutine d_oacc_sync_dev_space(x) implicit none class(psb_d_vect_oacc), intent(inout) :: x - if (allocated(x%v)) call acc_create(x%v) +!!$ write(0,*) 'oacc sync_dev_space' + if (psb_size(x%v)>0) call acc_copyin(x%v) end subroutine d_oacc_sync_dev_space subroutine d_oacc_sync(x) implicit none class(psb_d_vect_oacc), intent(inout) :: x if (x%is_dev()) then - call acc_update_self(x%v) + if (psb_size(x%v)>0) call acc_update_self(x%v) end if if (x%is_host()) then - call acc_update_device(x%v) + if (.not.acc_is_present(x%v)) call d_oacc_sync_dev_space(x) + if (psb_size(x%v)>0) call acc_update_device(x%v) end if call x%set_sync() end subroutine d_oacc_sync @@ -941,6 +953,8 @@ contains type(psb_d_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: info info = 0 +!!$ write(0,*) 'oacc final_vect_free' + call x%free_buffer(info) if (allocated(x%v)) then if (acc_is_present(x%v)) call acc_delete_finalize(x%v) deallocate(x%v, stat=info) @@ -953,8 +967,9 @@ contains class(psb_d_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 +!!$ write(0,*) 'oacc vect_free' + call x%free_buffer(info) if (acc_is_present(x%v)) call acc_delete_finalize(x%v) - if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) call x%psb_d_base_vect_type%free(info) end subroutine d_oacc_vect_free @@ -964,8 +979,10 @@ contains integer(psb_ipk_), intent(out) :: info info = 0 - if (psb_oacc_get_maybe_free_buffer())& - & call x%free_buffer(info) + if (psb_oacc_get_maybe_free_buffer()) then + !write(0,*) 'psb_oacc_get_maybe_free_buffer() ',psb_oacc_get_maybe_free_buffer() + call x%free_buffer(info) + end if end subroutine d_oacc_vect_maybe_free_buffer @@ -973,7 +990,7 @@ contains implicit none class(psb_d_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(out) :: info - +! write(0,*) 'oacc free_buffer' info = 0 if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) call x%psb_d_base_vect_type%free_buffer(info) @@ -985,7 +1002,6 @@ contains class(psb_d_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: res - if (x%is_dev()) call x%sync() res = size(x%v) end function d_oacc_get_size diff --git a/openacc/psb_i_oacc_vect_mod.F90 b/openacc/psb_i_oacc_vect_mod.F90 index 455453a1..42cdc18c 100644 --- a/openacc/psb_i_oacc_vect_mod.F90 +++ b/openacc/psb_i_oacc_vect_mod.F90 @@ -70,6 +70,7 @@ contains integer(psb_ipk_) :: info, k logical :: acc_done if (.not.allocated(y%combuf)) then + write(0,*) 'allocation error for y%combuf ' call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') return end if @@ -97,8 +98,8 @@ contains integer(psb_ipk_) :: n, idx(:) integer(psb_ipk_) :: beta,x(:), y(:) integer(psb_ipk_) :: k - !$acc update device(x(1:n)) async - !$acc parallel loop + !$acc update device(x(1:n)) + !$acc parallel loop present(x,y) do k = 1, n y(idx(k)) = x(k) + beta *y(idx(k)) end do @@ -142,8 +143,8 @@ contains integer(psb_ipk_) :: n, idx(:) integer(psb_ipk_) :: beta, x(:), y(:) integer(psb_ipk_) :: k - !$acc update device(x(1:n)) async - !$acc parallel loop + !$acc update device(x(1:n)) + !$acc parallel loop present(x,y) do k = 1, n y(idx(k)) = x(k) + beta *y(idx(k)) end do @@ -185,6 +186,7 @@ contains acc_done = .false. if (.not.allocated(x%combuf)) then + write(0,*) 'oacc allocation error combuf gthzbuf ' call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') return end if @@ -210,13 +212,13 @@ contains integer(psb_ipk_) :: n, idx(:) integer(psb_ipk_) :: x(:), y(:) integer(psb_ipk_) :: k - - !$acc parallel loop present(y) + ! + !$acc parallel loop present(x,y) do k = 1, n y(k) = x(idx(k)) end do !$acc end parallel loop - !$acc update self(y(1:n)) async + !$acc update self(y(1:n)) end subroutine inner_gth end subroutine i_oacc_gthzbuf @@ -254,13 +256,13 @@ contains integer(psb_ipk_) :: n, idx(:) integer(psb_ipk_) :: x(:), y(:) integer(psb_ipk_) :: k - - !$acc parallel loop present(y) + ! + !$acc parallel loop present(x,y) do k = 1, n y(k) = x(idx(k)) end do !$acc end parallel loop - !$acc update self(y(1:n)) async + !$acc update self(y(1:n)) end subroutine inner_gth end subroutine i_oacc_gthzv_x @@ -287,7 +289,7 @@ contains if (vval%is_host()) call vval%sync() if (virl%is_host()) call virl%sync() if (x%is_host()) call x%sync() - !$acc parallel loop + !$acc parallel loop present(x%v,virl%v,vval%v) do i = 1, n x%v(virl%v(i)) = vval%v(i) end do @@ -411,7 +413,7 @@ contains if (present(first)) first_ = max(1, first) if (present(last)) last_ = min(last, last_) - !$acc parallel loop + !$acc parallel loop present(x%v) do i = first_, last_ x%v(i) = val end do @@ -449,26 +451,36 @@ contains class(psb_i_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(in) :: n integer(psb_ipk_), intent(out) :: info - if (n /= psb_size(x%combuf)) then + + !write(0,*) 'oacc new_buffer',n,psb_size(x%combuf) + if (n > psb_size(x%combuf)) then + !write(0,*) 'oacc new_buffer: reallocating ' + if (allocated(x%combuf)) then + !if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) + !$acc exit data delete(x%combuf) + end if call x%psb_i_base_vect_type%new_buffer(n,info) !$acc enter data copyin(x%combuf) + ! call acc_copyin(x%combuf) end if end subroutine i_oacc_new_buffer subroutine i_oacc_sync_dev_space(x) implicit none class(psb_i_vect_oacc), intent(inout) :: x - if (allocated(x%v)) call acc_create(x%v) +!!$ write(0,*) 'oacc sync_dev_space' + if (psb_size(x%v)>0) call acc_copyin(x%v) end subroutine i_oacc_sync_dev_space subroutine i_oacc_sync(x) implicit none class(psb_i_vect_oacc), intent(inout) :: x if (x%is_dev()) then - call acc_update_self(x%v) + if (psb_size(x%v)>0) call acc_update_self(x%v) end if if (x%is_host()) then - call acc_update_device(x%v) + if (.not.acc_is_present(x%v)) call i_oacc_sync_dev_space(x) + if (psb_size(x%v)>0) call acc_update_device(x%v) end if call x%set_sync() end subroutine i_oacc_sync @@ -541,6 +553,8 @@ contains type(psb_i_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: info info = 0 +!!$ write(0,*) 'oacc final_vect_free' + call x%free_buffer(info) if (allocated(x%v)) then if (acc_is_present(x%v)) call acc_delete_finalize(x%v) deallocate(x%v, stat=info) @@ -553,8 +567,9 @@ contains class(psb_i_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 +!!$ write(0,*) 'oacc vect_free' + call x%free_buffer(info) if (acc_is_present(x%v)) call acc_delete_finalize(x%v) - if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) call x%psb_i_base_vect_type%free(info) end subroutine i_oacc_vect_free @@ -564,8 +579,10 @@ contains integer(psb_ipk_), intent(out) :: info info = 0 - if (psb_oacc_get_maybe_free_buffer())& - & call x%free_buffer(info) + if (psb_oacc_get_maybe_free_buffer()) then + !write(0,*) 'psb_oacc_get_maybe_free_buffer() ',psb_oacc_get_maybe_free_buffer() + call x%free_buffer(info) + end if end subroutine i_oacc_vect_maybe_free_buffer @@ -573,7 +590,7 @@ contains implicit none class(psb_i_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(out) :: info - +! write(0,*) 'oacc free_buffer' info = 0 if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) call x%psb_i_base_vect_type%free_buffer(info) @@ -585,7 +602,6 @@ contains class(psb_i_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: res - if (x%is_dev()) call x%sync() res = size(x%v) end function i_oacc_get_size diff --git a/openacc/psb_l_oacc_vect_mod.F90 b/openacc/psb_l_oacc_vect_mod.F90 index d35e9141..60cdee35 100644 --- a/openacc/psb_l_oacc_vect_mod.F90 +++ b/openacc/psb_l_oacc_vect_mod.F90 @@ -72,6 +72,7 @@ contains integer(psb_ipk_) :: info, k logical :: acc_done if (.not.allocated(y%combuf)) then + write(0,*) 'allocation error for y%combuf ' call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') return end if @@ -99,8 +100,8 @@ contains integer(psb_ipk_) :: n, idx(:) integer(psb_lpk_) :: beta,x(:), y(:) integer(psb_ipk_) :: k - !$acc update device(x(1:n)) async - !$acc parallel loop + !$acc update device(x(1:n)) + !$acc parallel loop present(x,y) do k = 1, n y(idx(k)) = x(k) + beta *y(idx(k)) end do @@ -144,8 +145,8 @@ contains integer(psb_ipk_) :: n, idx(:) integer(psb_lpk_) :: beta, x(:), y(:) integer(psb_ipk_) :: k - !$acc update device(x(1:n)) async - !$acc parallel loop + !$acc update device(x(1:n)) + !$acc parallel loop present(x,y) do k = 1, n y(idx(k)) = x(k) + beta *y(idx(k)) end do @@ -187,6 +188,7 @@ contains acc_done = .false. if (.not.allocated(x%combuf)) then + write(0,*) 'oacc allocation error combuf gthzbuf ' call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') return end if @@ -212,13 +214,13 @@ contains integer(psb_ipk_) :: n, idx(:) integer(psb_lpk_) :: x(:), y(:) integer(psb_ipk_) :: k - - !$acc parallel loop present(y) + ! + !$acc parallel loop present(x,y) do k = 1, n y(k) = x(idx(k)) end do !$acc end parallel loop - !$acc update self(y(1:n)) async + !$acc update self(y(1:n)) end subroutine inner_gth end subroutine l_oacc_gthzbuf @@ -256,13 +258,13 @@ contains integer(psb_ipk_) :: n, idx(:) integer(psb_lpk_) :: x(:), y(:) integer(psb_ipk_) :: k - - !$acc parallel loop present(y) + ! + !$acc parallel loop present(x,y) do k = 1, n y(k) = x(idx(k)) end do !$acc end parallel loop - !$acc update self(y(1:n)) async + !$acc update self(y(1:n)) end subroutine inner_gth end subroutine l_oacc_gthzv_x @@ -289,7 +291,7 @@ contains if (vval%is_host()) call vval%sync() if (virl%is_host()) call virl%sync() if (x%is_host()) call x%sync() - !$acc parallel loop + !$acc parallel loop present(x%v,virl%v,vval%v) do i = 1, n x%v(virl%v(i)) = vval%v(i) end do @@ -413,7 +415,7 @@ contains if (present(first)) first_ = max(1, first) if (present(last)) last_ = min(last, last_) - !$acc parallel loop + !$acc parallel loop present(x%v) do i = first_, last_ x%v(i) = val end do @@ -451,26 +453,36 @@ contains class(psb_l_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(in) :: n integer(psb_ipk_), intent(out) :: info - if (n /= psb_size(x%combuf)) then + + !write(0,*) 'oacc new_buffer',n,psb_size(x%combuf) + if (n > psb_size(x%combuf)) then + !write(0,*) 'oacc new_buffer: reallocating ' + if (allocated(x%combuf)) then + !if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) + !$acc exit data delete(x%combuf) + end if call x%psb_l_base_vect_type%new_buffer(n,info) !$acc enter data copyin(x%combuf) + ! call acc_copyin(x%combuf) end if end subroutine l_oacc_new_buffer subroutine l_oacc_sync_dev_space(x) implicit none class(psb_l_vect_oacc), intent(inout) :: x - if (allocated(x%v)) call acc_create(x%v) +!!$ write(0,*) 'oacc sync_dev_space' + if (psb_size(x%v)>0) call acc_copyin(x%v) end subroutine l_oacc_sync_dev_space subroutine l_oacc_sync(x) implicit none class(psb_l_vect_oacc), intent(inout) :: x if (x%is_dev()) then - call acc_update_self(x%v) + if (psb_size(x%v)>0) call acc_update_self(x%v) end if if (x%is_host()) then - call acc_update_device(x%v) + if (.not.acc_is_present(x%v)) call l_oacc_sync_dev_space(x) + if (psb_size(x%v)>0) call acc_update_device(x%v) end if call x%set_sync() end subroutine l_oacc_sync @@ -543,6 +555,8 @@ contains type(psb_l_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: info info = 0 +!!$ write(0,*) 'oacc final_vect_free' + call x%free_buffer(info) if (allocated(x%v)) then if (acc_is_present(x%v)) call acc_delete_finalize(x%v) deallocate(x%v, stat=info) @@ -555,8 +569,9 @@ contains class(psb_l_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 +!!$ write(0,*) 'oacc vect_free' + call x%free_buffer(info) if (acc_is_present(x%v)) call acc_delete_finalize(x%v) - if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) call x%psb_l_base_vect_type%free(info) end subroutine l_oacc_vect_free @@ -566,8 +581,10 @@ contains integer(psb_ipk_), intent(out) :: info info = 0 - if (psb_oacc_get_maybe_free_buffer())& - & call x%free_buffer(info) + if (psb_oacc_get_maybe_free_buffer()) then + !write(0,*) 'psb_oacc_get_maybe_free_buffer() ',psb_oacc_get_maybe_free_buffer() + call x%free_buffer(info) + end if end subroutine l_oacc_vect_maybe_free_buffer @@ -575,7 +592,7 @@ contains implicit none class(psb_l_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(out) :: info - +! write(0,*) 'oacc free_buffer' info = 0 if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) call x%psb_l_base_vect_type%free_buffer(info) @@ -587,7 +604,6 @@ contains class(psb_l_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: res - if (x%is_dev()) call x%sync() res = size(x%v) end function l_oacc_get_size diff --git a/openacc/psb_s_oacc_vect_mod.F90 b/openacc/psb_s_oacc_vect_mod.F90 index 87eeccea..70c9dd49 100644 --- a/openacc/psb_s_oacc_vect_mod.F90 +++ b/openacc/psb_s_oacc_vect_mod.F90 @@ -113,7 +113,7 @@ contains real(psb_spk_), intent(inout) :: x(:) integer(psb_ipk_) :: n integer(psb_ipk_) :: i - !$acc parallel loop + !$acc parallel loop present(x) do i = 1, n x(i) = abs(x(i)) end do @@ -144,7 +144,7 @@ contains real(psb_spk_), intent(inout) :: x(:),y(:) integer(psb_ipk_) :: n integer(psb_ipk_) :: i - !$acc parallel loop + !$acc parallel loop present(x,y) do i = 1, n y(i) = abs(x(i)) end do @@ -164,7 +164,7 @@ contains real(psb_spk_), intent(in) :: alpha real(psb_spk_), intent(inout) :: x(:) integer(psb_ipk_) :: i - !$acc parallel loop + !$acc parallel loop present(x) do i = 1, size(x) x(i) = alpha * x(i) end do @@ -189,7 +189,7 @@ contains real(psb_spk_) :: sum, mx integer(psb_ipk_) :: i mx = szero - !$acc parallel loop reduction(max:mx) + !$acc parallel loop reduction(max:mx) present(x) do i = 1, n if (abs(x(i)) > mx) mx = abs(x(i)) end do @@ -197,7 +197,7 @@ contains res = mx else sum = szero - !$acc parallel loop reduction(+:sum) + !$acc parallel loop reduction(+:sum) present(x) do i = 1, n sum = sum + abs(x(i)/mx)**2 end do @@ -223,7 +223,7 @@ contains real(psb_spk_) :: max_val integer(psb_ipk_) :: i max_val = szero - !$acc parallel loop reduction(max:max_val) + !$acc parallel loop reduction(max:max_val) present(x) do i = 1, n if (abs(x(i)) > max_val) max_val = abs(x(i)) end do @@ -248,7 +248,7 @@ contains real(psb_spk_) :: res integer(psb_ipk_) :: i res = szero - !$acc parallel loop reduction(+:res) + !$acc parallel loop reduction(+:res) present(x) do i = 1, n res = res + abs(x(i)) end do @@ -265,7 +265,7 @@ contains info = 0 if (y%is_dev()) call y%sync() - !$acc parallel loop + !$acc parallel loop present(x,y) do i = 1, size(x) y%v(i) = y%v(i) * x(i) end do @@ -283,7 +283,7 @@ contains info = 0 if (z%is_dev()) call z%sync() - !$acc parallel loop + !$acc parallel loop present(x,y,z%v) do i = 1, size(x) z%v(i) = alpha * x(i) * y(i) + beta * z%v(i) end do @@ -327,7 +327,7 @@ contains real(psb_spk_), intent(inout) :: y(:) real(psb_spk_), intent(in) :: alpha, beta integer(psb_ipk_), intent(out) :: info - !$acc parallel + !$acc parallel present(x,y) !$acc loop do i = 1, m y(i) = alpha * x(i) + beta * y(i) @@ -384,7 +384,7 @@ contains if ((nx < m) .or. (ny < m) .or. (nz < m)) then info = psb_err_internal_error_ else - !$acc parallel loop + !$acc parallel loop present(xx%v,yy%v,zz%v) do i = 1, m yy%v(i) = alpha * xx%v(i) + beta * yy%v(i) zz%v(i) = gamma * yy%v(i) + delta * zz%v(i) @@ -416,6 +416,7 @@ contains integer(psb_ipk_) :: info, k logical :: acc_done if (.not.allocated(y%combuf)) then + write(0,*) 'allocation error for y%combuf ' call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') return end if @@ -443,8 +444,8 @@ contains integer(psb_ipk_) :: n, idx(:) real(psb_spk_) :: beta,x(:), y(:) integer(psb_ipk_) :: k - !$acc update device(x(1:n)) async - !$acc parallel loop + !$acc update device(x(1:n)) + !$acc parallel loop present(x,y) do k = 1, n y(idx(k)) = x(k) + beta *y(idx(k)) end do @@ -488,8 +489,8 @@ contains integer(psb_ipk_) :: n, idx(:) real(psb_spk_) :: beta, x(:), y(:) integer(psb_ipk_) :: k - !$acc update device(x(1:n)) async - !$acc parallel loop + !$acc update device(x(1:n)) + !$acc parallel loop present(x,y) do k = 1, n y(idx(k)) = x(k) + beta *y(idx(k)) end do @@ -531,6 +532,7 @@ contains acc_done = .false. if (.not.allocated(x%combuf)) then + write(0,*) 'oacc allocation error combuf gthzbuf ' call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') return end if @@ -556,13 +558,13 @@ contains integer(psb_ipk_) :: n, idx(:) real(psb_spk_) :: x(:), y(:) integer(psb_ipk_) :: k - - !$acc parallel loop present(y) + ! + !$acc parallel loop present(x,y) do k = 1, n y(k) = x(idx(k)) end do !$acc end parallel loop - !$acc update self(y(1:n)) async + !$acc update self(y(1:n)) end subroutine inner_gth end subroutine s_oacc_gthzbuf @@ -600,13 +602,13 @@ contains integer(psb_ipk_) :: n, idx(:) real(psb_spk_) :: x(:), y(:) integer(psb_ipk_) :: k - - !$acc parallel loop present(y) + ! + !$acc parallel loop present(x,y) do k = 1, n y(k) = x(idx(k)) end do !$acc end parallel loop - !$acc update self(y(1:n)) async + !$acc update self(y(1:n)) end subroutine inner_gth end subroutine s_oacc_gthzv_x @@ -633,7 +635,7 @@ contains if (vval%is_host()) call vval%sync() if (virl%is_host()) call virl%sync() if (x%is_host()) call x%sync() - !$acc parallel loop + !$acc parallel loop present(x%v,virl%v,vval%v) do i = 1, n x%v(virl%v(i)) = vval%v(i) end do @@ -757,7 +759,7 @@ contains if (present(first)) first_ = max(1, first) if (present(last)) last_ = min(last, last_) - !$acc parallel loop + !$acc parallel loop present(x%v) do i = first_, last_ x%v(i) = val end do @@ -849,26 +851,36 @@ contains class(psb_s_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(in) :: n integer(psb_ipk_), intent(out) :: info - if (n /= psb_size(x%combuf)) then + + !write(0,*) 'oacc new_buffer',n,psb_size(x%combuf) + if (n > psb_size(x%combuf)) then + !write(0,*) 'oacc new_buffer: reallocating ' + if (allocated(x%combuf)) then + !if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) + !$acc exit data delete(x%combuf) + end if call x%psb_s_base_vect_type%new_buffer(n,info) !$acc enter data copyin(x%combuf) + ! call acc_copyin(x%combuf) end if end subroutine s_oacc_new_buffer subroutine s_oacc_sync_dev_space(x) implicit none class(psb_s_vect_oacc), intent(inout) :: x - if (allocated(x%v)) call acc_create(x%v) +!!$ write(0,*) 'oacc sync_dev_space' + if (psb_size(x%v)>0) call acc_copyin(x%v) end subroutine s_oacc_sync_dev_space subroutine s_oacc_sync(x) implicit none class(psb_s_vect_oacc), intent(inout) :: x if (x%is_dev()) then - call acc_update_self(x%v) + if (psb_size(x%v)>0) call acc_update_self(x%v) end if if (x%is_host()) then - call acc_update_device(x%v) + if (.not.acc_is_present(x%v)) call s_oacc_sync_dev_space(x) + if (psb_size(x%v)>0) call acc_update_device(x%v) end if call x%set_sync() end subroutine s_oacc_sync @@ -941,6 +953,8 @@ contains type(psb_s_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: info info = 0 +!!$ write(0,*) 'oacc final_vect_free' + call x%free_buffer(info) if (allocated(x%v)) then if (acc_is_present(x%v)) call acc_delete_finalize(x%v) deallocate(x%v, stat=info) @@ -953,8 +967,9 @@ contains class(psb_s_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 +!!$ write(0,*) 'oacc vect_free' + call x%free_buffer(info) if (acc_is_present(x%v)) call acc_delete_finalize(x%v) - if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) call x%psb_s_base_vect_type%free(info) end subroutine s_oacc_vect_free @@ -964,8 +979,10 @@ contains integer(psb_ipk_), intent(out) :: info info = 0 - if (psb_oacc_get_maybe_free_buffer())& - & call x%free_buffer(info) + if (psb_oacc_get_maybe_free_buffer()) then + !write(0,*) 'psb_oacc_get_maybe_free_buffer() ',psb_oacc_get_maybe_free_buffer() + call x%free_buffer(info) + end if end subroutine s_oacc_vect_maybe_free_buffer @@ -973,7 +990,7 @@ contains implicit none class(psb_s_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(out) :: info - +! write(0,*) 'oacc free_buffer' info = 0 if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) call x%psb_s_base_vect_type%free_buffer(info) @@ -985,7 +1002,6 @@ contains class(psb_s_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: res - if (x%is_dev()) call x%sync() res = size(x%v) end function s_oacc_get_size diff --git a/openacc/psb_z_oacc_vect_mod.F90 b/openacc/psb_z_oacc_vect_mod.F90 index 0fe1adaa..0bc10283 100644 --- a/openacc/psb_z_oacc_vect_mod.F90 +++ b/openacc/psb_z_oacc_vect_mod.F90 @@ -113,7 +113,7 @@ contains complex(psb_dpk_), intent(inout) :: x(:) integer(psb_ipk_) :: n integer(psb_ipk_) :: i - !$acc parallel loop + !$acc parallel loop present(x) do i = 1, n x(i) = abs(x(i)) end do @@ -144,7 +144,7 @@ contains complex(psb_dpk_), intent(inout) :: x(:),y(:) integer(psb_ipk_) :: n integer(psb_ipk_) :: i - !$acc parallel loop + !$acc parallel loop present(x,y) do i = 1, n y(i) = abs(x(i)) end do @@ -164,7 +164,7 @@ contains complex(psb_dpk_), intent(in) :: alpha complex(psb_dpk_), intent(inout) :: x(:) integer(psb_ipk_) :: i - !$acc parallel loop + !$acc parallel loop present(x) do i = 1, size(x) x(i) = alpha * x(i) end do @@ -189,7 +189,7 @@ contains real(psb_dpk_) :: sum, mx integer(psb_ipk_) :: i mx = dzero - !$acc parallel loop reduction(max:mx) + !$acc parallel loop reduction(max:mx) present(x) do i = 1, n if (abs(x(i)) > mx) mx = abs(x(i)) end do @@ -197,7 +197,7 @@ contains res = mx else sum = dzero - !$acc parallel loop reduction(+:sum) + !$acc parallel loop reduction(+:sum) present(x) do i = 1, n sum = sum + abs(x(i)/mx)**2 end do @@ -223,7 +223,7 @@ contains real(psb_dpk_) :: max_val integer(psb_ipk_) :: i max_val = dzero - !$acc parallel loop reduction(max:max_val) + !$acc parallel loop reduction(max:max_val) present(x) do i = 1, n if (abs(x(i)) > max_val) max_val = abs(x(i)) end do @@ -248,7 +248,7 @@ contains real(psb_dpk_) :: res integer(psb_ipk_) :: i res = dzero - !$acc parallel loop reduction(+:res) + !$acc parallel loop reduction(+:res) present(x) do i = 1, n res = res + abs(x(i)) end do @@ -265,7 +265,7 @@ contains info = 0 if (y%is_dev()) call y%sync() - !$acc parallel loop + !$acc parallel loop present(x,y) do i = 1, size(x) y%v(i) = y%v(i) * x(i) end do @@ -283,7 +283,7 @@ contains info = 0 if (z%is_dev()) call z%sync() - !$acc parallel loop + !$acc parallel loop present(x,y,z%v) do i = 1, size(x) z%v(i) = alpha * x(i) * y(i) + beta * z%v(i) end do @@ -327,7 +327,7 @@ contains complex(psb_dpk_), intent(inout) :: y(:) complex(psb_dpk_), intent(in) :: alpha, beta integer(psb_ipk_), intent(out) :: info - !$acc parallel + !$acc parallel present(x,y) !$acc loop do i = 1, m y(i) = alpha * x(i) + beta * y(i) @@ -384,7 +384,7 @@ contains if ((nx < m) .or. (ny < m) .or. (nz < m)) then info = psb_err_internal_error_ else - !$acc parallel loop + !$acc parallel loop present(xx%v,yy%v,zz%v) do i = 1, m yy%v(i) = alpha * xx%v(i) + beta * yy%v(i) zz%v(i) = gamma * yy%v(i) + delta * zz%v(i) @@ -416,6 +416,7 @@ contains integer(psb_ipk_) :: info, k logical :: acc_done if (.not.allocated(y%combuf)) then + write(0,*) 'allocation error for y%combuf ' call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') return end if @@ -443,8 +444,8 @@ contains integer(psb_ipk_) :: n, idx(:) complex(psb_dpk_) :: beta,x(:), y(:) integer(psb_ipk_) :: k - !$acc update device(x(1:n)) async - !$acc parallel loop + !$acc update device(x(1:n)) + !$acc parallel loop present(x,y) do k = 1, n y(idx(k)) = x(k) + beta *y(idx(k)) end do @@ -488,8 +489,8 @@ contains integer(psb_ipk_) :: n, idx(:) complex(psb_dpk_) :: beta, x(:), y(:) integer(psb_ipk_) :: k - !$acc update device(x(1:n)) async - !$acc parallel loop + !$acc update device(x(1:n)) + !$acc parallel loop present(x,y) do k = 1, n y(idx(k)) = x(k) + beta *y(idx(k)) end do @@ -531,6 +532,7 @@ contains acc_done = .false. if (.not.allocated(x%combuf)) then + write(0,*) 'oacc allocation error combuf gthzbuf ' call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') return end if @@ -556,13 +558,13 @@ contains integer(psb_ipk_) :: n, idx(:) complex(psb_dpk_) :: x(:), y(:) integer(psb_ipk_) :: k - - !$acc parallel loop present(y) + ! + !$acc parallel loop present(x,y) do k = 1, n y(k) = x(idx(k)) end do !$acc end parallel loop - !$acc update self(y(1:n)) async + !$acc update self(y(1:n)) end subroutine inner_gth end subroutine z_oacc_gthzbuf @@ -600,13 +602,13 @@ contains integer(psb_ipk_) :: n, idx(:) complex(psb_dpk_) :: x(:), y(:) integer(psb_ipk_) :: k - - !$acc parallel loop present(y) + ! + !$acc parallel loop present(x,y) do k = 1, n y(k) = x(idx(k)) end do !$acc end parallel loop - !$acc update self(y(1:n)) async + !$acc update self(y(1:n)) end subroutine inner_gth end subroutine z_oacc_gthzv_x @@ -633,7 +635,7 @@ contains if (vval%is_host()) call vval%sync() if (virl%is_host()) call virl%sync() if (x%is_host()) call x%sync() - !$acc parallel loop + !$acc parallel loop present(x%v,virl%v,vval%v) do i = 1, n x%v(virl%v(i)) = vval%v(i) end do @@ -757,7 +759,7 @@ contains if (present(first)) first_ = max(1, first) if (present(last)) last_ = min(last, last_) - !$acc parallel loop + !$acc parallel loop present(x%v) do i = first_, last_ x%v(i) = val end do @@ -849,26 +851,36 @@ contains class(psb_z_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(in) :: n integer(psb_ipk_), intent(out) :: info - if (n /= psb_size(x%combuf)) then + + !write(0,*) 'oacc new_buffer',n,psb_size(x%combuf) + if (n > psb_size(x%combuf)) then + !write(0,*) 'oacc new_buffer: reallocating ' + if (allocated(x%combuf)) then + !if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) + !$acc exit data delete(x%combuf) + end if call x%psb_z_base_vect_type%new_buffer(n,info) !$acc enter data copyin(x%combuf) + ! call acc_copyin(x%combuf) end if end subroutine z_oacc_new_buffer subroutine z_oacc_sync_dev_space(x) implicit none class(psb_z_vect_oacc), intent(inout) :: x - if (allocated(x%v)) call acc_create(x%v) +!!$ write(0,*) 'oacc sync_dev_space' + if (psb_size(x%v)>0) call acc_copyin(x%v) end subroutine z_oacc_sync_dev_space subroutine z_oacc_sync(x) implicit none class(psb_z_vect_oacc), intent(inout) :: x if (x%is_dev()) then - call acc_update_self(x%v) + if (psb_size(x%v)>0) call acc_update_self(x%v) end if if (x%is_host()) then - call acc_update_device(x%v) + if (.not.acc_is_present(x%v)) call z_oacc_sync_dev_space(x) + if (psb_size(x%v)>0) call acc_update_device(x%v) end if call x%set_sync() end subroutine z_oacc_sync @@ -941,6 +953,8 @@ contains type(psb_z_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: info info = 0 +!!$ write(0,*) 'oacc final_vect_free' + call x%free_buffer(info) if (allocated(x%v)) then if (acc_is_present(x%v)) call acc_delete_finalize(x%v) deallocate(x%v, stat=info) @@ -953,8 +967,9 @@ contains class(psb_z_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 +!!$ write(0,*) 'oacc vect_free' + call x%free_buffer(info) if (acc_is_present(x%v)) call acc_delete_finalize(x%v) - if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) call x%psb_z_base_vect_type%free(info) end subroutine z_oacc_vect_free @@ -964,8 +979,10 @@ contains integer(psb_ipk_), intent(out) :: info info = 0 - if (psb_oacc_get_maybe_free_buffer())& - & call x%free_buffer(info) + if (psb_oacc_get_maybe_free_buffer()) then + !write(0,*) 'psb_oacc_get_maybe_free_buffer() ',psb_oacc_get_maybe_free_buffer() + call x%free_buffer(info) + end if end subroutine z_oacc_vect_maybe_free_buffer @@ -973,7 +990,7 @@ contains implicit none class(psb_z_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(out) :: info - +! write(0,*) 'oacc free_buffer' info = 0 if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) call x%psb_z_base_vect_type%free_buffer(info) @@ -985,7 +1002,6 @@ contains class(psb_z_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: res - if (x%is_dev()) call x%sync() res = size(x%v) end function z_oacc_get_size From 49469ce021df8e9c15f23d14a6d1929801b6d34d Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 8 Oct 2024 17:36:44 +0200 Subject: [PATCH 3/4] Various changes into openacc --- openacc/psb_c_oacc_csr_mat_mod.F90 | 6 +-- openacc/psb_c_oacc_ell_mat_mod.F90 | 8 +-- openacc/psb_c_oacc_hll_mat_mod.F90 | 10 ++-- openacc/psb_c_oacc_vect_mod.F90 | 80 ++++++++++++++++++------------ openacc/psb_d_oacc_csr_mat_mod.F90 | 6 +-- openacc/psb_d_oacc_ell_mat_mod.F90 | 8 +-- openacc/psb_d_oacc_hll_mat_mod.F90 | 10 ++-- openacc/psb_d_oacc_vect_mod.F90 | 80 ++++++++++++++++++------------ openacc/psb_i_oacc_vect_mod.F90 | 58 ++++++++++++++-------- openacc/psb_l_oacc_vect_mod.F90 | 58 ++++++++++++++-------- openacc/psb_s_oacc_csr_mat_mod.F90 | 6 +-- openacc/psb_s_oacc_ell_mat_mod.F90 | 8 +-- openacc/psb_s_oacc_hll_mat_mod.F90 | 10 ++-- openacc/psb_s_oacc_vect_mod.F90 | 80 ++++++++++++++++++------------ openacc/psb_z_oacc_csr_mat_mod.F90 | 6 +-- openacc/psb_z_oacc_ell_mat_mod.F90 | 8 +-- openacc/psb_z_oacc_hll_mat_mod.F90 | 10 ++-- openacc/psb_z_oacc_vect_mod.F90 | 80 ++++++++++++++++++------------ 18 files changed, 314 insertions(+), 218 deletions(-) diff --git a/openacc/psb_c_oacc_csr_mat_mod.F90 b/openacc/psb_c_oacc_csr_mat_mod.F90 index 94edc5e9..c6461fe3 100644 --- a/openacc/psb_c_oacc_csr_mat_mod.F90 +++ b/openacc/psb_c_oacc_csr_mat_mod.F90 @@ -257,9 +257,9 @@ contains ! but with size 0, then CREATE,UPDATE and DELETE ! will fail ! - if (psb_size(a%val)>0) call acc_create(a%val) - if (psb_size(a%ja)>0) call acc_create(a%ja) - if (psb_size(a%irp)>0) call acc_create(a%irp) + if (psb_size(a%val)>0) call acc_copyin(a%val) + if (psb_size(a%ja)>0) call acc_copyin(a%ja) + if (psb_size(a%irp)>0) call acc_copyin(a%irp) end subroutine c_oacc_csr_sync_dev_space subroutine c_oacc_csr_sync(a) diff --git a/openacc/psb_c_oacc_ell_mat_mod.F90 b/openacc/psb_c_oacc_ell_mat_mod.F90 index f0b9779b..b2168646 100644 --- a/openacc/psb_c_oacc_ell_mat_mod.F90 +++ b/openacc/psb_c_oacc_ell_mat_mod.F90 @@ -186,10 +186,10 @@ contains ! but with size 0, then CREATE,UPDATE and DELETE ! will fail ! - if (psb_size(a%val)>0) call acc_create(a%val) - if (psb_size(a%ja)>0) call acc_create(a%ja) - if (psb_size(a%irn)>0) call acc_create(a%irn) - if (psb_size(a%idiag)>0) call acc_create(a%idiag) + if (psb_size(a%val)>0) call acc_copyin(a%val) + if (psb_size(a%ja)>0) call acc_copyin(a%ja) + if (psb_size(a%irn)>0) call acc_copyin(a%irn) + if (psb_size(a%idiag)>0) call acc_copyin(a%idiag) end subroutine c_oacc_ell_sync_dev_space function c_oacc_ell_is_host(a) result(res) diff --git a/openacc/psb_c_oacc_hll_mat_mod.F90 b/openacc/psb_c_oacc_hll_mat_mod.F90 index 98c6a2ee..f8c19275 100644 --- a/openacc/psb_c_oacc_hll_mat_mod.F90 +++ b/openacc/psb_c_oacc_hll_mat_mod.F90 @@ -240,11 +240,11 @@ contains ! but with size 0, then CREATE,UPDATE and DELETE ! will fail ! - if (psb_size(a%val)>0) call acc_create(a%val) - if (psb_size(a%ja)>0) call acc_create(a%ja) - if (psb_size(a%irn)>0) call acc_create(a%irn) - if (psb_size(a%idiag)>0) call acc_create(a%idiag) - if (psb_size(a%hkoffs)>0) call acc_create(a%hkoffs) + if (psb_size(a%val)>0) call acc_copyin(a%val) + if (psb_size(a%ja)>0) call acc_copyin(a%ja) + if (psb_size(a%irn)>0) call acc_copyin(a%irn) + if (psb_size(a%idiag)>0) call acc_copyin(a%idiag) + if (psb_size(a%hkoffs)>0) call acc_copyin(a%hkoffs) end subroutine c_oacc_hll_sync_dev_space diff --git a/openacc/psb_c_oacc_vect_mod.F90 b/openacc/psb_c_oacc_vect_mod.F90 index e479f58d..40437184 100644 --- a/openacc/psb_c_oacc_vect_mod.F90 +++ b/openacc/psb_c_oacc_vect_mod.F90 @@ -113,7 +113,7 @@ contains complex(psb_spk_), intent(inout) :: x(:) integer(psb_ipk_) :: n integer(psb_ipk_) :: i - !$acc parallel loop + !$acc parallel loop present(x) do i = 1, n x(i) = abs(x(i)) end do @@ -144,7 +144,7 @@ contains complex(psb_spk_), intent(inout) :: x(:),y(:) integer(psb_ipk_) :: n integer(psb_ipk_) :: i - !$acc parallel loop + !$acc parallel loop present(x,y) do i = 1, n y(i) = abs(x(i)) end do @@ -164,7 +164,7 @@ contains complex(psb_spk_), intent(in) :: alpha complex(psb_spk_), intent(inout) :: x(:) integer(psb_ipk_) :: i - !$acc parallel loop + !$acc parallel loop present(x) do i = 1, size(x) x(i) = alpha * x(i) end do @@ -189,7 +189,7 @@ contains real(psb_spk_) :: sum, mx integer(psb_ipk_) :: i mx = szero - !$acc parallel loop reduction(max:mx) + !$acc parallel loop reduction(max:mx) present(x) do i = 1, n if (abs(x(i)) > mx) mx = abs(x(i)) end do @@ -197,7 +197,7 @@ contains res = mx else sum = szero - !$acc parallel loop reduction(+:sum) + !$acc parallel loop reduction(+:sum) present(x) do i = 1, n sum = sum + abs(x(i)/mx)**2 end do @@ -223,7 +223,7 @@ contains real(psb_spk_) :: max_val integer(psb_ipk_) :: i max_val = szero - !$acc parallel loop reduction(max:max_val) + !$acc parallel loop reduction(max:max_val) present(x) do i = 1, n if (abs(x(i)) > max_val) max_val = abs(x(i)) end do @@ -248,7 +248,7 @@ contains real(psb_spk_) :: res integer(psb_ipk_) :: i res = szero - !$acc parallel loop reduction(+:res) + !$acc parallel loop reduction(+:res) present(x) do i = 1, n res = res + abs(x(i)) end do @@ -265,7 +265,7 @@ contains info = 0 if (y%is_dev()) call y%sync() - !$acc parallel loop + !$acc parallel loop present(x,y) do i = 1, size(x) y%v(i) = y%v(i) * x(i) end do @@ -283,7 +283,7 @@ contains info = 0 if (z%is_dev()) call z%sync() - !$acc parallel loop + !$acc parallel loop present(x,y,z%v) do i = 1, size(x) z%v(i) = alpha * x(i) * y(i) + beta * z%v(i) end do @@ -327,7 +327,7 @@ contains complex(psb_spk_), intent(inout) :: y(:) complex(psb_spk_), intent(in) :: alpha, beta integer(psb_ipk_), intent(out) :: info - !$acc parallel + !$acc parallel present(x,y) !$acc loop do i = 1, m y(i) = alpha * x(i) + beta * y(i) @@ -384,7 +384,7 @@ contains if ((nx < m) .or. (ny < m) .or. (nz < m)) then info = psb_err_internal_error_ else - !$acc parallel loop + !$acc parallel loop present(xx%v,yy%v,zz%v) do i = 1, m yy%v(i) = alpha * xx%v(i) + beta * yy%v(i) zz%v(i) = gamma * yy%v(i) + delta * zz%v(i) @@ -416,6 +416,7 @@ contains integer(psb_ipk_) :: info, k logical :: acc_done if (.not.allocated(y%combuf)) then + write(0,*) 'allocation error for y%combuf ' call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') return end if @@ -443,8 +444,8 @@ contains integer(psb_ipk_) :: n, idx(:) complex(psb_spk_) :: beta,x(:), y(:) integer(psb_ipk_) :: k - !$acc update device(x(1:n)) async - !$acc parallel loop + !$acc update device(x(1:n)) + !$acc parallel loop present(x,y) do k = 1, n y(idx(k)) = x(k) + beta *y(idx(k)) end do @@ -488,8 +489,8 @@ contains integer(psb_ipk_) :: n, idx(:) complex(psb_spk_) :: beta, x(:), y(:) integer(psb_ipk_) :: k - !$acc update device(x(1:n)) async - !$acc parallel loop + !$acc update device(x(1:n)) + !$acc parallel loop present(x,y) do k = 1, n y(idx(k)) = x(k) + beta *y(idx(k)) end do @@ -531,6 +532,7 @@ contains acc_done = .false. if (.not.allocated(x%combuf)) then + write(0,*) 'oacc allocation error combuf gthzbuf ' call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') return end if @@ -556,13 +558,13 @@ contains integer(psb_ipk_) :: n, idx(:) complex(psb_spk_) :: x(:), y(:) integer(psb_ipk_) :: k - - !$acc parallel loop present(y) + ! + !$acc parallel loop present(x,y) do k = 1, n y(k) = x(idx(k)) end do !$acc end parallel loop - !$acc update self(y(1:n)) async + !$acc update self(y(1:n)) end subroutine inner_gth end subroutine c_oacc_gthzbuf @@ -600,13 +602,13 @@ contains integer(psb_ipk_) :: n, idx(:) complex(psb_spk_) :: x(:), y(:) integer(psb_ipk_) :: k - - !$acc parallel loop present(y) + ! + !$acc parallel loop present(x,y) do k = 1, n y(k) = x(idx(k)) end do !$acc end parallel loop - !$acc update self(y(1:n)) async + !$acc update self(y(1:n)) end subroutine inner_gth end subroutine c_oacc_gthzv_x @@ -633,7 +635,7 @@ contains if (vval%is_host()) call vval%sync() if (virl%is_host()) call virl%sync() if (x%is_host()) call x%sync() - !$acc parallel loop + !$acc parallel loop present(x%v,virl%v,vval%v) do i = 1, n x%v(virl%v(i)) = vval%v(i) end do @@ -757,7 +759,7 @@ contains if (present(first)) first_ = max(1, first) if (present(last)) last_ = min(last, last_) - !$acc parallel loop + !$acc parallel loop present(x%v) do i = first_, last_ x%v(i) = val end do @@ -849,26 +851,36 @@ contains class(psb_c_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(in) :: n integer(psb_ipk_), intent(out) :: info - if (n /= psb_size(x%combuf)) then + + !write(0,*) 'oacc new_buffer',n,psb_size(x%combuf) + if (n > psb_size(x%combuf)) then + !write(0,*) 'oacc new_buffer: reallocating ' + if (allocated(x%combuf)) then + !if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) + !$acc exit data delete(x%combuf) + end if call x%psb_c_base_vect_type%new_buffer(n,info) !$acc enter data copyin(x%combuf) + ! call acc_copyin(x%combuf) end if end subroutine c_oacc_new_buffer subroutine c_oacc_sync_dev_space(x) implicit none class(psb_c_vect_oacc), intent(inout) :: x - if (allocated(x%v)) call acc_create(x%v) +!!$ write(0,*) 'oacc sync_dev_space' + if (psb_size(x%v)>0) call acc_copyin(x%v) end subroutine c_oacc_sync_dev_space subroutine c_oacc_sync(x) implicit none class(psb_c_vect_oacc), intent(inout) :: x if (x%is_dev()) then - call acc_update_self(x%v) + if (psb_size(x%v)>0) call acc_update_self(x%v) end if if (x%is_host()) then - call acc_update_device(x%v) + if (.not.acc_is_present(x%v)) call c_oacc_sync_dev_space(x) + if (psb_size(x%v)>0) call acc_update_device(x%v) end if call x%set_sync() end subroutine c_oacc_sync @@ -941,6 +953,8 @@ contains type(psb_c_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: info info = 0 +!!$ write(0,*) 'oacc final_vect_free' + call x%free_buffer(info) if (allocated(x%v)) then if (acc_is_present(x%v)) call acc_delete_finalize(x%v) deallocate(x%v, stat=info) @@ -953,8 +967,9 @@ contains class(psb_c_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 +!!$ write(0,*) 'oacc vect_free' + call x%free_buffer(info) if (acc_is_present(x%v)) call acc_delete_finalize(x%v) - if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) call x%psb_c_base_vect_type%free(info) end subroutine c_oacc_vect_free @@ -964,8 +979,10 @@ contains integer(psb_ipk_), intent(out) :: info info = 0 - if (psb_oacc_get_maybe_free_buffer())& - & call x%free_buffer(info) + if (psb_oacc_get_maybe_free_buffer()) then + !write(0,*) 'psb_oacc_get_maybe_free_buffer() ',psb_oacc_get_maybe_free_buffer() + call x%free_buffer(info) + end if end subroutine c_oacc_vect_maybe_free_buffer @@ -973,7 +990,7 @@ contains implicit none class(psb_c_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(out) :: info - +! write(0,*) 'oacc free_buffer' info = 0 if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) call x%psb_c_base_vect_type%free_buffer(info) @@ -985,7 +1002,6 @@ contains class(psb_c_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: res - if (x%is_dev()) call x%sync() res = size(x%v) end function c_oacc_get_size diff --git a/openacc/psb_d_oacc_csr_mat_mod.F90 b/openacc/psb_d_oacc_csr_mat_mod.F90 index a3119b64..21907312 100644 --- a/openacc/psb_d_oacc_csr_mat_mod.F90 +++ b/openacc/psb_d_oacc_csr_mat_mod.F90 @@ -257,9 +257,9 @@ contains ! but with size 0, then CREATE,UPDATE and DELETE ! will fail ! - if (psb_size(a%val)>0) call acc_create(a%val) - if (psb_size(a%ja)>0) call acc_create(a%ja) - if (psb_size(a%irp)>0) call acc_create(a%irp) + if (psb_size(a%val)>0) call acc_copyin(a%val) + if (psb_size(a%ja)>0) call acc_copyin(a%ja) + if (psb_size(a%irp)>0) call acc_copyin(a%irp) end subroutine d_oacc_csr_sync_dev_space subroutine d_oacc_csr_sync(a) diff --git a/openacc/psb_d_oacc_ell_mat_mod.F90 b/openacc/psb_d_oacc_ell_mat_mod.F90 index 3932e286..021face3 100644 --- a/openacc/psb_d_oacc_ell_mat_mod.F90 +++ b/openacc/psb_d_oacc_ell_mat_mod.F90 @@ -186,10 +186,10 @@ contains ! but with size 0, then CREATE,UPDATE and DELETE ! will fail ! - if (psb_size(a%val)>0) call acc_create(a%val) - if (psb_size(a%ja)>0) call acc_create(a%ja) - if (psb_size(a%irn)>0) call acc_create(a%irn) - if (psb_size(a%idiag)>0) call acc_create(a%idiag) + if (psb_size(a%val)>0) call acc_copyin(a%val) + if (psb_size(a%ja)>0) call acc_copyin(a%ja) + if (psb_size(a%irn)>0) call acc_copyin(a%irn) + if (psb_size(a%idiag)>0) call acc_copyin(a%idiag) end subroutine d_oacc_ell_sync_dev_space function d_oacc_ell_is_host(a) result(res) diff --git a/openacc/psb_d_oacc_hll_mat_mod.F90 b/openacc/psb_d_oacc_hll_mat_mod.F90 index fd0fe54f..264bbcce 100644 --- a/openacc/psb_d_oacc_hll_mat_mod.F90 +++ b/openacc/psb_d_oacc_hll_mat_mod.F90 @@ -240,11 +240,11 @@ contains ! but with size 0, then CREATE,UPDATE and DELETE ! will fail ! - if (psb_size(a%val)>0) call acc_create(a%val) - if (psb_size(a%ja)>0) call acc_create(a%ja) - if (psb_size(a%irn)>0) call acc_create(a%irn) - if (psb_size(a%idiag)>0) call acc_create(a%idiag) - if (psb_size(a%hkoffs)>0) call acc_create(a%hkoffs) + if (psb_size(a%val)>0) call acc_copyin(a%val) + if (psb_size(a%ja)>0) call acc_copyin(a%ja) + if (psb_size(a%irn)>0) call acc_copyin(a%irn) + if (psb_size(a%idiag)>0) call acc_copyin(a%idiag) + if (psb_size(a%hkoffs)>0) call acc_copyin(a%hkoffs) end subroutine d_oacc_hll_sync_dev_space diff --git a/openacc/psb_d_oacc_vect_mod.F90 b/openacc/psb_d_oacc_vect_mod.F90 index 7fd2a441..84441c8a 100644 --- a/openacc/psb_d_oacc_vect_mod.F90 +++ b/openacc/psb_d_oacc_vect_mod.F90 @@ -113,7 +113,7 @@ contains real(psb_dpk_), intent(inout) :: x(:) integer(psb_ipk_) :: n integer(psb_ipk_) :: i - !$acc parallel loop + !$acc parallel loop present(x) do i = 1, n x(i) = abs(x(i)) end do @@ -144,7 +144,7 @@ contains real(psb_dpk_), intent(inout) :: x(:),y(:) integer(psb_ipk_) :: n integer(psb_ipk_) :: i - !$acc parallel loop + !$acc parallel loop present(x,y) do i = 1, n y(i) = abs(x(i)) end do @@ -164,7 +164,7 @@ contains real(psb_dpk_), intent(in) :: alpha real(psb_dpk_), intent(inout) :: x(:) integer(psb_ipk_) :: i - !$acc parallel loop + !$acc parallel loop present(x) do i = 1, size(x) x(i) = alpha * x(i) end do @@ -189,7 +189,7 @@ contains real(psb_dpk_) :: sum, mx integer(psb_ipk_) :: i mx = dzero - !$acc parallel loop reduction(max:mx) + !$acc parallel loop reduction(max:mx) present(x) do i = 1, n if (abs(x(i)) > mx) mx = abs(x(i)) end do @@ -197,7 +197,7 @@ contains res = mx else sum = dzero - !$acc parallel loop reduction(+:sum) + !$acc parallel loop reduction(+:sum) present(x) do i = 1, n sum = sum + abs(x(i)/mx)**2 end do @@ -223,7 +223,7 @@ contains real(psb_dpk_) :: max_val integer(psb_ipk_) :: i max_val = dzero - !$acc parallel loop reduction(max:max_val) + !$acc parallel loop reduction(max:max_val) present(x) do i = 1, n if (abs(x(i)) > max_val) max_val = abs(x(i)) end do @@ -248,7 +248,7 @@ contains real(psb_dpk_) :: res integer(psb_ipk_) :: i res = dzero - !$acc parallel loop reduction(+:res) + !$acc parallel loop reduction(+:res) present(x) do i = 1, n res = res + abs(x(i)) end do @@ -265,7 +265,7 @@ contains info = 0 if (y%is_dev()) call y%sync() - !$acc parallel loop + !$acc parallel loop present(x,y) do i = 1, size(x) y%v(i) = y%v(i) * x(i) end do @@ -283,7 +283,7 @@ contains info = 0 if (z%is_dev()) call z%sync() - !$acc parallel loop + !$acc parallel loop present(x,y,z%v) do i = 1, size(x) z%v(i) = alpha * x(i) * y(i) + beta * z%v(i) end do @@ -327,7 +327,7 @@ contains real(psb_dpk_), intent(inout) :: y(:) real(psb_dpk_), intent(in) :: alpha, beta integer(psb_ipk_), intent(out) :: info - !$acc parallel + !$acc parallel present(x,y) !$acc loop do i = 1, m y(i) = alpha * x(i) + beta * y(i) @@ -384,7 +384,7 @@ contains if ((nx < m) .or. (ny < m) .or. (nz < m)) then info = psb_err_internal_error_ else - !$acc parallel loop + !$acc parallel loop present(xx%v,yy%v,zz%v) do i = 1, m yy%v(i) = alpha * xx%v(i) + beta * yy%v(i) zz%v(i) = gamma * yy%v(i) + delta * zz%v(i) @@ -416,6 +416,7 @@ contains integer(psb_ipk_) :: info, k logical :: acc_done if (.not.allocated(y%combuf)) then + write(0,*) 'allocation error for y%combuf ' call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') return end if @@ -443,8 +444,8 @@ contains integer(psb_ipk_) :: n, idx(:) real(psb_dpk_) :: beta,x(:), y(:) integer(psb_ipk_) :: k - !$acc update device(x(1:n)) async - !$acc parallel loop + !$acc update device(x(1:n)) + !$acc parallel loop present(x,y) do k = 1, n y(idx(k)) = x(k) + beta *y(idx(k)) end do @@ -488,8 +489,8 @@ contains integer(psb_ipk_) :: n, idx(:) real(psb_dpk_) :: beta, x(:), y(:) integer(psb_ipk_) :: k - !$acc update device(x(1:n)) async - !$acc parallel loop + !$acc update device(x(1:n)) + !$acc parallel loop present(x,y) do k = 1, n y(idx(k)) = x(k) + beta *y(idx(k)) end do @@ -531,6 +532,7 @@ contains acc_done = .false. if (.not.allocated(x%combuf)) then + write(0,*) 'oacc allocation error combuf gthzbuf ' call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') return end if @@ -556,13 +558,13 @@ contains integer(psb_ipk_) :: n, idx(:) real(psb_dpk_) :: x(:), y(:) integer(psb_ipk_) :: k - - !$acc parallel loop present(y) + ! + !$acc parallel loop present(x,y) do k = 1, n y(k) = x(idx(k)) end do !$acc end parallel loop - !$acc update self(y(1:n)) async + !$acc update self(y(1:n)) end subroutine inner_gth end subroutine d_oacc_gthzbuf @@ -600,13 +602,13 @@ contains integer(psb_ipk_) :: n, idx(:) real(psb_dpk_) :: x(:), y(:) integer(psb_ipk_) :: k - - !$acc parallel loop present(y) + ! + !$acc parallel loop present(x,y) do k = 1, n y(k) = x(idx(k)) end do !$acc end parallel loop - !$acc update self(y(1:n)) async + !$acc update self(y(1:n)) end subroutine inner_gth end subroutine d_oacc_gthzv_x @@ -633,7 +635,7 @@ contains if (vval%is_host()) call vval%sync() if (virl%is_host()) call virl%sync() if (x%is_host()) call x%sync() - !$acc parallel loop + !$acc parallel loop present(x%v,virl%v,vval%v) do i = 1, n x%v(virl%v(i)) = vval%v(i) end do @@ -757,7 +759,7 @@ contains if (present(first)) first_ = max(1, first) if (present(last)) last_ = min(last, last_) - !$acc parallel loop + !$acc parallel loop present(x%v) do i = first_, last_ x%v(i) = val end do @@ -849,26 +851,36 @@ contains class(psb_d_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(in) :: n integer(psb_ipk_), intent(out) :: info - if (n /= psb_size(x%combuf)) then + + !write(0,*) 'oacc new_buffer',n,psb_size(x%combuf) + if (n > psb_size(x%combuf)) then + !write(0,*) 'oacc new_buffer: reallocating ' + if (allocated(x%combuf)) then + !if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) + !$acc exit data delete(x%combuf) + end if call x%psb_d_base_vect_type%new_buffer(n,info) !$acc enter data copyin(x%combuf) + ! call acc_copyin(x%combuf) end if end subroutine d_oacc_new_buffer subroutine d_oacc_sync_dev_space(x) implicit none class(psb_d_vect_oacc), intent(inout) :: x - if (allocated(x%v)) call acc_create(x%v) +!!$ write(0,*) 'oacc sync_dev_space' + if (psb_size(x%v)>0) call acc_copyin(x%v) end subroutine d_oacc_sync_dev_space subroutine d_oacc_sync(x) implicit none class(psb_d_vect_oacc), intent(inout) :: x if (x%is_dev()) then - call acc_update_self(x%v) + if (psb_size(x%v)>0) call acc_update_self(x%v) end if if (x%is_host()) then - call acc_update_device(x%v) + if (.not.acc_is_present(x%v)) call d_oacc_sync_dev_space(x) + if (psb_size(x%v)>0) call acc_update_device(x%v) end if call x%set_sync() end subroutine d_oacc_sync @@ -941,6 +953,8 @@ contains type(psb_d_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: info info = 0 +!!$ write(0,*) 'oacc final_vect_free' + call x%free_buffer(info) if (allocated(x%v)) then if (acc_is_present(x%v)) call acc_delete_finalize(x%v) deallocate(x%v, stat=info) @@ -953,8 +967,9 @@ contains class(psb_d_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 +!!$ write(0,*) 'oacc vect_free' + call x%free_buffer(info) if (acc_is_present(x%v)) call acc_delete_finalize(x%v) - if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) call x%psb_d_base_vect_type%free(info) end subroutine d_oacc_vect_free @@ -964,8 +979,10 @@ contains integer(psb_ipk_), intent(out) :: info info = 0 - if (psb_oacc_get_maybe_free_buffer())& - & call x%free_buffer(info) + if (psb_oacc_get_maybe_free_buffer()) then + !write(0,*) 'psb_oacc_get_maybe_free_buffer() ',psb_oacc_get_maybe_free_buffer() + call x%free_buffer(info) + end if end subroutine d_oacc_vect_maybe_free_buffer @@ -973,7 +990,7 @@ contains implicit none class(psb_d_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(out) :: info - +! write(0,*) 'oacc free_buffer' info = 0 if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) call x%psb_d_base_vect_type%free_buffer(info) @@ -985,7 +1002,6 @@ contains class(psb_d_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: res - if (x%is_dev()) call x%sync() res = size(x%v) end function d_oacc_get_size diff --git a/openacc/psb_i_oacc_vect_mod.F90 b/openacc/psb_i_oacc_vect_mod.F90 index 455453a1..42cdc18c 100644 --- a/openacc/psb_i_oacc_vect_mod.F90 +++ b/openacc/psb_i_oacc_vect_mod.F90 @@ -70,6 +70,7 @@ contains integer(psb_ipk_) :: info, k logical :: acc_done if (.not.allocated(y%combuf)) then + write(0,*) 'allocation error for y%combuf ' call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') return end if @@ -97,8 +98,8 @@ contains integer(psb_ipk_) :: n, idx(:) integer(psb_ipk_) :: beta,x(:), y(:) integer(psb_ipk_) :: k - !$acc update device(x(1:n)) async - !$acc parallel loop + !$acc update device(x(1:n)) + !$acc parallel loop present(x,y) do k = 1, n y(idx(k)) = x(k) + beta *y(idx(k)) end do @@ -142,8 +143,8 @@ contains integer(psb_ipk_) :: n, idx(:) integer(psb_ipk_) :: beta, x(:), y(:) integer(psb_ipk_) :: k - !$acc update device(x(1:n)) async - !$acc parallel loop + !$acc update device(x(1:n)) + !$acc parallel loop present(x,y) do k = 1, n y(idx(k)) = x(k) + beta *y(idx(k)) end do @@ -185,6 +186,7 @@ contains acc_done = .false. if (.not.allocated(x%combuf)) then + write(0,*) 'oacc allocation error combuf gthzbuf ' call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') return end if @@ -210,13 +212,13 @@ contains integer(psb_ipk_) :: n, idx(:) integer(psb_ipk_) :: x(:), y(:) integer(psb_ipk_) :: k - - !$acc parallel loop present(y) + ! + !$acc parallel loop present(x,y) do k = 1, n y(k) = x(idx(k)) end do !$acc end parallel loop - !$acc update self(y(1:n)) async + !$acc update self(y(1:n)) end subroutine inner_gth end subroutine i_oacc_gthzbuf @@ -254,13 +256,13 @@ contains integer(psb_ipk_) :: n, idx(:) integer(psb_ipk_) :: x(:), y(:) integer(psb_ipk_) :: k - - !$acc parallel loop present(y) + ! + !$acc parallel loop present(x,y) do k = 1, n y(k) = x(idx(k)) end do !$acc end parallel loop - !$acc update self(y(1:n)) async + !$acc update self(y(1:n)) end subroutine inner_gth end subroutine i_oacc_gthzv_x @@ -287,7 +289,7 @@ contains if (vval%is_host()) call vval%sync() if (virl%is_host()) call virl%sync() if (x%is_host()) call x%sync() - !$acc parallel loop + !$acc parallel loop present(x%v,virl%v,vval%v) do i = 1, n x%v(virl%v(i)) = vval%v(i) end do @@ -411,7 +413,7 @@ contains if (present(first)) first_ = max(1, first) if (present(last)) last_ = min(last, last_) - !$acc parallel loop + !$acc parallel loop present(x%v) do i = first_, last_ x%v(i) = val end do @@ -449,26 +451,36 @@ contains class(psb_i_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(in) :: n integer(psb_ipk_), intent(out) :: info - if (n /= psb_size(x%combuf)) then + + !write(0,*) 'oacc new_buffer',n,psb_size(x%combuf) + if (n > psb_size(x%combuf)) then + !write(0,*) 'oacc new_buffer: reallocating ' + if (allocated(x%combuf)) then + !if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) + !$acc exit data delete(x%combuf) + end if call x%psb_i_base_vect_type%new_buffer(n,info) !$acc enter data copyin(x%combuf) + ! call acc_copyin(x%combuf) end if end subroutine i_oacc_new_buffer subroutine i_oacc_sync_dev_space(x) implicit none class(psb_i_vect_oacc), intent(inout) :: x - if (allocated(x%v)) call acc_create(x%v) +!!$ write(0,*) 'oacc sync_dev_space' + if (psb_size(x%v)>0) call acc_copyin(x%v) end subroutine i_oacc_sync_dev_space subroutine i_oacc_sync(x) implicit none class(psb_i_vect_oacc), intent(inout) :: x if (x%is_dev()) then - call acc_update_self(x%v) + if (psb_size(x%v)>0) call acc_update_self(x%v) end if if (x%is_host()) then - call acc_update_device(x%v) + if (.not.acc_is_present(x%v)) call i_oacc_sync_dev_space(x) + if (psb_size(x%v)>0) call acc_update_device(x%v) end if call x%set_sync() end subroutine i_oacc_sync @@ -541,6 +553,8 @@ contains type(psb_i_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: info info = 0 +!!$ write(0,*) 'oacc final_vect_free' + call x%free_buffer(info) if (allocated(x%v)) then if (acc_is_present(x%v)) call acc_delete_finalize(x%v) deallocate(x%v, stat=info) @@ -553,8 +567,9 @@ contains class(psb_i_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 +!!$ write(0,*) 'oacc vect_free' + call x%free_buffer(info) if (acc_is_present(x%v)) call acc_delete_finalize(x%v) - if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) call x%psb_i_base_vect_type%free(info) end subroutine i_oacc_vect_free @@ -564,8 +579,10 @@ contains integer(psb_ipk_), intent(out) :: info info = 0 - if (psb_oacc_get_maybe_free_buffer())& - & call x%free_buffer(info) + if (psb_oacc_get_maybe_free_buffer()) then + !write(0,*) 'psb_oacc_get_maybe_free_buffer() ',psb_oacc_get_maybe_free_buffer() + call x%free_buffer(info) + end if end subroutine i_oacc_vect_maybe_free_buffer @@ -573,7 +590,7 @@ contains implicit none class(psb_i_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(out) :: info - +! write(0,*) 'oacc free_buffer' info = 0 if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) call x%psb_i_base_vect_type%free_buffer(info) @@ -585,7 +602,6 @@ contains class(psb_i_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: res - if (x%is_dev()) call x%sync() res = size(x%v) end function i_oacc_get_size diff --git a/openacc/psb_l_oacc_vect_mod.F90 b/openacc/psb_l_oacc_vect_mod.F90 index d35e9141..60cdee35 100644 --- a/openacc/psb_l_oacc_vect_mod.F90 +++ b/openacc/psb_l_oacc_vect_mod.F90 @@ -72,6 +72,7 @@ contains integer(psb_ipk_) :: info, k logical :: acc_done if (.not.allocated(y%combuf)) then + write(0,*) 'allocation error for y%combuf ' call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') return end if @@ -99,8 +100,8 @@ contains integer(psb_ipk_) :: n, idx(:) integer(psb_lpk_) :: beta,x(:), y(:) integer(psb_ipk_) :: k - !$acc update device(x(1:n)) async - !$acc parallel loop + !$acc update device(x(1:n)) + !$acc parallel loop present(x,y) do k = 1, n y(idx(k)) = x(k) + beta *y(idx(k)) end do @@ -144,8 +145,8 @@ contains integer(psb_ipk_) :: n, idx(:) integer(psb_lpk_) :: beta, x(:), y(:) integer(psb_ipk_) :: k - !$acc update device(x(1:n)) async - !$acc parallel loop + !$acc update device(x(1:n)) + !$acc parallel loop present(x,y) do k = 1, n y(idx(k)) = x(k) + beta *y(idx(k)) end do @@ -187,6 +188,7 @@ contains acc_done = .false. if (.not.allocated(x%combuf)) then + write(0,*) 'oacc allocation error combuf gthzbuf ' call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') return end if @@ -212,13 +214,13 @@ contains integer(psb_ipk_) :: n, idx(:) integer(psb_lpk_) :: x(:), y(:) integer(psb_ipk_) :: k - - !$acc parallel loop present(y) + ! + !$acc parallel loop present(x,y) do k = 1, n y(k) = x(idx(k)) end do !$acc end parallel loop - !$acc update self(y(1:n)) async + !$acc update self(y(1:n)) end subroutine inner_gth end subroutine l_oacc_gthzbuf @@ -256,13 +258,13 @@ contains integer(psb_ipk_) :: n, idx(:) integer(psb_lpk_) :: x(:), y(:) integer(psb_ipk_) :: k - - !$acc parallel loop present(y) + ! + !$acc parallel loop present(x,y) do k = 1, n y(k) = x(idx(k)) end do !$acc end parallel loop - !$acc update self(y(1:n)) async + !$acc update self(y(1:n)) end subroutine inner_gth end subroutine l_oacc_gthzv_x @@ -289,7 +291,7 @@ contains if (vval%is_host()) call vval%sync() if (virl%is_host()) call virl%sync() if (x%is_host()) call x%sync() - !$acc parallel loop + !$acc parallel loop present(x%v,virl%v,vval%v) do i = 1, n x%v(virl%v(i)) = vval%v(i) end do @@ -413,7 +415,7 @@ contains if (present(first)) first_ = max(1, first) if (present(last)) last_ = min(last, last_) - !$acc parallel loop + !$acc parallel loop present(x%v) do i = first_, last_ x%v(i) = val end do @@ -451,26 +453,36 @@ contains class(psb_l_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(in) :: n integer(psb_ipk_), intent(out) :: info - if (n /= psb_size(x%combuf)) then + + !write(0,*) 'oacc new_buffer',n,psb_size(x%combuf) + if (n > psb_size(x%combuf)) then + !write(0,*) 'oacc new_buffer: reallocating ' + if (allocated(x%combuf)) then + !if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) + !$acc exit data delete(x%combuf) + end if call x%psb_l_base_vect_type%new_buffer(n,info) !$acc enter data copyin(x%combuf) + ! call acc_copyin(x%combuf) end if end subroutine l_oacc_new_buffer subroutine l_oacc_sync_dev_space(x) implicit none class(psb_l_vect_oacc), intent(inout) :: x - if (allocated(x%v)) call acc_create(x%v) +!!$ write(0,*) 'oacc sync_dev_space' + if (psb_size(x%v)>0) call acc_copyin(x%v) end subroutine l_oacc_sync_dev_space subroutine l_oacc_sync(x) implicit none class(psb_l_vect_oacc), intent(inout) :: x if (x%is_dev()) then - call acc_update_self(x%v) + if (psb_size(x%v)>0) call acc_update_self(x%v) end if if (x%is_host()) then - call acc_update_device(x%v) + if (.not.acc_is_present(x%v)) call l_oacc_sync_dev_space(x) + if (psb_size(x%v)>0) call acc_update_device(x%v) end if call x%set_sync() end subroutine l_oacc_sync @@ -543,6 +555,8 @@ contains type(psb_l_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: info info = 0 +!!$ write(0,*) 'oacc final_vect_free' + call x%free_buffer(info) if (allocated(x%v)) then if (acc_is_present(x%v)) call acc_delete_finalize(x%v) deallocate(x%v, stat=info) @@ -555,8 +569,9 @@ contains class(psb_l_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 +!!$ write(0,*) 'oacc vect_free' + call x%free_buffer(info) if (acc_is_present(x%v)) call acc_delete_finalize(x%v) - if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) call x%psb_l_base_vect_type%free(info) end subroutine l_oacc_vect_free @@ -566,8 +581,10 @@ contains integer(psb_ipk_), intent(out) :: info info = 0 - if (psb_oacc_get_maybe_free_buffer())& - & call x%free_buffer(info) + if (psb_oacc_get_maybe_free_buffer()) then + !write(0,*) 'psb_oacc_get_maybe_free_buffer() ',psb_oacc_get_maybe_free_buffer() + call x%free_buffer(info) + end if end subroutine l_oacc_vect_maybe_free_buffer @@ -575,7 +592,7 @@ contains implicit none class(psb_l_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(out) :: info - +! write(0,*) 'oacc free_buffer' info = 0 if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) call x%psb_l_base_vect_type%free_buffer(info) @@ -587,7 +604,6 @@ contains class(psb_l_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: res - if (x%is_dev()) call x%sync() res = size(x%v) end function l_oacc_get_size diff --git a/openacc/psb_s_oacc_csr_mat_mod.F90 b/openacc/psb_s_oacc_csr_mat_mod.F90 index 5eaf80f7..d66dca3b 100644 --- a/openacc/psb_s_oacc_csr_mat_mod.F90 +++ b/openacc/psb_s_oacc_csr_mat_mod.F90 @@ -257,9 +257,9 @@ contains ! but with size 0, then CREATE,UPDATE and DELETE ! will fail ! - if (psb_size(a%val)>0) call acc_create(a%val) - if (psb_size(a%ja)>0) call acc_create(a%ja) - if (psb_size(a%irp)>0) call acc_create(a%irp) + if (psb_size(a%val)>0) call acc_copyin(a%val) + if (psb_size(a%ja)>0) call acc_copyin(a%ja) + if (psb_size(a%irp)>0) call acc_copyin(a%irp) end subroutine s_oacc_csr_sync_dev_space subroutine s_oacc_csr_sync(a) diff --git a/openacc/psb_s_oacc_ell_mat_mod.F90 b/openacc/psb_s_oacc_ell_mat_mod.F90 index 56775879..600a08a7 100644 --- a/openacc/psb_s_oacc_ell_mat_mod.F90 +++ b/openacc/psb_s_oacc_ell_mat_mod.F90 @@ -186,10 +186,10 @@ contains ! but with size 0, then CREATE,UPDATE and DELETE ! will fail ! - if (psb_size(a%val)>0) call acc_create(a%val) - if (psb_size(a%ja)>0) call acc_create(a%ja) - if (psb_size(a%irn)>0) call acc_create(a%irn) - if (psb_size(a%idiag)>0) call acc_create(a%idiag) + if (psb_size(a%val)>0) call acc_copyin(a%val) + if (psb_size(a%ja)>0) call acc_copyin(a%ja) + if (psb_size(a%irn)>0) call acc_copyin(a%irn) + if (psb_size(a%idiag)>0) call acc_copyin(a%idiag) end subroutine s_oacc_ell_sync_dev_space function s_oacc_ell_is_host(a) result(res) diff --git a/openacc/psb_s_oacc_hll_mat_mod.F90 b/openacc/psb_s_oacc_hll_mat_mod.F90 index 997433a1..33033248 100644 --- a/openacc/psb_s_oacc_hll_mat_mod.F90 +++ b/openacc/psb_s_oacc_hll_mat_mod.F90 @@ -240,11 +240,11 @@ contains ! but with size 0, then CREATE,UPDATE and DELETE ! will fail ! - if (psb_size(a%val)>0) call acc_create(a%val) - if (psb_size(a%ja)>0) call acc_create(a%ja) - if (psb_size(a%irn)>0) call acc_create(a%irn) - if (psb_size(a%idiag)>0) call acc_create(a%idiag) - if (psb_size(a%hkoffs)>0) call acc_create(a%hkoffs) + if (psb_size(a%val)>0) call acc_copyin(a%val) + if (psb_size(a%ja)>0) call acc_copyin(a%ja) + if (psb_size(a%irn)>0) call acc_copyin(a%irn) + if (psb_size(a%idiag)>0) call acc_copyin(a%idiag) + if (psb_size(a%hkoffs)>0) call acc_copyin(a%hkoffs) end subroutine s_oacc_hll_sync_dev_space diff --git a/openacc/psb_s_oacc_vect_mod.F90 b/openacc/psb_s_oacc_vect_mod.F90 index 87eeccea..70c9dd49 100644 --- a/openacc/psb_s_oacc_vect_mod.F90 +++ b/openacc/psb_s_oacc_vect_mod.F90 @@ -113,7 +113,7 @@ contains real(psb_spk_), intent(inout) :: x(:) integer(psb_ipk_) :: n integer(psb_ipk_) :: i - !$acc parallel loop + !$acc parallel loop present(x) do i = 1, n x(i) = abs(x(i)) end do @@ -144,7 +144,7 @@ contains real(psb_spk_), intent(inout) :: x(:),y(:) integer(psb_ipk_) :: n integer(psb_ipk_) :: i - !$acc parallel loop + !$acc parallel loop present(x,y) do i = 1, n y(i) = abs(x(i)) end do @@ -164,7 +164,7 @@ contains real(psb_spk_), intent(in) :: alpha real(psb_spk_), intent(inout) :: x(:) integer(psb_ipk_) :: i - !$acc parallel loop + !$acc parallel loop present(x) do i = 1, size(x) x(i) = alpha * x(i) end do @@ -189,7 +189,7 @@ contains real(psb_spk_) :: sum, mx integer(psb_ipk_) :: i mx = szero - !$acc parallel loop reduction(max:mx) + !$acc parallel loop reduction(max:mx) present(x) do i = 1, n if (abs(x(i)) > mx) mx = abs(x(i)) end do @@ -197,7 +197,7 @@ contains res = mx else sum = szero - !$acc parallel loop reduction(+:sum) + !$acc parallel loop reduction(+:sum) present(x) do i = 1, n sum = sum + abs(x(i)/mx)**2 end do @@ -223,7 +223,7 @@ contains real(psb_spk_) :: max_val integer(psb_ipk_) :: i max_val = szero - !$acc parallel loop reduction(max:max_val) + !$acc parallel loop reduction(max:max_val) present(x) do i = 1, n if (abs(x(i)) > max_val) max_val = abs(x(i)) end do @@ -248,7 +248,7 @@ contains real(psb_spk_) :: res integer(psb_ipk_) :: i res = szero - !$acc parallel loop reduction(+:res) + !$acc parallel loop reduction(+:res) present(x) do i = 1, n res = res + abs(x(i)) end do @@ -265,7 +265,7 @@ contains info = 0 if (y%is_dev()) call y%sync() - !$acc parallel loop + !$acc parallel loop present(x,y) do i = 1, size(x) y%v(i) = y%v(i) * x(i) end do @@ -283,7 +283,7 @@ contains info = 0 if (z%is_dev()) call z%sync() - !$acc parallel loop + !$acc parallel loop present(x,y,z%v) do i = 1, size(x) z%v(i) = alpha * x(i) * y(i) + beta * z%v(i) end do @@ -327,7 +327,7 @@ contains real(psb_spk_), intent(inout) :: y(:) real(psb_spk_), intent(in) :: alpha, beta integer(psb_ipk_), intent(out) :: info - !$acc parallel + !$acc parallel present(x,y) !$acc loop do i = 1, m y(i) = alpha * x(i) + beta * y(i) @@ -384,7 +384,7 @@ contains if ((nx < m) .or. (ny < m) .or. (nz < m)) then info = psb_err_internal_error_ else - !$acc parallel loop + !$acc parallel loop present(xx%v,yy%v,zz%v) do i = 1, m yy%v(i) = alpha * xx%v(i) + beta * yy%v(i) zz%v(i) = gamma * yy%v(i) + delta * zz%v(i) @@ -416,6 +416,7 @@ contains integer(psb_ipk_) :: info, k logical :: acc_done if (.not.allocated(y%combuf)) then + write(0,*) 'allocation error for y%combuf ' call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') return end if @@ -443,8 +444,8 @@ contains integer(psb_ipk_) :: n, idx(:) real(psb_spk_) :: beta,x(:), y(:) integer(psb_ipk_) :: k - !$acc update device(x(1:n)) async - !$acc parallel loop + !$acc update device(x(1:n)) + !$acc parallel loop present(x,y) do k = 1, n y(idx(k)) = x(k) + beta *y(idx(k)) end do @@ -488,8 +489,8 @@ contains integer(psb_ipk_) :: n, idx(:) real(psb_spk_) :: beta, x(:), y(:) integer(psb_ipk_) :: k - !$acc update device(x(1:n)) async - !$acc parallel loop + !$acc update device(x(1:n)) + !$acc parallel loop present(x,y) do k = 1, n y(idx(k)) = x(k) + beta *y(idx(k)) end do @@ -531,6 +532,7 @@ contains acc_done = .false. if (.not.allocated(x%combuf)) then + write(0,*) 'oacc allocation error combuf gthzbuf ' call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') return end if @@ -556,13 +558,13 @@ contains integer(psb_ipk_) :: n, idx(:) real(psb_spk_) :: x(:), y(:) integer(psb_ipk_) :: k - - !$acc parallel loop present(y) + ! + !$acc parallel loop present(x,y) do k = 1, n y(k) = x(idx(k)) end do !$acc end parallel loop - !$acc update self(y(1:n)) async + !$acc update self(y(1:n)) end subroutine inner_gth end subroutine s_oacc_gthzbuf @@ -600,13 +602,13 @@ contains integer(psb_ipk_) :: n, idx(:) real(psb_spk_) :: x(:), y(:) integer(psb_ipk_) :: k - - !$acc parallel loop present(y) + ! + !$acc parallel loop present(x,y) do k = 1, n y(k) = x(idx(k)) end do !$acc end parallel loop - !$acc update self(y(1:n)) async + !$acc update self(y(1:n)) end subroutine inner_gth end subroutine s_oacc_gthzv_x @@ -633,7 +635,7 @@ contains if (vval%is_host()) call vval%sync() if (virl%is_host()) call virl%sync() if (x%is_host()) call x%sync() - !$acc parallel loop + !$acc parallel loop present(x%v,virl%v,vval%v) do i = 1, n x%v(virl%v(i)) = vval%v(i) end do @@ -757,7 +759,7 @@ contains if (present(first)) first_ = max(1, first) if (present(last)) last_ = min(last, last_) - !$acc parallel loop + !$acc parallel loop present(x%v) do i = first_, last_ x%v(i) = val end do @@ -849,26 +851,36 @@ contains class(psb_s_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(in) :: n integer(psb_ipk_), intent(out) :: info - if (n /= psb_size(x%combuf)) then + + !write(0,*) 'oacc new_buffer',n,psb_size(x%combuf) + if (n > psb_size(x%combuf)) then + !write(0,*) 'oacc new_buffer: reallocating ' + if (allocated(x%combuf)) then + !if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) + !$acc exit data delete(x%combuf) + end if call x%psb_s_base_vect_type%new_buffer(n,info) !$acc enter data copyin(x%combuf) + ! call acc_copyin(x%combuf) end if end subroutine s_oacc_new_buffer subroutine s_oacc_sync_dev_space(x) implicit none class(psb_s_vect_oacc), intent(inout) :: x - if (allocated(x%v)) call acc_create(x%v) +!!$ write(0,*) 'oacc sync_dev_space' + if (psb_size(x%v)>0) call acc_copyin(x%v) end subroutine s_oacc_sync_dev_space subroutine s_oacc_sync(x) implicit none class(psb_s_vect_oacc), intent(inout) :: x if (x%is_dev()) then - call acc_update_self(x%v) + if (psb_size(x%v)>0) call acc_update_self(x%v) end if if (x%is_host()) then - call acc_update_device(x%v) + if (.not.acc_is_present(x%v)) call s_oacc_sync_dev_space(x) + if (psb_size(x%v)>0) call acc_update_device(x%v) end if call x%set_sync() end subroutine s_oacc_sync @@ -941,6 +953,8 @@ contains type(psb_s_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: info info = 0 +!!$ write(0,*) 'oacc final_vect_free' + call x%free_buffer(info) if (allocated(x%v)) then if (acc_is_present(x%v)) call acc_delete_finalize(x%v) deallocate(x%v, stat=info) @@ -953,8 +967,9 @@ contains class(psb_s_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 +!!$ write(0,*) 'oacc vect_free' + call x%free_buffer(info) if (acc_is_present(x%v)) call acc_delete_finalize(x%v) - if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) call x%psb_s_base_vect_type%free(info) end subroutine s_oacc_vect_free @@ -964,8 +979,10 @@ contains integer(psb_ipk_), intent(out) :: info info = 0 - if (psb_oacc_get_maybe_free_buffer())& - & call x%free_buffer(info) + if (psb_oacc_get_maybe_free_buffer()) then + !write(0,*) 'psb_oacc_get_maybe_free_buffer() ',psb_oacc_get_maybe_free_buffer() + call x%free_buffer(info) + end if end subroutine s_oacc_vect_maybe_free_buffer @@ -973,7 +990,7 @@ contains implicit none class(psb_s_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(out) :: info - +! write(0,*) 'oacc free_buffer' info = 0 if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) call x%psb_s_base_vect_type%free_buffer(info) @@ -985,7 +1002,6 @@ contains class(psb_s_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: res - if (x%is_dev()) call x%sync() res = size(x%v) end function s_oacc_get_size diff --git a/openacc/psb_z_oacc_csr_mat_mod.F90 b/openacc/psb_z_oacc_csr_mat_mod.F90 index ed92373d..3b66787a 100644 --- a/openacc/psb_z_oacc_csr_mat_mod.F90 +++ b/openacc/psb_z_oacc_csr_mat_mod.F90 @@ -257,9 +257,9 @@ contains ! but with size 0, then CREATE,UPDATE and DELETE ! will fail ! - if (psb_size(a%val)>0) call acc_create(a%val) - if (psb_size(a%ja)>0) call acc_create(a%ja) - if (psb_size(a%irp)>0) call acc_create(a%irp) + if (psb_size(a%val)>0) call acc_copyin(a%val) + if (psb_size(a%ja)>0) call acc_copyin(a%ja) + if (psb_size(a%irp)>0) call acc_copyin(a%irp) end subroutine z_oacc_csr_sync_dev_space subroutine z_oacc_csr_sync(a) diff --git a/openacc/psb_z_oacc_ell_mat_mod.F90 b/openacc/psb_z_oacc_ell_mat_mod.F90 index d494922f..abfb11e3 100644 --- a/openacc/psb_z_oacc_ell_mat_mod.F90 +++ b/openacc/psb_z_oacc_ell_mat_mod.F90 @@ -186,10 +186,10 @@ contains ! but with size 0, then CREATE,UPDATE and DELETE ! will fail ! - if (psb_size(a%val)>0) call acc_create(a%val) - if (psb_size(a%ja)>0) call acc_create(a%ja) - if (psb_size(a%irn)>0) call acc_create(a%irn) - if (psb_size(a%idiag)>0) call acc_create(a%idiag) + if (psb_size(a%val)>0) call acc_copyin(a%val) + if (psb_size(a%ja)>0) call acc_copyin(a%ja) + if (psb_size(a%irn)>0) call acc_copyin(a%irn) + if (psb_size(a%idiag)>0) call acc_copyin(a%idiag) end subroutine z_oacc_ell_sync_dev_space function z_oacc_ell_is_host(a) result(res) diff --git a/openacc/psb_z_oacc_hll_mat_mod.F90 b/openacc/psb_z_oacc_hll_mat_mod.F90 index 07739348..4c9f1b11 100644 --- a/openacc/psb_z_oacc_hll_mat_mod.F90 +++ b/openacc/psb_z_oacc_hll_mat_mod.F90 @@ -240,11 +240,11 @@ contains ! but with size 0, then CREATE,UPDATE and DELETE ! will fail ! - if (psb_size(a%val)>0) call acc_create(a%val) - if (psb_size(a%ja)>0) call acc_create(a%ja) - if (psb_size(a%irn)>0) call acc_create(a%irn) - if (psb_size(a%idiag)>0) call acc_create(a%idiag) - if (psb_size(a%hkoffs)>0) call acc_create(a%hkoffs) + if (psb_size(a%val)>0) call acc_copyin(a%val) + if (psb_size(a%ja)>0) call acc_copyin(a%ja) + if (psb_size(a%irn)>0) call acc_copyin(a%irn) + if (psb_size(a%idiag)>0) call acc_copyin(a%idiag) + if (psb_size(a%hkoffs)>0) call acc_copyin(a%hkoffs) end subroutine z_oacc_hll_sync_dev_space diff --git a/openacc/psb_z_oacc_vect_mod.F90 b/openacc/psb_z_oacc_vect_mod.F90 index 0fe1adaa..0bc10283 100644 --- a/openacc/psb_z_oacc_vect_mod.F90 +++ b/openacc/psb_z_oacc_vect_mod.F90 @@ -113,7 +113,7 @@ contains complex(psb_dpk_), intent(inout) :: x(:) integer(psb_ipk_) :: n integer(psb_ipk_) :: i - !$acc parallel loop + !$acc parallel loop present(x) do i = 1, n x(i) = abs(x(i)) end do @@ -144,7 +144,7 @@ contains complex(psb_dpk_), intent(inout) :: x(:),y(:) integer(psb_ipk_) :: n integer(psb_ipk_) :: i - !$acc parallel loop + !$acc parallel loop present(x,y) do i = 1, n y(i) = abs(x(i)) end do @@ -164,7 +164,7 @@ contains complex(psb_dpk_), intent(in) :: alpha complex(psb_dpk_), intent(inout) :: x(:) integer(psb_ipk_) :: i - !$acc parallel loop + !$acc parallel loop present(x) do i = 1, size(x) x(i) = alpha * x(i) end do @@ -189,7 +189,7 @@ contains real(psb_dpk_) :: sum, mx integer(psb_ipk_) :: i mx = dzero - !$acc parallel loop reduction(max:mx) + !$acc parallel loop reduction(max:mx) present(x) do i = 1, n if (abs(x(i)) > mx) mx = abs(x(i)) end do @@ -197,7 +197,7 @@ contains res = mx else sum = dzero - !$acc parallel loop reduction(+:sum) + !$acc parallel loop reduction(+:sum) present(x) do i = 1, n sum = sum + abs(x(i)/mx)**2 end do @@ -223,7 +223,7 @@ contains real(psb_dpk_) :: max_val integer(psb_ipk_) :: i max_val = dzero - !$acc parallel loop reduction(max:max_val) + !$acc parallel loop reduction(max:max_val) present(x) do i = 1, n if (abs(x(i)) > max_val) max_val = abs(x(i)) end do @@ -248,7 +248,7 @@ contains real(psb_dpk_) :: res integer(psb_ipk_) :: i res = dzero - !$acc parallel loop reduction(+:res) + !$acc parallel loop reduction(+:res) present(x) do i = 1, n res = res + abs(x(i)) end do @@ -265,7 +265,7 @@ contains info = 0 if (y%is_dev()) call y%sync() - !$acc parallel loop + !$acc parallel loop present(x,y) do i = 1, size(x) y%v(i) = y%v(i) * x(i) end do @@ -283,7 +283,7 @@ contains info = 0 if (z%is_dev()) call z%sync() - !$acc parallel loop + !$acc parallel loop present(x,y,z%v) do i = 1, size(x) z%v(i) = alpha * x(i) * y(i) + beta * z%v(i) end do @@ -327,7 +327,7 @@ contains complex(psb_dpk_), intent(inout) :: y(:) complex(psb_dpk_), intent(in) :: alpha, beta integer(psb_ipk_), intent(out) :: info - !$acc parallel + !$acc parallel present(x,y) !$acc loop do i = 1, m y(i) = alpha * x(i) + beta * y(i) @@ -384,7 +384,7 @@ contains if ((nx < m) .or. (ny < m) .or. (nz < m)) then info = psb_err_internal_error_ else - !$acc parallel loop + !$acc parallel loop present(xx%v,yy%v,zz%v) do i = 1, m yy%v(i) = alpha * xx%v(i) + beta * yy%v(i) zz%v(i) = gamma * yy%v(i) + delta * zz%v(i) @@ -416,6 +416,7 @@ contains integer(psb_ipk_) :: info, k logical :: acc_done if (.not.allocated(y%combuf)) then + write(0,*) 'allocation error for y%combuf ' call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') return end if @@ -443,8 +444,8 @@ contains integer(psb_ipk_) :: n, idx(:) complex(psb_dpk_) :: beta,x(:), y(:) integer(psb_ipk_) :: k - !$acc update device(x(1:n)) async - !$acc parallel loop + !$acc update device(x(1:n)) + !$acc parallel loop present(x,y) do k = 1, n y(idx(k)) = x(k) + beta *y(idx(k)) end do @@ -488,8 +489,8 @@ contains integer(psb_ipk_) :: n, idx(:) complex(psb_dpk_) :: beta, x(:), y(:) integer(psb_ipk_) :: k - !$acc update device(x(1:n)) async - !$acc parallel loop + !$acc update device(x(1:n)) + !$acc parallel loop present(x,y) do k = 1, n y(idx(k)) = x(k) + beta *y(idx(k)) end do @@ -531,6 +532,7 @@ contains acc_done = .false. if (.not.allocated(x%combuf)) then + write(0,*) 'oacc allocation error combuf gthzbuf ' call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') return end if @@ -556,13 +558,13 @@ contains integer(psb_ipk_) :: n, idx(:) complex(psb_dpk_) :: x(:), y(:) integer(psb_ipk_) :: k - - !$acc parallel loop present(y) + ! + !$acc parallel loop present(x,y) do k = 1, n y(k) = x(idx(k)) end do !$acc end parallel loop - !$acc update self(y(1:n)) async + !$acc update self(y(1:n)) end subroutine inner_gth end subroutine z_oacc_gthzbuf @@ -600,13 +602,13 @@ contains integer(psb_ipk_) :: n, idx(:) complex(psb_dpk_) :: x(:), y(:) integer(psb_ipk_) :: k - - !$acc parallel loop present(y) + ! + !$acc parallel loop present(x,y) do k = 1, n y(k) = x(idx(k)) end do !$acc end parallel loop - !$acc update self(y(1:n)) async + !$acc update self(y(1:n)) end subroutine inner_gth end subroutine z_oacc_gthzv_x @@ -633,7 +635,7 @@ contains if (vval%is_host()) call vval%sync() if (virl%is_host()) call virl%sync() if (x%is_host()) call x%sync() - !$acc parallel loop + !$acc parallel loop present(x%v,virl%v,vval%v) do i = 1, n x%v(virl%v(i)) = vval%v(i) end do @@ -757,7 +759,7 @@ contains if (present(first)) first_ = max(1, first) if (present(last)) last_ = min(last, last_) - !$acc parallel loop + !$acc parallel loop present(x%v) do i = first_, last_ x%v(i) = val end do @@ -849,26 +851,36 @@ contains class(psb_z_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(in) :: n integer(psb_ipk_), intent(out) :: info - if (n /= psb_size(x%combuf)) then + + !write(0,*) 'oacc new_buffer',n,psb_size(x%combuf) + if (n > psb_size(x%combuf)) then + !write(0,*) 'oacc new_buffer: reallocating ' + if (allocated(x%combuf)) then + !if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) + !$acc exit data delete(x%combuf) + end if call x%psb_z_base_vect_type%new_buffer(n,info) !$acc enter data copyin(x%combuf) + ! call acc_copyin(x%combuf) end if end subroutine z_oacc_new_buffer subroutine z_oacc_sync_dev_space(x) implicit none class(psb_z_vect_oacc), intent(inout) :: x - if (allocated(x%v)) call acc_create(x%v) +!!$ write(0,*) 'oacc sync_dev_space' + if (psb_size(x%v)>0) call acc_copyin(x%v) end subroutine z_oacc_sync_dev_space subroutine z_oacc_sync(x) implicit none class(psb_z_vect_oacc), intent(inout) :: x if (x%is_dev()) then - call acc_update_self(x%v) + if (psb_size(x%v)>0) call acc_update_self(x%v) end if if (x%is_host()) then - call acc_update_device(x%v) + if (.not.acc_is_present(x%v)) call z_oacc_sync_dev_space(x) + if (psb_size(x%v)>0) call acc_update_device(x%v) end if call x%set_sync() end subroutine z_oacc_sync @@ -941,6 +953,8 @@ contains type(psb_z_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: info info = 0 +!!$ write(0,*) 'oacc final_vect_free' + call x%free_buffer(info) if (allocated(x%v)) then if (acc_is_present(x%v)) call acc_delete_finalize(x%v) deallocate(x%v, stat=info) @@ -953,8 +967,9 @@ contains class(psb_z_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 +!!$ write(0,*) 'oacc vect_free' + call x%free_buffer(info) if (acc_is_present(x%v)) call acc_delete_finalize(x%v) - if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) call x%psb_z_base_vect_type%free(info) end subroutine z_oacc_vect_free @@ -964,8 +979,10 @@ contains integer(psb_ipk_), intent(out) :: info info = 0 - if (psb_oacc_get_maybe_free_buffer())& - & call x%free_buffer(info) + if (psb_oacc_get_maybe_free_buffer()) then + !write(0,*) 'psb_oacc_get_maybe_free_buffer() ',psb_oacc_get_maybe_free_buffer() + call x%free_buffer(info) + end if end subroutine z_oacc_vect_maybe_free_buffer @@ -973,7 +990,7 @@ contains implicit none class(psb_z_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(out) :: info - +! write(0,*) 'oacc free_buffer' info = 0 if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) call x%psb_z_base_vect_type%free_buffer(info) @@ -985,7 +1002,6 @@ contains class(psb_z_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: res - if (x%is_dev()) call x%sync() res = size(x%v) end function z_oacc_get_size From 5903c0b272dc1c40d10ada8ee50ef6d94b1b1183 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 8 Oct 2024 17:41:42 +0200 Subject: [PATCH 4/4] Fix DOT in OpenACC --- openacc/psb_c_oacc_vect_mod.F90 | 12 ++++-------- openacc/psb_d_oacc_vect_mod.F90 | 8 ++------ openacc/psb_s_oacc_vect_mod.F90 | 12 ++++-------- openacc/psb_z_oacc_vect_mod.F90 | 12 ++++-------- 4 files changed, 14 insertions(+), 30 deletions(-) diff --git a/openacc/psb_c_oacc_vect_mod.F90 b/openacc/psb_c_oacc_vect_mod.F90 index 40437184..067c571b 100644 --- a/openacc/psb_c_oacc_vect_mod.F90 +++ b/openacc/psb_c_oacc_vect_mod.F90 @@ -798,22 +798,18 @@ contains class(psb_c_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(in) :: n complex(psb_spk_) :: res - complex(psb_spk_), external :: ddot integer(psb_ipk_) :: info res = czero !!$ write(0,*) 'oacc_dot_v' select type(yy => y) - type is (psb_c_base_vect_type) - if (x%is_dev()) call x%sync() - res = ddot(n, x%v, 1, yy%v, 1) type is (psb_c_vect_oacc) if (x%is_host()) call x%sync() if (yy%is_host()) call yy%sync() res = c_inner_oacc_dot(n, x%v, yy%v) class default - call x%sync() - res = y%dot(n, x%v) + if (x%is_dev()) call x%sync() + res = y%dot(n, x%v) end select contains function c_inner_oacc_dot(n, x, y) result(res) @@ -838,10 +834,10 @@ contains complex(psb_spk_), intent(in) :: y(:) integer(psb_ipk_), intent(in) :: n complex(psb_spk_) :: res - complex(psb_spk_), external :: ddot + complex(psb_spk_), external :: cdot if (x%is_dev()) call x%sync() - res = ddot(n, y, 1, x%v, 1) + res = cdot(n, y, 1, x%v, 1) end function c_oacc_dot_a diff --git a/openacc/psb_d_oacc_vect_mod.F90 b/openacc/psb_d_oacc_vect_mod.F90 index 84441c8a..929066ae 100644 --- a/openacc/psb_d_oacc_vect_mod.F90 +++ b/openacc/psb_d_oacc_vect_mod.F90 @@ -798,22 +798,18 @@ contains class(psb_d_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(in) :: n real(psb_dpk_) :: res - real(psb_dpk_), external :: ddot integer(psb_ipk_) :: info res = dzero !!$ write(0,*) 'oacc_dot_v' select type(yy => y) - type is (psb_d_base_vect_type) - if (x%is_dev()) call x%sync() - res = ddot(n, x%v, 1, yy%v, 1) type is (psb_d_vect_oacc) if (x%is_host()) call x%sync() if (yy%is_host()) call yy%sync() res = d_inner_oacc_dot(n, x%v, yy%v) class default - call x%sync() - res = y%dot(n, x%v) + if (x%is_dev()) call x%sync() + res = y%dot(n, x%v) end select contains function d_inner_oacc_dot(n, x, y) result(res) diff --git a/openacc/psb_s_oacc_vect_mod.F90 b/openacc/psb_s_oacc_vect_mod.F90 index 70c9dd49..9cb42a95 100644 --- a/openacc/psb_s_oacc_vect_mod.F90 +++ b/openacc/psb_s_oacc_vect_mod.F90 @@ -798,22 +798,18 @@ contains class(psb_s_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(in) :: n real(psb_spk_) :: res - real(psb_spk_), external :: ddot integer(psb_ipk_) :: info res = szero !!$ write(0,*) 'oacc_dot_v' select type(yy => y) - type is (psb_s_base_vect_type) - if (x%is_dev()) call x%sync() - res = ddot(n, x%v, 1, yy%v, 1) type is (psb_s_vect_oacc) if (x%is_host()) call x%sync() if (yy%is_host()) call yy%sync() res = s_inner_oacc_dot(n, x%v, yy%v) class default - call x%sync() - res = y%dot(n, x%v) + if (x%is_dev()) call x%sync() + res = y%dot(n, x%v) end select contains function s_inner_oacc_dot(n, x, y) result(res) @@ -838,10 +834,10 @@ contains real(psb_spk_), intent(in) :: y(:) integer(psb_ipk_), intent(in) :: n real(psb_spk_) :: res - real(psb_spk_), external :: ddot + real(psb_spk_), external :: sdot if (x%is_dev()) call x%sync() - res = ddot(n, y, 1, x%v, 1) + res = sdot(n, y, 1, x%v, 1) end function s_oacc_dot_a diff --git a/openacc/psb_z_oacc_vect_mod.F90 b/openacc/psb_z_oacc_vect_mod.F90 index 0bc10283..90ddcf0d 100644 --- a/openacc/psb_z_oacc_vect_mod.F90 +++ b/openacc/psb_z_oacc_vect_mod.F90 @@ -798,22 +798,18 @@ contains class(psb_z_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(in) :: n complex(psb_dpk_) :: res - complex(psb_dpk_), external :: ddot integer(psb_ipk_) :: info res = zzero !!$ write(0,*) 'oacc_dot_v' select type(yy => y) - type is (psb_z_base_vect_type) - if (x%is_dev()) call x%sync() - res = ddot(n, x%v, 1, yy%v, 1) type is (psb_z_vect_oacc) if (x%is_host()) call x%sync() if (yy%is_host()) call yy%sync() res = z_inner_oacc_dot(n, x%v, yy%v) class default - call x%sync() - res = y%dot(n, x%v) + if (x%is_dev()) call x%sync() + res = y%dot(n, x%v) end select contains function z_inner_oacc_dot(n, x, y) result(res) @@ -838,10 +834,10 @@ contains complex(psb_dpk_), intent(in) :: y(:) integer(psb_ipk_), intent(in) :: n complex(psb_dpk_) :: res - complex(psb_dpk_), external :: ddot + complex(psb_dpk_), external :: zdot if (x%is_dev()) call x%sync() - res = ddot(n, y, 1, x%v, 1) + res = zdot(n, y, 1, x%v, 1) end function z_oacc_dot_a