diff --git a/base/serial/impl/psb_c_coo_impl.F90 b/base/serial/impl/psb_c_coo_impl.F90 index 1b162457..53ffcea9 100644 --- a/base/serial/impl/psb_c_coo_impl.F90 +++ b/base/serial/impl/psb_c_coo_impl.F90 @@ -2879,9 +2879,9 @@ subroutine psb_c_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) if (info /= 0) write(0,*) name,' point 0:',info,isza,nza,nz ! Build phase. Must handle reallocations in a sensible way. if (isza < (nza+nz)) then - write(0,*) ' before reallocate in csput ',psb_errstatus_fatal(),info + !write(0,*) ' before reallocate in csput ',psb_errstatus_fatal(),info call a%reallocate(max(nza+nz,int(1.5*isza))) - write(0,*) ' after reallocate in csput ',psb_errstatus_fatal(),info + !write(0,*) ' after reallocate in csput ',psb_errstatus_fatal(),info endif isza = a%get_size() if (isza < (nza+nz)) then diff --git a/base/serial/impl/psb_c_csr_impl.F90 b/base/serial/impl/psb_c_csr_impl.F90 index 5a795583..0e040a7b 100644 --- a/base/serial/impl/psb_c_csr_impl.F90 +++ b/base/serial/impl/psb_c_csr_impl.F90 @@ -152,7 +152,7 @@ contains !$omp parallel do private(i,j, acc) schedule(static) do i=1,m acc = czero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -164,7 +164,7 @@ contains !$omp parallel do private(i,j, acc) do i=1,m acc = czero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -176,7 +176,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = czero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -192,7 +192,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = czero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -204,7 +204,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = czero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -216,7 +216,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = czero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -231,7 +231,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = czero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -243,7 +243,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = czero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -255,7 +255,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = czero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -270,7 +270,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = czero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -282,7 +282,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = czero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -294,7 +294,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = czero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo diff --git a/base/serial/impl/psb_d_coo_impl.F90 b/base/serial/impl/psb_d_coo_impl.F90 index 9a20164a..7a79847e 100644 --- a/base/serial/impl/psb_d_coo_impl.F90 +++ b/base/serial/impl/psb_d_coo_impl.F90 @@ -2879,9 +2879,9 @@ subroutine psb_d_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) if (info /= 0) write(0,*) name,' point 0:',info,isza,nza,nz ! Build phase. Must handle reallocations in a sensible way. if (isza < (nza+nz)) then - write(0,*) ' before reallocate in csput ',psb_errstatus_fatal(),info + !write(0,*) ' before reallocate in csput ',psb_errstatus_fatal(),info call a%reallocate(max(nza+nz,int(1.5*isza))) - write(0,*) ' after reallocate in csput ',psb_errstatus_fatal(),info + !write(0,*) ' after reallocate in csput ',psb_errstatus_fatal(),info endif isza = a%get_size() if (isza < (nza+nz)) then diff --git a/base/serial/impl/psb_d_csr_impl.F90 b/base/serial/impl/psb_d_csr_impl.F90 index af842c16..d6b6e9cb 100644 --- a/base/serial/impl/psb_d_csr_impl.F90 +++ b/base/serial/impl/psb_d_csr_impl.F90 @@ -152,7 +152,7 @@ contains !$omp parallel do private(i,j, acc) schedule(static) do i=1,m acc = dzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -164,7 +164,7 @@ contains !$omp parallel do private(i,j, acc) do i=1,m acc = dzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -176,7 +176,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = dzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -192,7 +192,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = dzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -204,7 +204,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = dzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -216,7 +216,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = dzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -231,7 +231,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = dzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -243,7 +243,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = dzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -255,7 +255,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = dzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -270,7 +270,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = dzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -282,7 +282,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = dzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -294,7 +294,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = dzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo diff --git a/base/serial/impl/psb_s_coo_impl.F90 b/base/serial/impl/psb_s_coo_impl.F90 index e944ce7c..b4e6a8df 100644 --- a/base/serial/impl/psb_s_coo_impl.F90 +++ b/base/serial/impl/psb_s_coo_impl.F90 @@ -2879,9 +2879,9 @@ subroutine psb_s_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) if (info /= 0) write(0,*) name,' point 0:',info,isza,nza,nz ! Build phase. Must handle reallocations in a sensible way. if (isza < (nza+nz)) then - write(0,*) ' before reallocate in csput ',psb_errstatus_fatal(),info + !write(0,*) ' before reallocate in csput ',psb_errstatus_fatal(),info call a%reallocate(max(nza+nz,int(1.5*isza))) - write(0,*) ' after reallocate in csput ',psb_errstatus_fatal(),info + !write(0,*) ' after reallocate in csput ',psb_errstatus_fatal(),info endif isza = a%get_size() if (isza < (nza+nz)) then diff --git a/base/serial/impl/psb_s_csr_impl.F90 b/base/serial/impl/psb_s_csr_impl.F90 index 01657071..49d168bb 100644 --- a/base/serial/impl/psb_s_csr_impl.F90 +++ b/base/serial/impl/psb_s_csr_impl.F90 @@ -152,7 +152,7 @@ contains !$omp parallel do private(i,j, acc) schedule(static) do i=1,m acc = szero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -164,7 +164,7 @@ contains !$omp parallel do private(i,j, acc) do i=1,m acc = szero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -176,7 +176,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = szero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -192,7 +192,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = szero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -204,7 +204,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = szero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -216,7 +216,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = szero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -231,7 +231,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = szero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -243,7 +243,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = szero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -255,7 +255,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = szero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -270,7 +270,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = szero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -282,7 +282,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = szero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -294,7 +294,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = szero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo diff --git a/base/serial/impl/psb_z_coo_impl.F90 b/base/serial/impl/psb_z_coo_impl.F90 index f88dc92b..949d0384 100644 --- a/base/serial/impl/psb_z_coo_impl.F90 +++ b/base/serial/impl/psb_z_coo_impl.F90 @@ -2879,9 +2879,9 @@ subroutine psb_z_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) if (info /= 0) write(0,*) name,' point 0:',info,isza,nza,nz ! Build phase. Must handle reallocations in a sensible way. if (isza < (nza+nz)) then - write(0,*) ' before reallocate in csput ',psb_errstatus_fatal(),info + !write(0,*) ' before reallocate in csput ',psb_errstatus_fatal(),info call a%reallocate(max(nza+nz,int(1.5*isza))) - write(0,*) ' after reallocate in csput ',psb_errstatus_fatal(),info + !write(0,*) ' after reallocate in csput ',psb_errstatus_fatal(),info endif isza = a%get_size() if (isza < (nza+nz)) then diff --git a/base/serial/impl/psb_z_csr_impl.F90 b/base/serial/impl/psb_z_csr_impl.F90 index 947c8329..1fc387d8 100644 --- a/base/serial/impl/psb_z_csr_impl.F90 +++ b/base/serial/impl/psb_z_csr_impl.F90 @@ -152,7 +152,7 @@ contains !$omp parallel do private(i,j, acc) schedule(static) do i=1,m acc = zzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -164,7 +164,7 @@ contains !$omp parallel do private(i,j, acc) do i=1,m acc = zzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -176,7 +176,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = zzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -192,7 +192,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = zzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -204,7 +204,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = zzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -216,7 +216,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = zzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -231,7 +231,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = zzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -243,7 +243,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = zzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -255,7 +255,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = zzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -270,7 +270,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = zzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -282,7 +282,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = zzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -294,7 +294,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = zzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo diff --git a/cuda/Makefile b/cuda/Makefile index 1a595bc7..b47e58ce 100755 --- a/cuda/Makefile +++ b/cuda/Makefile @@ -51,6 +51,7 @@ lib: objs ilib cudalib spgpulib $(AR) $(LIBNAME) $(OBJS) /bin/cp -p $(LIBNAME) $(LIBDIR) +$(COBJS): spgpuinc objs: spgpuinc $(OBJS) iobjs cudaobjs spgpuobjs /bin/cp -p *$(.mod) $(MODDIR)