diff --git a/base/modules/serial/psb_c_base_mat_mod.F90 b/base/modules/serial/psb_c_base_mat_mod.F90 index 6924d819..ab43f386 100644 --- a/base/modules/serial/psb_c_base_mat_mod.F90 +++ b/base/modules/serial/psb_c_base_mat_mod.F90 @@ -1865,6 +1865,27 @@ module psb_c_base_mat_mod integer(psb_ipk_), intent(in), optional :: idir end subroutine psb_c_fix_coo_inner end interface + interface + subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,info) + import + integer(psb_ipk_), intent(in) :: nr,nc,nzin,dupl + integer(psb_ipk_), intent(inout) :: ia(:), ja(:), iaux(:) + complex(psb_spk_), intent(inout) :: val(:) + integer(psb_ipk_), intent(out) :: nzout + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_fix_coo_inner_rowmajor + end interface + interface + subroutine psb_c_fix_coo_inner_colmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,info) + import + integer(psb_ipk_), intent(in) :: nr,nc,nzin,dupl + integer(psb_ipk_), intent(inout) :: ia(:), ja(:), iaux(:) + complex(psb_spk_), intent(inout) :: val(:) + integer(psb_ipk_), intent(out) :: nzout + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_fix_coo_inner_colmajor + + end interface ! !> Function fix_coo diff --git a/base/modules/serial/psb_d_base_mat_mod.F90 b/base/modules/serial/psb_d_base_mat_mod.F90 index a6ff7c5e..48e9ec1e 100644 --- a/base/modules/serial/psb_d_base_mat_mod.F90 +++ b/base/modules/serial/psb_d_base_mat_mod.F90 @@ -1865,6 +1865,27 @@ module psb_d_base_mat_mod integer(psb_ipk_), intent(in), optional :: idir end subroutine psb_d_fix_coo_inner end interface + interface + subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,info) + import + integer(psb_ipk_), intent(in) :: nr,nc,nzin,dupl + integer(psb_ipk_), intent(inout) :: ia(:), ja(:), iaux(:) + real(psb_dpk_), intent(inout) :: val(:) + integer(psb_ipk_), intent(out) :: nzout + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_fix_coo_inner_rowmajor + end interface + interface + subroutine psb_d_fix_coo_inner_colmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,info) + import + integer(psb_ipk_), intent(in) :: nr,nc,nzin,dupl + integer(psb_ipk_), intent(inout) :: ia(:), ja(:), iaux(:) + real(psb_dpk_), intent(inout) :: val(:) + integer(psb_ipk_), intent(out) :: nzout + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_fix_coo_inner_colmajor + + end interface ! !> Function fix_coo diff --git a/base/modules/serial/psb_s_base_mat_mod.F90 b/base/modules/serial/psb_s_base_mat_mod.F90 index 95bab09e..62706c1c 100644 --- a/base/modules/serial/psb_s_base_mat_mod.F90 +++ b/base/modules/serial/psb_s_base_mat_mod.F90 @@ -1865,6 +1865,27 @@ module psb_s_base_mat_mod integer(psb_ipk_), intent(in), optional :: idir end subroutine psb_s_fix_coo_inner end interface + interface + subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,info) + import + integer(psb_ipk_), intent(in) :: nr,nc,nzin,dupl + integer(psb_ipk_), intent(inout) :: ia(:), ja(:), iaux(:) + real(psb_spk_), intent(inout) :: val(:) + integer(psb_ipk_), intent(out) :: nzout + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_fix_coo_inner_rowmajor + end interface + interface + subroutine psb_s_fix_coo_inner_colmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,info) + import + integer(psb_ipk_), intent(in) :: nr,nc,nzin,dupl + integer(psb_ipk_), intent(inout) :: ia(:), ja(:), iaux(:) + real(psb_spk_), intent(inout) :: val(:) + integer(psb_ipk_), intent(out) :: nzout + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_fix_coo_inner_colmajor + + end interface ! !> Function fix_coo diff --git a/base/modules/serial/psb_z_base_mat_mod.F90 b/base/modules/serial/psb_z_base_mat_mod.F90 index 88ba36ec..ea7e0de5 100644 --- a/base/modules/serial/psb_z_base_mat_mod.F90 +++ b/base/modules/serial/psb_z_base_mat_mod.F90 @@ -1865,6 +1865,27 @@ module psb_z_base_mat_mod integer(psb_ipk_), intent(in), optional :: idir end subroutine psb_z_fix_coo_inner end interface + interface + subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,info) + import + integer(psb_ipk_), intent(in) :: nr,nc,nzin,dupl + integer(psb_ipk_), intent(inout) :: ia(:), ja(:), iaux(:) + complex(psb_dpk_), intent(inout) :: val(:) + integer(psb_ipk_), intent(out) :: nzout + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_fix_coo_inner_rowmajor + end interface + interface + subroutine psb_z_fix_coo_inner_colmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,info) + import + integer(psb_ipk_), intent(in) :: nr,nc,nzin,dupl + integer(psb_ipk_), intent(inout) :: ia(:), ja(:), iaux(:) + complex(psb_dpk_), intent(inout) :: val(:) + integer(psb_ipk_), intent(out) :: nzout + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_fix_coo_inner_colmajor + + end interface ! !> Function fix_coo diff --git a/base/serial/impl/psb_c_coo_impl.F90 b/base/serial/impl/psb_c_coo_impl.F90 index d75631cc..5b478c0e 100644 --- a/base/serial/impl/psb_c_coo_impl.F90 +++ b/base/serial/impl/psb_c_coo_impl.F90 @@ -728,7 +728,7 @@ subroutine psb_c_coo_print(iout,a,iv,head,ivr,ivc) character(len=80) :: frmt integer(psb_ipk_) :: i,j, ni, nr, nc, nz - write(iout,'(a)') '%%MatrixMarket matrix coordinate complex general' + write(iout,'(a)') '%%MatrixMarket matrix coordinate complex general' if (present(head)) write(iout,'(a,a)') '% ',head write(iout,'(a)') '%' write(iout,'(a,a)') '% COO' @@ -3172,9 +3172,9 @@ subroutine psb_c_cp_coo_from_coo(a,b,info) integer(psb_ipk_) :: i !$omp parallel do private(i) do i=1, nz - a%ia(i) = b%ia(i) - a%ja(i) = b%ja(i) - a%val(i) = b%val(i) + a%ia(i) = b%ia(i) + a%ja(i) = b%ja(i) + a%val(i) = b%val(i) end do end block #else @@ -3615,6 +3615,7 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) select case(idir_) case(psb_row_major_) +#if 0 ! Row major order if (nr <= nzin) then ! Avoid strange situations with large indices @@ -3730,7 +3731,7 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) end if if (ithread == 0) then - iaux(1) = 1 + iaux(1) = 1 end if !$OMP BARRIER @@ -3782,11 +3783,11 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) ! The row has elements? if (nzl > 0) then call psi_msort_up(nzl,jas(first_elem:last_elem), & - & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) + & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) if (iret == 0) then - call psb_ip_reord(nzl,vs(first_elem:last_elem),& - & ias(first_elem:last_elem),jas(first_elem:last_elem), & - & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) + call psb_ip_reord(nzl,vs(first_elem:last_elem),& + & ias(first_elem:last_elem),jas(first_elem:last_elem), & + & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) end if ! Over each row we count the unique values @@ -3804,8 +3805,8 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) ! ---------------- kaux composition ---------------- !$OMP SINGLE - sum(:) = 0 - sum(1) = 1 + sum(:) = 0 + sum(1) = 1 !$OMP END SINGLE s = 0 @@ -3957,7 +3958,7 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) deallocate(sum,kaux,idxaux,stat=info) #else !if (.not.srt_inp) then - + ip = iaux(1) iaux(1) = 0 do i=2, nr @@ -4086,7 +4087,7 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) enddo end if end do - + case default write(psb_err_unit,*) 'Error in fix_coo: unsafe dupl',dupl_ info =-7 @@ -4097,7 +4098,7 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) deallocate(ix2, stat=info) #endif - + deallocate(ias,jas,vs, stat=info) else if (.not.use_buffers) then @@ -4261,9 +4262,12 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) if(debug_level >= psb_debug_serial_)& & write(debug_unit,*) trim(name),': end second loop' - +#else + call psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,& + & ia,ja,val,iaux,nzout,info) +#endif case(psb_col_major_) - +#if 0 if (nc <= nzin) then ! Avoid strange situations with large indices #if defined(OPENMP) @@ -4375,7 +4379,7 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) saved_elem = iaux(first_idx) end if if (ithread == 0) then - iaux(1) = 1 + iaux(1) = 1 end if !$OMP BARRIER @@ -4429,9 +4433,9 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) & idxaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2),iret) if (iret == 0) then - call psb_ip_reord(nzl,vs(first_elem:last_elem),& - & ias(first_elem:last_elem),jas(first_elem:last_elem), & - & idxaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2)) + call psb_ip_reord(nzl,vs(first_elem:last_elem),& + & ias(first_elem:last_elem),jas(first_elem:last_elem), & + & idxaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2)) end if ! Over each column we count the unique values @@ -4490,9 +4494,9 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) !$OMP BARRIER - ! ------------------------------------------------ + ! ------------------------------------------------ - select case(dupl_) + select case(dupl_) case(psb_dupl_ovwrt_) !$OMP DO schedule(STATIC) do j=1,nc @@ -4620,7 +4624,7 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) iaux(icl) = ip end do !end if - + select case(dupl_) case(psb_dupl_ovwrt_) k = 0 @@ -4733,7 +4737,7 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) info =-7 return end select - + nzout = k deallocate(ix2, stat=info) @@ -4786,9 +4790,9 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) if (nzl > 0) then call psi_msort_up(nzl,ia(first_elem:),iaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2),iret) if (iret == 0) & - & call psb_ip_reord(nzl,val(first_elem:last_elem),& - & ia(first_elem:last_elem),ja(first_elem:last_elem),& - & iaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2)) + & call psb_ip_reord(nzl,val(first_elem:last_elem),& + & ia(first_elem:last_elem),ja(first_elem:last_elem),& + & iaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2)) end if act_col = act_col + 1 @@ -4889,7 +4893,10 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) & write(debug_unit,*) trim(name),': end second loop' end if - +#else + call psb_c_fix_coo_inner_colmajor(nr,nc,nzin,dupl,& + & ia,ja,val,iaux,nzout,info) +#endif case default write(debug_unit,*) trim(name),': unknown direction ',idir_ info = psb_err_internal_error_ @@ -4908,219 +4915,1560 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) end subroutine psb_c_fix_coo_inner -subroutine psb_c_cp_coo_to_lcoo(a,b,info) +subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,info) + use psb_const_mod use psb_error_mod - use psb_c_base_mat_mod, psb_protect_name => psb_c_cp_coo_to_lcoo + use psb_c_base_mat_mod, psb_protect_name => psb_c_fix_coo_inner_rowmajor + use psb_string_mod + use psb_ip_reord_mod + use psb_sort_mod +#if defined(OPENMP) + use omp_lib +#endif implicit none - class(psb_c_coo_sparse_mat), intent(in) :: a - class(psb_lc_coo_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - - integer(psb_ipk_) :: err_act - integer(psb_lpk_) :: nz - character(len=20) :: name='to_coo' - logical, parameter :: debug=.false. + integer(psb_ipk_), intent(in) :: nr, nc, nzin, dupl + integer(psb_ipk_), intent(inout) :: ia(:), ja(:), iaux(:) + complex(psb_spk_), intent(inout) :: val(:) + integer(psb_ipk_), intent(out) :: nzout, info + !locals + integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:) + complex(psb_spk_), allocatable :: vs(:) + integer(psb_ipk_) :: nza, nzl,iret + integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name = 'psb_fixcoo' + logical :: srt_inp, use_buffers - call psb_erractionsave(err_act) - info = psb_success_ - if (a%is_dev()) call a%sync() - - b%psb_lbase_sparse_mat = a%psb_base_sparse_mat - call b%set_sort_status(a%get_sort_status()) - nz = a%get_nzeros() - call b%set_nzeros(nz) - call b%reallocate(nz) + ! Row major order + if (nr <= nzin) then + ! Avoid strange situations with large indices +#if defined(OPENMP) + ! We are not going to need 'ix2' because of the presence + ! of 'idxaux' as auxiliary buffer. + allocate(ias(nzin),jas(nzin),vs(nzin), stat=info) +#else + allocate(ias(nzin),jas(nzin),vs(nzin),ix2(nzin+2), stat=info) +#endif + use_buffers = (info == 0) + else + use_buffers = .false. + end if + if (use_buffers) then + iaux(:) = 0 #if defined(OPENMP) - block - integer(psb_ipk_) :: i - !$omp parallel do private(i) - do i=1, nz - b%ia(i) = a%ia(i) - b%ja(i) = a%ja(i) - b%val(i) = a%val(i) + !$OMP PARALLEL DO default(none) schedule(STATIC) & + !$OMP shared(nzin,ia,nr,iaux) & + !$OMP private(i) & + !$OMP reduction(.and.:use_buffers) + do i=1,nzin + if ((ia(i) < 1).or.(ia(i) > nr)) then + use_buffers = .false. + ! Invalid indices are placed outside the considered range + ia(i) = nr+2 + else + !$OMP ATOMIC UPDATE + iaux(ia(i)) = iaux(ia(i)) + 1 + end if end do - end block + !$OMP END PARALLEL DO #else - b%ia(1:nz) = a%ia(1:nz) - b%ja(1:nz) = a%ja(1:nz) - b%val(1:nz) = a%val(1:nz) + !srt_inp = .true. + do i=1,nzin + if ((ia(i) < 1).or.(ia(i) > nr)) then + use_buffers = .false. + !ia(i) = nr+2 + !srt_inp = .false. + exit + end if + + iaux(ia(i)) = iaux(ia(i)) + 1 + + !srt_inp = srt_inp .and.(ia(i-1)<=ia(i)) + end do #endif - call b%set_host() + end if - if (.not.b%is_by_rows()) call b%fix(info) + ! Check again use_buffers. We enter here if nzin >= nr and + ! all the indices are valid + ! Check again use_buffers. + if (use_buffers) then +#if defined(OPENMP) + allocate(kaux(nr+1),idxaux(MAX((nc+2)*maxthreads,nr)),sum(maxthreads+1),stat=info) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if - if (info /= psb_success_) goto 9999 + kaux(:) = 0 + sum(:) = 0 + sum(1) = 1 + err = 0 - call psb_erractionrestore(err_act) - return + ! Here, starting from 'iaux', we apply a fixing in order to obtain the starting + ! index for each row. We do the same on 'kaux' + !$OMP PARALLEL default(none) & + !$OMP shared(idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) & + !$OMP private(s,i,j,k,ithread,first_idx,last_idx,work,nxt_val,old_val,saved_elem, & + !$OMP first_elem,last_elem,nzl,iret,act_row) -9999 call psb_error_handler(err_act) + !$OMP SINGLE + nthreads = omp_get_num_threads() + !$OMP END SINGLE - return + ithread = omp_get_thread_num() -end subroutine psb_c_cp_coo_to_lcoo + ! -------- thread-specific workload -------- -subroutine psb_c_cp_coo_from_lcoo(a,b,info) - use psb_error_mod - use psb_c_base_mat_mod, psb_protect_name => psb_c_cp_coo_from_lcoo - implicit none - class(psb_c_coo_sparse_mat), intent(inout) :: a - class(psb_lc_coo_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info + work = nr/nthreads + if (ithread < MOD(nr,nthreads)) then + work = work + 1 + first_idx = ithread*work + 1 + else + first_idx = ithread*work + MOD(nr,nthreads) + 1 + end if - integer(psb_ipk_) :: err_act - character(len=20) :: name='from_coo' - logical, parameter :: debug=.false. - integer(psb_ipk_) :: m,n,nz + last_idx = first_idx + work - 1 - call psb_erractionsave(err_act) - info = psb_success_ - if (b%is_dev()) call b%sync() - a%psb_base_sparse_mat = b%psb_lbase_sparse_mat - call a%set_sort_status(b%get_sort_status()) - nz = b%get_nzeros() - call a%set_nzeros(nz) - call a%reallocate(nz) + !------------------------------------------- + ! ------------ iaux composition -------------- + ! 'iaux' will have the start index for each row -#if defined(OPENMP) - block - integer(psb_ipk_) :: i - !$omp parallel do private(i) - do i=1, nz - a%ia(i) = b%ia(i) - a%ja(i) = b%ja(i) - a%val(i) = b%val(i) + s = 0 + do i=first_idx,last_idx + s = s + iaux(i) end do - end block -#else - a%ia(1:nz) = b%ia(1:nz) - a%ja(1:nz) = b%ja(1:nz) - a%val(1:nz) = b%val(1:nz) -#endif - call a%set_host() + sum(ithread+2) = s - if (.not.a%is_by_rows()) call a%fix(info) - - if (info /= psb_success_) goto 9999 + !$OMP BARRIER - call psb_erractionrestore(err_act) - return + !$OMP SINGLE + do i=2,nthreads+1 + sum(i) = sum(i) + sum(i-1) + end do + !$OMP END SINGLE -9999 continue - call psb_errpush(info,name) + if (work > 0) then + saved_elem = iaux(first_idx) + end if - call psb_error_handler(err_act) + if (ithread == 0) then + iaux(1) = 1 + end if - return + !$OMP BARRIER -end subroutine psb_c_cp_coo_from_lcoo + if (work > 0) then + old_val = iaux(first_idx+1) + iaux(first_idx+1) = saved_elem + sum(ithread+1) + end if + do i=first_idx+2,last_idx+1 + nxt_val = iaux(i) + iaux(i) = iaux(i-1) + old_val + old_val = nxt_val + end do -! -! -! lc coo impl -! -! + ! -------------------------------------- -subroutine psb_lc_coo_get_diag(a,d,info) - use psb_c_base_mat_mod, psb_protect_name => psb_lc_coo_get_diag - use psb_error_mod - use psb_const_mod - implicit none - class(psb_lc_coo_sparse_mat), intent(in) :: a - complex(psb_spk_), intent(out) :: d(:) - integer(psb_ipk_), intent(out) :: info + !$OMP BARRIER - integer(psb_ipk_) :: err_act - integer(psb_lpk_) :: mnm, i, j - character(len=20) :: name='get_diag' - logical, parameter :: debug=.false. + ! ------------------ Sorting and buffers ------------------- - info = psb_success_ - call psb_erractionsave(err_act) - if (a%is_dev()) call a%sync() + ! Let's use an auxiliary buffer, 'idxaux', to get indices leaving + ! unmodified 'iaux' + do j=first_idx,last_idx + idxaux(j) = iaux(j) + end do - mnm = min(a%get_nrows(),a%get_ncols()) - if (size(d) < mnm) then - info=psb_err_input_asize_invalid_i_ - call psb_errpush(info,name,l_err=(/2_psb_lpk_,size(d,kind=psb_lpk_)/)) - goto 9999 - end if + ! Here we sort data inside the auxiliary buffers + do i=1,nzin + act_row = ia(i) + if ((act_row >= first_idx) .and. (act_row <= last_idx)) then + ias(idxaux(act_row)) = ia(i) + jas(idxaux(act_row)) = ja(i) + vs(idxaux(act_row)) = val(i) - if (a%is_unit()) then - d(1:mnm) = cone - else - d(1:mnm) = czero - do i=1,a%get_nzeros() - j=a%ia(i) - if ((j == a%ja(i)) .and.(j <= mnm ) .and.(j>0)) then - d(j) = a%val(i) - endif - enddo - end if - call psb_erractionrestore(err_act) - return + idxaux(act_row) = idxaux(act_row) + 1 + end if + end do -9999 call psb_error_handler(err_act) + !$OMP BARRIER + + ! Let's sort column indices and values. After that we will store + ! the number of unique values in 'kaux' + do j=first_idx,last_idx + first_elem = iaux(j) + last_elem = iaux(j+1) - 1 + nzl = last_elem - first_elem + 1 + + ! The row has elements? + if (nzl > 0) then + call psi_msort_up(nzl,jas(first_elem:last_elem), & + & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) + if (iret == 0) then + call psb_ip_reord(nzl,vs(first_elem:last_elem),& + & ias(first_elem:last_elem),jas(first_elem:last_elem), & + & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) + end if - return + ! Over each row we count the unique values + kaux(j) = 1 + do i=first_elem+1,last_elem + if (ias(i) == ias(i-1) .and. jas(i) == jas(i-1)) then + cycle + end if + kaux(j) = kaux(j) + 1 + end do + end if + end do -end subroutine psb_lc_coo_get_diag + ! -------------------------------------------------- + ! ---------------- kaux composition ---------------- -subroutine psb_lc_coo_scal(d,a,info,side) - use psb_c_base_mat_mod, psb_protect_name => psb_lc_coo_scal - use psb_error_mod - use psb_const_mod - use psb_string_mod - implicit none - class(psb_lc_coo_sparse_mat), intent(inout) :: a - complex(psb_spk_), intent(in) :: d(:) - integer(psb_ipk_), intent(out) :: info - character, intent(in), optional :: side + !$OMP SINGLE + sum(:) = 0 + sum(1) = 1 + !$OMP END SINGLE - integer(psb_ipk_) :: err_act - integer(psb_lpk_) :: mnm, i, j, m - character(len=20) :: name='scal' - character :: side_ - logical :: left - logical, parameter :: debug=.false. + s = 0 + do i=first_idx,last_idx + s = s + kaux(i) + end do + sum(ithread+2) = s - info = psb_success_ - call psb_erractionsave(err_act) - if (a%is_dev()) call a%sync() + !$OMP BARRIER - if (a%is_unit()) then - call a%make_nonunit() - end if + !$OMP SINGLE + do i=2,nthreads+1 + sum(i) = sum(i) + sum(i-1) + end do + kaux(nr+1) = sum(nthreads+1) + !$OMP END SINGLE - side_ = 'L' - if (present(side)) then - side_ = psb_toupper(side) - end if + if (work > 0) then + saved_elem = kaux(first_idx) + end if + if (ithread == 0) then + kaux(1) = 1 + end if - left = (side_ == 'L') + !$OMP BARRIER - if (left) then - m = a%get_nrows() - if (size(d) < m) then - info=psb_err_input_asize_invalid_i_ - call psb_errpush(info,name,l_err=(/2_psb_lpk_,size(d,kind=psb_lpk_)/)) - goto 9999 + if (work > 0) then + old_val = kaux(first_idx+1) + kaux(first_idx+1) = saved_elem + sum(ithread+1) end if - do i=1,a%get_nzeros() - j = a%ia(i) - a%val(i) = a%val(i) * d(j) - enddo - else - m = a%get_ncols() - if (size(d) < m) then - info=psb_err_input_asize_invalid_i_ - call psb_errpush(info,name,l_err=(/2_psb_lpk_,size(d,kind=psb_lpk_)/)) - goto 9999 - end if + do i=first_idx+2,last_idx+1 + nxt_val = kaux(i) + kaux(i) = kaux(i-1) + old_val + old_val = nxt_val + end do + + !$OMP BARRIER + + ! ------------------------------------------------ + + select case(dupl) + case(psb_dupl_ovwrt_) + !$OMP DO schedule(STATIC) + do j=1,nr + first_elem = iaux(j) + last_elem = iaux(j+1) - 1 + + if (first_elem > last_elem) then + cycle + end if + + k = kaux(j) + + val(k) = vs(first_elem) + ia(k) = ias(first_elem) + ja(k) = jas(first_elem) + + do i=first_elem+1,last_elem + if ((ias(i) == ias(i-1)).and.(jas(i) == jas(i-1))) then + val(k) = vs(i) + else + k = k + 1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + endif + end do + end do + !$OMP END DO + + case(psb_dupl_add_) + !$OMP DO schedule(STATIC) + do j=1,nr + first_elem = iaux(j) + last_elem = iaux(j+1) - 1 + + if (first_elem > last_elem) then + cycle + end if + + k = kaux(j) + + val(k) = vs(first_elem) + ia(k) = ias(first_elem) + ja(k) = jas(first_elem) + + do i=first_elem+1,last_elem + if ((ias(i) == ias(i-1)).and.(jas(i) == jas(i-1))) then + val(k) = val(k) + vs(i) + else + k = k + 1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + endif + end do + end do + !$OMP END DO + + case(psb_dupl_err_) + !$OMP DO schedule(STATIC) + do j=1,nr + first_elem = iaux(j) + last_elem = iaux(j+1) - 1 + + if (first_elem > last_elem) then + cycle + end if + + k = kaux(j) + + val(k) = vs(first_elem) + ia(k) = ias(first_elem) + ja(k) = jas(first_elem) + + do i=first_elem+1,last_elem + if ((ias(i) == ias(i-1)).and.(jas(i) == jas(i-1))) then + err = 1 + else + k = k + 1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + end if + end do + end do + !$OMP END DO + + case default + !$OMP SINGLE + err = 2 + !$OMP END SINGLE + end select + + !$OMP END PARALLEL + + if (err == 1) then + call psb_errpush(psb_err_duplicate_coo,name) + goto 9999 + else if (err == 2) then + write(psb_err_unit,*) 'Error in fix_coo: unsafe dupl',dupl + info =-7 + return + end if + + nzout = kaux(nr+1) - 1 + + deallocate(sum,kaux,idxaux,stat=info) +#else + !if (.not.srt_inp) then + + ip = iaux(1) + iaux(1) = 0 + do i=2, nr + is = iaux(i) + iaux(i) = ip + ip = ip + is + end do + iaux(nr+1) = ip + + do i=1,nzin + irw = ia(i) + ip = iaux(irw) + 1 + ias(ip) = ia(i) + jas(ip) = ja(i) + vs(ip) = val(i) + iaux(irw) = ip + end do + !end if + + select case(dupl) + case(psb_dupl_ovwrt_) + k = 0 + i = 1 + do j=1, nr + + nzl = iaux(j)-i+1 + imx = i+nzl-1 + + if (nzl > 0) then + call psi_msort_up(nzl,jas(i:imx),ix2,iret) + if (iret == 0) & + & call psb_ip_reord(nzl,vs(i:imx),& + & ias(i:imx),jas(i:imx),ix2) + k = k + 1 + ia(k) = ias(i) + ja(k) = jas(i) + val(k) = vs(i) + irw = ia(k) + icl = ja(k) + do + i = i + 1 + if (i > imx) exit + if ((ias(i) == irw).and.(jas(i) == icl)) then + val(k) = vs(i) + else + k = k+1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + irw = ia(k) + icl = ja(k) + endif + enddo + end if + end do + + case(psb_dupl_add_) + k = 0 + i = 1 + do j=1, nr + + nzl = iaux(j)-i+1 + imx = i+nzl-1 + + if (nzl > 0) then + call psi_msort_up(nzl,jas(i:imx),ix2,iret) + if (iret == 0) & + & call psb_ip_reord(nzl,vs(i:imx),& + & ias(i:imx),jas(i:imx),ix2) + k = k + 1 + ia(k) = ias(i) + ja(k) = jas(i) + val(k) = vs(i) + irw = ia(k) + icl = ja(k) + do + i = i + 1 + if (i > imx) exit + if ((ias(i) == irw).and.(jas(i) == icl)) then + val(k) = val(k) + vs(i) + else + k = k+1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + irw = ia(k) + icl = ja(k) + endif + enddo + end if + end do + + case(psb_dupl_err_) + k = 0 + i = 1 + do j=1, nr + + nzl = iaux(j)-i+1 + imx = i+nzl-1 + + if (nzl > 0) then + call psi_msort_up(nzl,jas(i:imx),ix2,iret) + if (iret == 0) & + & call psb_ip_reord(nzl,vs(i:imx),& + & ias(i:imx),jas(i:imx),ix2) + k = k + 1 + ia(k) = ias(i) + ja(k) = jas(i) + val(k) = vs(i) + irw = ia(k) + icl = ja(k) + do + i = i + 1 + if (i > imx) exit + if ((ias(i) == irw).and.(jas(i) == icl)) then + call psb_errpush(psb_err_duplicate_coo,name) + goto 9999 + else + k = k+1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + irw = ia(k) + icl = ja(k) + endif + enddo + end if + end do + + case default + write(psb_err_unit,*) 'Error in fix_coo: unsafe dupl',dupl + info =-7 + return + end select + + nzout = k + + deallocate(ix2, stat=info) +#endif + + deallocate(ias,jas,vs, stat=info) + + else if (.not.use_buffers) then + + ! + ! If we did not have enough memory for buffers, + ! let's try in place. + ! + call psi_msort_up(nzin,ia(1:),iaux(1:),iret) + if (iret == 0) & + & call psb_ip_reord(nzin,val,ia,ja,iaux) +#if defined(OPENMP) + !$OMP PARALLEL default(none) & + !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads) & + !$OMP private(i,j,first_idx,last_idx,nzl,act_row,iret,ithread, & + !$OMP work,first_elem,last_elem) + + !$OMP SINGLE + nthreads = omp_get_num_threads() + !$OMP END SINGLE + + ithread = omp_get_thread_num() + + ! -------- thread-specific workload -------- + + work = nr/nthreads + if (ithread < MOD(nr,nthreads)) then + work = work + 1 + first_idx = ithread*work + 1 + else + first_idx = ithread*work + MOD(nr,nthreads) + 1 + end if + + last_idx = first_idx + work - 1 + + ! --------------------------------------------------- + + first_elem = 0 + last_elem = -1 + act_row = first_idx + do j=1,nzin + if (ia(j) < act_row) then + cycle + else if ((ia(j) > last_idx) .or. (work < 1)) then + exit + else if (ia(j) > act_row) then + nzl = last_elem - first_elem + 1 + + if (nzl > 0) then + call psi_msort_up(nzl,ja(first_elem:),iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) + if (iret == 0) & + & call psb_ip_reord(nzl,val(first_elem:last_elem),& + & ia(first_elem:last_elem),ja(first_elem:last_elem),& + & iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) + end if + + act_row = act_row + 1 + first_elem = 0 + last_elem = -1 + else + if (first_elem == 0) then + first_elem = j + end if + + last_elem = j + end if + end do + !$OMP END PARALLEL +#else + i = 1 + j = i + do while (i <= nzin) + + do while ((ia(j) == ia(i))) + j = j+1 + if (j > nzin) exit + enddo + nzl = j - i + call psi_msort_up(nzl,ja(i:),iaux(1:),iret) + if (iret == 0) & + & call psb_ip_reord(nzl,val(i:i+nzl-1),& + & ia(i:i+nzl-1),ja(i:i+nzl-1),iaux) + i = j + enddo +#endif + + select case(dupl) + case(psb_dupl_ovwrt_) + + i = 1 + irw = ia(i) + icl = ja(i) + j = 1 + do + j = j + 1 + if (j > nzin) exit + if ((ia(j) < 1 .or. ia(j) > nr) .or. (ja(j) < 1 .or. ja(j) > nc)) cycle + if ((ia(j) == irw).and.(ja(j) == icl)) then + val(i) = val(j) + else + i = i+1 + val(i) = val(j) + ia(i) = ia(j) + ja(i) = ja(j) + irw = ia(i) + icl = ja(i) + endif + enddo + + case(psb_dupl_add_) + + i = 1 + irw = ia(i) + icl = ja(i) + j = 1 + do + j = j + 1 + if (j > nzin) exit + if ((ia(j) < 1 .or. ia(j) > nr) .or. (ja(j) < 1 .or. ja(j) > nc)) cycle + if ((ia(j) == irw).and.(ja(j) == icl)) then + val(i) = val(i) + val(j) + else + i = i+1 + val(i) = val(j) + ia(i) = ia(j) + ja(i) = ja(j) + irw = ia(i) + icl = ja(i) + endif + enddo + + case(psb_dupl_err_) + i = 1 + irw = ia(i) + icl = ja(i) + j = 1 + do + j = j + 1 + if (j > nzin) exit + if ((ia(j) < 1 .or. ia(j) > nr) .or. (ja(j) < 1 .or. ja(j) > nc)) cycle + if ((ia(j) == irw).and.(ja(j) == icl)) then + call psb_errpush(psb_err_duplicate_coo,name) + goto 9999 + else + i = i+1 + val(i) = val(j) + ia(i) = ia(j) + ja(i) = ja(j) + irw = ia(i) + icl = ja(i) + endif + enddo + case default + write(psb_err_unit,*) 'Error in fix_coo: unsafe dupl',dupl + info =-7 + end select + + nzout = i + + endif + + if(debug_level >= psb_debug_serial_)& + & write(debug_unit,*) trim(name),': end second loop' + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_fix_coo_inner_rowmajor + +subroutine psb_c_fix_coo_inner_colmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,info) + use psb_const_mod + use psb_error_mod + use psb_c_base_mat_mod, psb_protect_name => psb_c_fix_coo_inner_colmajor + use psb_string_mod + use psb_ip_reord_mod + use psb_sort_mod +#if defined(OPENMP) + use omp_lib +#endif + implicit none + + integer(psb_ipk_), intent(in) :: nr, nc, nzin, dupl + integer(psb_ipk_), intent(inout) :: ia(:), ja(:), iaux(:) + complex(psb_spk_), intent(inout) :: val(:) + integer(psb_ipk_), intent(out) :: nzout, info + !locals + integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:) + complex(psb_spk_), allocatable :: vs(:) + integer(psb_ipk_) :: nza, nzl,iret + integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name = 'psb_fixcoo' + logical :: srt_inp, use_buffers + if (nc <= nzin) then + ! Avoid strange situations with large indices +#if defined(OPENMP) + allocate(ias(nzin),jas(nzin),vs(nzin), stat=info) +#else + allocate(ias(nzin),jas(nzin),vs(nzin),ix2(nzin+2), stat=info) +#endif + use_buffers = (info == 0) + else + use_buffers = .false. + end if + + if (use_buffers) then + iaux(:) = 0 +#if defined(OPENMP) + !$OMP PARALLEL DO default(none) schedule(STATIC) & + !$OMP shared(nzin,ja,nc,iaux) & + !$OMP private(i) & + !$OMP reduction(.and.:use_buffers) + do i=1,nzin + if ((ja(i) < 1).or.(ja(i) > nc)) then + use_buffers = .false. + ! Invalid indices are placed outside the considered range + ja(i) = nc+2 + else + !$OMP ATOMIC UPDATE + iaux(ja(i)) = iaux(ja(i)) + 1 + end if + end do + !$OMP END PARALLEL DO +#else + !srt_inp = .true. + do i=1,nzin + if ((ja(i) < 1).or.(ja(i) > nc)) then + use_buffers = .false. + !ja(i) = nc+2 + !srt_inp = .false. + exit + end if + + iaux(ja(i)) = iaux(ja(i)) + 1 + + !srt_inp = srt_inp .and.(ja(i-1)<=ja(i)) + end do +#endif + end if + + !use_buffers=use_buffers.and.srt_inp + + ! Check again use_buffers. We enter here if nzin >= nc and + ! all the indices are valid + if (use_buffers) then +#if defined(OPENMP) + allocate(kaux(MAX(nzin,nc)+2),idxaux(MAX((nr+2)*maxthreads,nc)),sum(maxthreads+1),stat=info) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + kaux(:) = 0 + sum(:) = 0 + sum(1) = 1 + err = 0 + + ! Here, starting from 'iaux', we apply a fixing in order to obtain the starting + ! index for each column. We do the same on 'kaux' + !$OMP PARALLEL default(none) & + !$OMP shared(idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) & + !$OMP private(s,i,j,k,ithread,first_idx,last_idx,work,nxt_val,old_val,saved_elem, & + !$OMP first_elem,last_elem,nzl,iret,act_col) + + !$OMP SINGLE + nthreads = omp_get_num_threads() + !$OMP END SINGLE + + ithread = omp_get_thread_num() + + ! -------- thread-specific workload -------- + + work = nc/nthreads + if (ithread < MOD(nc,nthreads)) then + work = work + 1 + first_idx = ithread*work + 1 + else + first_idx = ithread*work + MOD(nc,nthreads) + 1 + end if + + last_idx = first_idx + work - 1 + + !------------------------------------------- + ! ---------- iaux composition -------------- + + s = 0 + do i=first_idx,last_idx + s = s + iaux(i) + end do + sum(ithread+2) = s + + !$OMP BARRIER + + !$OMP SINGLE + do i=2,nthreads+1 + sum(i) = sum(i) + sum(i-1) + end do + !$OMP END SINGLE + + if (work > 0) then + saved_elem = iaux(first_idx) + end if + if (ithread == 0) then + iaux(1) = 1 + end if + + !$OMP BARRIER + + if (work > 0) then + old_val = iaux(first_idx+1) + iaux(first_idx+1) = saved_elem + sum(ithread+1) + end if + + do i=first_idx+2,last_idx+1 + nxt_val = iaux(i) + iaux(i) = iaux(i-1) + old_val + old_val = nxt_val + end do + + ! -------------------------------------- + + !$OMP BARRIER + + ! ------------------ Sorting and buffers ------------------- + + ! Let's use an auxiliary buffer to get indices + do j=first_idx,last_idx + idxaux(j) = iaux(j) + end do + + ! Here we sort data inside the auxiliary buffers + do i=1,nzin + act_col = ja(i) + if (act_col >= first_idx .and. act_col <= last_idx) then + ias(idxaux(act_col)) = ia(i) + jas(idxaux(act_col)) = ja(i) + vs(idxaux(act_col)) = val(i) + + idxaux(act_col) = idxaux(act_col) + 1 + end if + end do + + !$OMP BARRIER + + ! Let's sort column indices and values. After that we will store + ! the number of unique values in 'kaux' + do j=first_idx,last_idx + first_elem = iaux(j) + last_elem = iaux(j+1) - 1 + nzl = last_elem - first_elem + 1 + + ! The column has elements? + if (nzl > 0) then + call psi_msort_up(nzl,ias(first_elem:last_elem), & + & idxaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2),iret) + + if (iret == 0) then + call psb_ip_reord(nzl,vs(first_elem:last_elem),& + & ias(first_elem:last_elem),jas(first_elem:last_elem), & + & idxaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2)) + end if + + ! Over each column we count the unique values + kaux(j) = 1 + do i=first_elem+1,last_elem + if (ias(i) == ias(i-1) .and. jas(i) == jas(i-1)) then + cycle + end if + kaux(j) = kaux(j) + 1 + end do + end if + end do + + ! -------------------------------------------------- + + ! ---------------- kaux composition ---------------- + + !$OMP SINGLE + sum(:) = 0 + sum(1) = 1 + !$OMP END SINGLE + + s = 0 + do i=first_idx,last_idx + s = s + kaux(i) + end do + sum(ithread+2) = s + + !$OMP BARRIER + + !$OMP SINGLE + do i=2,nthreads+1 + sum(i) = sum(i) + sum(i-1) + end do + !$OMP END SINGLE + + if (work > 0) then + saved_elem = kaux(first_idx) + end if + if (ithread == 0) then + kaux(1) = 1 + end if + + !$OMP BARRIER + + if (work > 0) then + old_val = kaux(first_idx+1) + kaux(first_idx+1) = saved_elem + sum(ithread+1) + end if + + do i=first_idx+2,last_idx+1 + nxt_val = kaux(i) + kaux(i) = kaux(i-1) + old_val + old_val = nxt_val + end do + + !$OMP BARRIER + + ! ------------------------------------------------ + + select case(dupl) + case(psb_dupl_ovwrt_) + !$OMP DO schedule(STATIC) + do j=1,nc + first_elem = iaux(j) + last_elem = iaux(j+1) - 1 + + if (first_elem > last_elem) then + cycle + end if + + k = kaux(j) + + val(k) = vs(first_elem) + ia(k) = ias(first_elem) + ja(k) = jas(first_elem) + + do i=first_elem+1,last_elem + if ((ias(i) == ias(i-1)).and.(jas(i) == jas(i-1))) then + val(k) = vs(i) + else + k = k + 1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + endif + end do + end do + !$OMP END DO + + case(psb_dupl_add_) + !$OMP DO schedule(STATIC) + do j=1,nc + first_elem = iaux(j) + last_elem = iaux(j+1) - 1 + + if (first_elem > last_elem) then + cycle + end if + + k = kaux(j) + + val(k) = vs(first_elem) + ia(k) = ias(first_elem) + ja(k) = jas(first_elem) + + do i=first_elem+1,last_elem + if ((ias(i) == ias(i-1)).and.(jas(i) == jas(i-1))) then + val(k) = val(k) + vs(i) + else + k = k + 1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + endif + end do + end do + !$OMP END DO + + case(psb_dupl_err_) + !$OMP DO schedule(STATIC) + do j=1,nc + first_elem = iaux(j) + last_elem = iaux(j+1) - 1 + + if (first_elem > last_elem) then + cycle + end if + + k = kaux(j) + + val(k) = vs(first_elem) + ia(k) = ias(first_elem) + ja(k) = jas(first_elem) + + do i=first_elem+1,last_elem + if ((ias(i) == ias(i-1)).and.(jas(i) == jas(i-1))) then + err = 1 + else + k = k + 1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + end if + end do + end do + !$OMP END DO + + case default + !$OMP SINGLE + err = 2 + !$OMP END SINGLE + end select + + !$OMP END PARALLEL + + if (err == 1) then + call psb_errpush(psb_err_duplicate_coo,name) + goto 9999 + else if (err == 2) then + write(psb_err_unit,*) 'Error in fix_coo: unsafe dupl',dupl + info =-7 + return + end if + + nzout = kaux(nc+1) - 1 + + deallocate(sum,kaux,idxaux,stat=info) +#else + !if (.not.srt_inp) then + ip = iaux(1) + iaux(1) = 0 + do i=2, nc + is = iaux(i) + iaux(i) = ip + ip = ip + is + end do + iaux(nc+1) = ip + + do i=1,nzin + icl = ja(i) + ip = iaux(icl) + 1 + ias(ip) = ia(i) + jas(ip) = ja(i) + vs(ip) = val(i) + iaux(icl) = ip + end do + !end if + + select case(dupl) + case(psb_dupl_ovwrt_) + k = 0 + i = 1 + do j=1, nc + nzl = iaux(j)-i+1 + imx = i+nzl-1 + + if (nzl > 0) then + call psi_msort_up(nzl,ias(i:imx),ix2,iret) + if (iret == 0) & + & call psb_ip_reord(nzl,vs(i:imx),& + & ias(i:imx),jas(i:imx),ix2) + k = k + 1 + ia(k) = ias(i) + ja(k) = jas(i) + val(k) = vs(i) + irw = ia(k) + icl = ja(k) + do + i = i + 1 + if (i > imx) exit + if ((ias(i) == irw).and.(jas(i) == icl)) then + val(k) = vs(i) + else + k = k+1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + irw = ia(k) + icl = ja(k) + endif + enddo + end if + end do + + case(psb_dupl_add_) + k = 0 + i = 1 + do j=1, nc + nzl = iaux(j)-i+1 + imx = i+nzl-1 + + if (nzl > 0) then + call psi_msort_up(nzl,ias(i:imx),ix2,iret) + if (iret == 0) & + & call psb_ip_reord(nzl,vs(i:imx),& + & ias(i:imx),jas(i:imx),ix2) + k = k + 1 + ia(k) = ias(i) + ja(k) = jas(i) + val(k) = vs(i) + irw = ia(k) + icl = ja(k) + do + i = i + 1 + if (i > imx) exit + if ((ias(i) == irw).and.(jas(i) == icl)) then + val(k) = val(k) + vs(i) + else + k = k+1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + irw = ia(k) + icl = ja(k) + endif + enddo + end if + end do + + case(psb_dupl_err_) + k = 0 + i = 1 + do j=1, nc + nzl = iaux(j)-i+1 + imx = i+nzl-1 + + if (nzl > 0) then + call psi_msort_up(nzl,ias(i:imx),ix2,iret) + if (iret == 0) & + & call psb_ip_reord(nzl,vs(i:imx),& + & ias(i:imx),jas(i:imx),ix2) + k = k + 1 + ia(k) = ias(i) + ja(k) = jas(i) + val(k) = vs(i) + irw = ia(k) + icl = ja(k) + do + i = i + 1 + if (i > imx) exit + if ((ias(i) == irw).and.(jas(i) == icl)) then + call psb_errpush(psb_err_duplicate_coo,name) + goto 9999 + else + k = k+1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + irw = ia(k) + icl = ja(k) + endif + enddo + end if + end do + + case default + write(psb_err_unit,*) 'Error in fix_coo: unsafe dupl',dupl + info =-7 + return + end select + + nzout = k + + deallocate(ix2, stat=info) +#endif + + deallocate(ias,jas,vs, stat=info) + + else if (.not.use_buffers) then + + call psi_msort_up(nzin,ja(1:),iaux(1:),iret) + if (iret == 0) & + & call psb_ip_reord(nzin,val,ia,ja,iaux) +#if defined(OPENMP) + !$OMP PARALLEL default(none) & + !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads) & + !$OMP private(i,j,first_idx,last_idx,nzl,act_col, & + !$OMP iret,ithread,work,first_elem,last_elem) + + !$OMP SINGLE + nthreads = omp_get_num_threads() + !$OMP END SINGLE + + ithread = omp_get_thread_num() + + ! -------- thread-specific workload -------- + + work = nc/nthreads + if (ithread < MOD(nc,nthreads)) then + work = work + 1 + first_idx = ithread*work + 1 + else + first_idx = ithread*work + MOD(nc,nthreads) + 1 + end if + + last_idx = first_idx + work - 1 + + ! --------------------------------------------------- + + first_elem = 0 + last_elem = -1 + act_col = first_idx + do j=1,nzin + if (ja(j) < act_col) then + cycle + else if ((ja(j) > last_idx) .or. (work < 1)) then + exit + else if (ja(j) > act_col) then + nzl = last_elem - first_elem + 1 + + if (nzl > 0) then + call psi_msort_up(nzl,ia(first_elem:),iaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2),iret) + if (iret == 0) & + & call psb_ip_reord(nzl,val(first_elem:last_elem),& + & ia(first_elem:last_elem),ja(first_elem:last_elem),& + & iaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2)) + end if + + act_col = act_col + 1 + first_elem = 0 + last_elem = -1 + else + if (first_elem == 0) then + first_elem = j + end if + + last_elem = j + end if + end do + !$OMP END PARALLEL +#else + i = 1 + j = i + do while (i <= nzin) + do while ((ja(j) == ja(i))) + j = j+1 + if (j > nzin) exit + enddo + nzl = j - i + call psi_msort_up(nzl,ia(i:),iaux(1:),iret) + if (iret == 0) & + & call psb_ip_reord(nzl,val(i:i+nzl-1),& + & ia(i:i+nzl-1),ja(i:i+nzl-1),iaux) + i = j + enddo +#endif + + i = 1 + irw = ia(i) + icl = ja(i) + j = 1 + + + select case(dupl) + case(psb_dupl_ovwrt_) + do + j = j + 1 + if (j > nzin) exit + if ((ia(j) < 1 .or. ia(j) > nr) .or. (ja(j) < 1 .or. ja(j) > nc)) cycle + if ((ia(j) == irw).and.(ja(j) == icl)) then + val(i) = val(j) + else + i = i+1 + val(i) = val(j) + ia(i) = ia(j) + ja(i) = ja(j) + irw = ia(i) + icl = ja(i) + endif + enddo + + case(psb_dupl_add_) + do + j = j + 1 + if (j > nzin) exit + if ((ia(j) < 1 .or. ia(j) > nr) .or. (ja(j) < 1 .or. ja(j) > nc)) cycle + if ((ia(j) == irw).and.(ja(j) == icl)) then + val(i) = val(i) + val(j) + else + i = i+1 + val(i) = val(j) + ia(i) = ia(j) + ja(i) = ja(j) + irw = ia(i) + icl = ja(i) + endif + enddo + + case(psb_dupl_err_) + do + j = j + 1 + if (j > nzin) exit + if ((ia(j) < 1 .or. ia(j) > nr) .or. (ja(j) < 1 .or. ja(j) > nc)) cycle + if ((ia(j) == irw).and.(ja(j) == icl)) then + call psb_errpush(psb_err_duplicate_coo,name) + goto 9999 + else + i = i+1 + val(i) = val(j) + ia(i) = ia(j) + ja(i) = ja(j) + irw = ia(i) + icl = ja(i) + endif + enddo + case default + write(psb_err_unit,*) 'Error in fix_coo: unsafe dupl',dupl + info =-7 + end select + + nzout = i + + if (debug_level >= psb_debug_serial_)& + & write(debug_unit,*) trim(name),': end second loop' + + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_fix_coo_inner_colmajor + + +subroutine psb_c_cp_coo_to_lcoo(a,b,info) + use psb_error_mod + use psb_c_base_mat_mod, psb_protect_name => psb_c_cp_coo_to_lcoo + implicit none + class(psb_c_coo_sparse_mat), intent(in) :: a + class(psb_lc_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act + integer(psb_lpk_) :: nz + character(len=20) :: name='to_coo' + logical, parameter :: debug=.false. + + + call psb_erractionsave(err_act) + info = psb_success_ + if (a%is_dev()) call a%sync() + + b%psb_lbase_sparse_mat = a%psb_base_sparse_mat + call b%set_sort_status(a%get_sort_status()) + nz = a%get_nzeros() + call b%set_nzeros(nz) + call b%reallocate(nz) + +#if defined(OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, nz + b%ia(i) = a%ia(i) + b%ja(i) = a%ja(i) + b%val(i) = a%val(i) + end do + end block +#else + b%ia(1:nz) = a%ia(1:nz) + b%ja(1:nz) = a%ja(1:nz) + b%val(1:nz) = a%val(1:nz) +#endif + call b%set_host() + + if (.not.b%is_by_rows()) call b%fix(info) + + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cp_coo_to_lcoo + +subroutine psb_c_cp_coo_from_lcoo(a,b,info) + use psb_error_mod + use psb_c_base_mat_mod, psb_protect_name => psb_c_cp_coo_from_lcoo + implicit none + class(psb_c_coo_sparse_mat), intent(inout) :: a + class(psb_lc_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act + character(len=20) :: name='from_coo' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: m,n,nz + + call psb_erractionsave(err_act) + info = psb_success_ + if (b%is_dev()) call b%sync() + a%psb_base_sparse_mat = b%psb_lbase_sparse_mat + call a%set_sort_status(b%get_sort_status()) + nz = b%get_nzeros() + call a%set_nzeros(nz) + call a%reallocate(nz) + +#if defined(OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, nz + a%ia(i) = b%ia(i) + a%ja(i) = b%ja(i) + a%val(i) = b%val(i) + end do + end block +#else + a%ia(1:nz) = b%ia(1:nz) + a%ja(1:nz) = b%ja(1:nz) + a%val(1:nz) = b%val(1:nz) +#endif + call a%set_host() + + if (.not.a%is_by_rows()) call a%fix(info) + + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_errpush(info,name) + + call psb_error_handler(err_act) + + return + +end subroutine psb_c_cp_coo_from_lcoo + + +! +! +! lc coo impl +! +! + +subroutine psb_lc_coo_get_diag(a,d,info) + use psb_c_base_mat_mod, psb_protect_name => psb_lc_coo_get_diag + use psb_error_mod + use psb_const_mod + implicit none + class(psb_lc_coo_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act + integer(psb_lpk_) :: mnm, i, j + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,l_err=(/2_psb_lpk_,size(d,kind=psb_lpk_)/)) + goto 9999 + end if + + if (a%is_unit()) then + d(1:mnm) = cone + else + d(1:mnm) = czero + do i=1,a%get_nzeros() + j=a%ia(i) + if ((j == a%ja(i)) .and.(j <= mnm ) .and.(j>0)) then + d(j) = a%val(i) + endif + enddo + end if + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_lc_coo_get_diag + +subroutine psb_lc_coo_scal(d,a,info,side) + use psb_c_base_mat_mod, psb_protect_name => psb_lc_coo_scal + use psb_error_mod + use psb_const_mod + use psb_string_mod + implicit none + class(psb_lc_coo_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + integer(psb_ipk_) :: err_act + integer(psb_lpk_) :: mnm, i, j, m + character(len=20) :: name='scal' + character :: side_ + logical :: left + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + side_ = 'L' + if (present(side)) then + side_ = psb_toupper(side) + end if + + left = (side_ == 'L') + + if (left) then + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,l_err=(/2_psb_lpk_,size(d,kind=psb_lpk_)/)) + goto 9999 + end if + + do i=1,a%get_nzeros() + j = a%ia(i) + a%val(i) = a%val(i) * d(j) + enddo + else + m = a%get_ncols() + if (size(d) < m) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,l_err=(/2_psb_lpk_,size(d,kind=psb_lpk_)/)) + goto 9999 + end if do i=1,a%get_nzeros() j = a%ja(i) @@ -5182,13 +6530,13 @@ function psb_lc_coo_maxval(a) result(res) implicit none class(psb_lc_coo_sparse_mat), intent(in) :: a real(psb_spk_) :: res - + integer(psb_lpk_) :: i,j,k,m,n, nnz, ir, jc, nc, info character(len=20) :: name='c_coo_maxval' logical, parameter :: debug=.false. - + if (a%is_dev()) call a%sync() - + if (a%is_unit()) then res = sone else @@ -5198,18 +6546,18 @@ function psb_lc_coo_maxval(a) result(res) if (allocated(a%val)) then nnz = min(nnz,size(a%val)) #if defined(OPENMP) - block - integer(psb_ipk_) :: i - !$omp parallel do private(i) reduction(max: res) - do i=1, nnz - res = max(res,abs(a%val(i))) - end do - end block + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) reduction(max: res) + do i=1, nnz + res = max(res,abs(a%val(i))) + end do + end block #else res = maxval(abs(a%val(1:nnz))) #endif end if - + end function psb_lc_coo_maxval function psb_lc_coo_csnmi(a) result(res) @@ -5265,13 +6613,13 @@ function psb_lc_coo_csnmi(a) result(res) vt(i) = vt(i) + abs(a%val(j)) end do #if defined(OPENMP) - block - integer(psb_ipk_) :: i - !$omp parallel do private(i) reduction(max: res) - do i=1, m - res = max(res,abs(vt(i))) - end do - end block + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) reduction(max: res) + do i=1, m + res = max(res,abs(vt(i))) + end do + end block #else res = maxval(vt(1:m)) #endif @@ -5321,7 +6669,7 @@ function psb_lc_coo_csnm1(a) result(res) do i=1, n res = max(res,abs(vt(i))) end do - end block + end block #else res = maxval(vt(1:n)) #endif @@ -6770,7 +8118,7 @@ subroutine psb_lc_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) if (nz < 0) then info = psb_err_iarg_neg_ -3 call psb_errpush(info,name,i_err=(/1_psb_ipk_/)) +3 call psb_errpush(info,name,i_err=(/1_psb_ipk_/)) goto 9999 end if if (size(ia) < nz) then diff --git a/base/serial/impl/psb_d_coo_impl.F90 b/base/serial/impl/psb_d_coo_impl.F90 index 2de6702b..888d8a8b 100644 --- a/base/serial/impl/psb_d_coo_impl.F90 +++ b/base/serial/impl/psb_d_coo_impl.F90 @@ -728,7 +728,7 @@ subroutine psb_d_coo_print(iout,a,iv,head,ivr,ivc) character(len=80) :: frmt integer(psb_ipk_) :: i,j, ni, nr, nc, nz - write(iout,'(a)') '%%MatrixMarket matrix coordinate real general' + write(iout,'(a)') '%%MatrixMarket matrix coordinate real general' if (present(head)) write(iout,'(a,a)') '% ',head write(iout,'(a)') '%' write(iout,'(a,a)') '% COO' @@ -3172,9 +3172,9 @@ subroutine psb_d_cp_coo_from_coo(a,b,info) integer(psb_ipk_) :: i !$omp parallel do private(i) do i=1, nz - a%ia(i) = b%ia(i) - a%ja(i) = b%ja(i) - a%val(i) = b%val(i) + a%ia(i) = b%ia(i) + a%ja(i) = b%ja(i) + a%val(i) = b%val(i) end do end block #else @@ -3615,6 +3615,7 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) select case(idir_) case(psb_row_major_) +#if 0 ! Row major order if (nr <= nzin) then ! Avoid strange situations with large indices @@ -3730,7 +3731,7 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) end if if (ithread == 0) then - iaux(1) = 1 + iaux(1) = 1 end if !$OMP BARRIER @@ -3782,11 +3783,11 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) ! The row has elements? if (nzl > 0) then call psi_msort_up(nzl,jas(first_elem:last_elem), & - & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) + & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) if (iret == 0) then - call psb_ip_reord(nzl,vs(first_elem:last_elem),& - & ias(first_elem:last_elem),jas(first_elem:last_elem), & - & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) + call psb_ip_reord(nzl,vs(first_elem:last_elem),& + & ias(first_elem:last_elem),jas(first_elem:last_elem), & + & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) end if ! Over each row we count the unique values @@ -3804,8 +3805,8 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) ! ---------------- kaux composition ---------------- !$OMP SINGLE - sum(:) = 0 - sum(1) = 1 + sum(:) = 0 + sum(1) = 1 !$OMP END SINGLE s = 0 @@ -3957,7 +3958,7 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) deallocate(sum,kaux,idxaux,stat=info) #else !if (.not.srt_inp) then - + ip = iaux(1) iaux(1) = 0 do i=2, nr @@ -4086,7 +4087,7 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) enddo end if end do - + case default write(psb_err_unit,*) 'Error in fix_coo: unsafe dupl',dupl_ info =-7 @@ -4097,7 +4098,7 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) deallocate(ix2, stat=info) #endif - + deallocate(ias,jas,vs, stat=info) else if (.not.use_buffers) then @@ -4261,9 +4262,12 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) if(debug_level >= psb_debug_serial_)& & write(debug_unit,*) trim(name),': end second loop' - +#else + call psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,& + & ia,ja,val,iaux,nzout,info) +#endif case(psb_col_major_) - +#if 0 if (nc <= nzin) then ! Avoid strange situations with large indices #if defined(OPENMP) @@ -4375,7 +4379,7 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) saved_elem = iaux(first_idx) end if if (ithread == 0) then - iaux(1) = 1 + iaux(1) = 1 end if !$OMP BARRIER @@ -4429,9 +4433,9 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) & idxaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2),iret) if (iret == 0) then - call psb_ip_reord(nzl,vs(first_elem:last_elem),& - & ias(first_elem:last_elem),jas(first_elem:last_elem), & - & idxaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2)) + call psb_ip_reord(nzl,vs(first_elem:last_elem),& + & ias(first_elem:last_elem),jas(first_elem:last_elem), & + & idxaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2)) end if ! Over each column we count the unique values @@ -4490,9 +4494,9 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) !$OMP BARRIER - ! ------------------------------------------------ + ! ------------------------------------------------ - select case(dupl_) + select case(dupl_) case(psb_dupl_ovwrt_) !$OMP DO schedule(STATIC) do j=1,nc @@ -4620,7 +4624,7 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) iaux(icl) = ip end do !end if - + select case(dupl_) case(psb_dupl_ovwrt_) k = 0 @@ -4733,7 +4737,7 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) info =-7 return end select - + nzout = k deallocate(ix2, stat=info) @@ -4786,9 +4790,9 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) if (nzl > 0) then call psi_msort_up(nzl,ia(first_elem:),iaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2),iret) if (iret == 0) & - & call psb_ip_reord(nzl,val(first_elem:last_elem),& - & ia(first_elem:last_elem),ja(first_elem:last_elem),& - & iaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2)) + & call psb_ip_reord(nzl,val(first_elem:last_elem),& + & ia(first_elem:last_elem),ja(first_elem:last_elem),& + & iaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2)) end if act_col = act_col + 1 @@ -4889,7 +4893,10 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) & write(debug_unit,*) trim(name),': end second loop' end if - +#else + call psb_d_fix_coo_inner_colmajor(nr,nc,nzin,dupl,& + & ia,ja,val,iaux,nzout,info) +#endif case default write(debug_unit,*) trim(name),': unknown direction ',idir_ info = psb_err_internal_error_ @@ -4908,219 +4915,1560 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) end subroutine psb_d_fix_coo_inner -subroutine psb_d_cp_coo_to_lcoo(a,b,info) +subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,info) + use psb_const_mod use psb_error_mod - use psb_d_base_mat_mod, psb_protect_name => psb_d_cp_coo_to_lcoo + use psb_d_base_mat_mod, psb_protect_name => psb_d_fix_coo_inner_rowmajor + use psb_string_mod + use psb_ip_reord_mod + use psb_sort_mod +#if defined(OPENMP) + use omp_lib +#endif implicit none - class(psb_d_coo_sparse_mat), intent(in) :: a - class(psb_ld_coo_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - - integer(psb_ipk_) :: err_act - integer(psb_lpk_) :: nz - character(len=20) :: name='to_coo' - logical, parameter :: debug=.false. + integer(psb_ipk_), intent(in) :: nr, nc, nzin, dupl + integer(psb_ipk_), intent(inout) :: ia(:), ja(:), iaux(:) + real(psb_dpk_), intent(inout) :: val(:) + integer(psb_ipk_), intent(out) :: nzout, info + !locals + integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:) + real(psb_dpk_), allocatable :: vs(:) + integer(psb_ipk_) :: nza, nzl,iret + integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name = 'psb_fixcoo' + logical :: srt_inp, use_buffers - call psb_erractionsave(err_act) - info = psb_success_ - if (a%is_dev()) call a%sync() - - b%psb_lbase_sparse_mat = a%psb_base_sparse_mat - call b%set_sort_status(a%get_sort_status()) - nz = a%get_nzeros() - call b%set_nzeros(nz) - call b%reallocate(nz) + ! Row major order + if (nr <= nzin) then + ! Avoid strange situations with large indices +#if defined(OPENMP) + ! We are not going to need 'ix2' because of the presence + ! of 'idxaux' as auxiliary buffer. + allocate(ias(nzin),jas(nzin),vs(nzin), stat=info) +#else + allocate(ias(nzin),jas(nzin),vs(nzin),ix2(nzin+2), stat=info) +#endif + use_buffers = (info == 0) + else + use_buffers = .false. + end if + if (use_buffers) then + iaux(:) = 0 #if defined(OPENMP) - block - integer(psb_ipk_) :: i - !$omp parallel do private(i) - do i=1, nz - b%ia(i) = a%ia(i) - b%ja(i) = a%ja(i) - b%val(i) = a%val(i) + !$OMP PARALLEL DO default(none) schedule(STATIC) & + !$OMP shared(nzin,ia,nr,iaux) & + !$OMP private(i) & + !$OMP reduction(.and.:use_buffers) + do i=1,nzin + if ((ia(i) < 1).or.(ia(i) > nr)) then + use_buffers = .false. + ! Invalid indices are placed outside the considered range + ia(i) = nr+2 + else + !$OMP ATOMIC UPDATE + iaux(ia(i)) = iaux(ia(i)) + 1 + end if end do - end block + !$OMP END PARALLEL DO #else - b%ia(1:nz) = a%ia(1:nz) - b%ja(1:nz) = a%ja(1:nz) - b%val(1:nz) = a%val(1:nz) + !srt_inp = .true. + do i=1,nzin + if ((ia(i) < 1).or.(ia(i) > nr)) then + use_buffers = .false. + !ia(i) = nr+2 + !srt_inp = .false. + exit + end if + + iaux(ia(i)) = iaux(ia(i)) + 1 + + !srt_inp = srt_inp .and.(ia(i-1)<=ia(i)) + end do #endif - call b%set_host() + end if - if (.not.b%is_by_rows()) call b%fix(info) + ! Check again use_buffers. We enter here if nzin >= nr and + ! all the indices are valid + ! Check again use_buffers. + if (use_buffers) then +#if defined(OPENMP) + allocate(kaux(nr+1),idxaux(MAX((nc+2)*maxthreads,nr)),sum(maxthreads+1),stat=info) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if - if (info /= psb_success_) goto 9999 + kaux(:) = 0 + sum(:) = 0 + sum(1) = 1 + err = 0 - call psb_erractionrestore(err_act) - return + ! Here, starting from 'iaux', we apply a fixing in order to obtain the starting + ! index for each row. We do the same on 'kaux' + !$OMP PARALLEL default(none) & + !$OMP shared(idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) & + !$OMP private(s,i,j,k,ithread,first_idx,last_idx,work,nxt_val,old_val,saved_elem, & + !$OMP first_elem,last_elem,nzl,iret,act_row) -9999 call psb_error_handler(err_act) + !$OMP SINGLE + nthreads = omp_get_num_threads() + !$OMP END SINGLE - return + ithread = omp_get_thread_num() -end subroutine psb_d_cp_coo_to_lcoo + ! -------- thread-specific workload -------- -subroutine psb_d_cp_coo_from_lcoo(a,b,info) - use psb_error_mod - use psb_d_base_mat_mod, psb_protect_name => psb_d_cp_coo_from_lcoo - implicit none - class(psb_d_coo_sparse_mat), intent(inout) :: a - class(psb_ld_coo_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info + work = nr/nthreads + if (ithread < MOD(nr,nthreads)) then + work = work + 1 + first_idx = ithread*work + 1 + else + first_idx = ithread*work + MOD(nr,nthreads) + 1 + end if - integer(psb_ipk_) :: err_act - character(len=20) :: name='from_coo' - logical, parameter :: debug=.false. - integer(psb_ipk_) :: m,n,nz + last_idx = first_idx + work - 1 - call psb_erractionsave(err_act) - info = psb_success_ - if (b%is_dev()) call b%sync() - a%psb_base_sparse_mat = b%psb_lbase_sparse_mat - call a%set_sort_status(b%get_sort_status()) - nz = b%get_nzeros() - call a%set_nzeros(nz) - call a%reallocate(nz) + !------------------------------------------- + ! ------------ iaux composition -------------- + ! 'iaux' will have the start index for each row -#if defined(OPENMP) - block - integer(psb_ipk_) :: i - !$omp parallel do private(i) - do i=1, nz - a%ia(i) = b%ia(i) - a%ja(i) = b%ja(i) - a%val(i) = b%val(i) + s = 0 + do i=first_idx,last_idx + s = s + iaux(i) end do - end block -#else - a%ia(1:nz) = b%ia(1:nz) - a%ja(1:nz) = b%ja(1:nz) - a%val(1:nz) = b%val(1:nz) -#endif - call a%set_host() + sum(ithread+2) = s - if (.not.a%is_by_rows()) call a%fix(info) - - if (info /= psb_success_) goto 9999 + !$OMP BARRIER - call psb_erractionrestore(err_act) - return + !$OMP SINGLE + do i=2,nthreads+1 + sum(i) = sum(i) + sum(i-1) + end do + !$OMP END SINGLE -9999 continue - call psb_errpush(info,name) + if (work > 0) then + saved_elem = iaux(first_idx) + end if - call psb_error_handler(err_act) + if (ithread == 0) then + iaux(1) = 1 + end if - return + !$OMP BARRIER -end subroutine psb_d_cp_coo_from_lcoo + if (work > 0) then + old_val = iaux(first_idx+1) + iaux(first_idx+1) = saved_elem + sum(ithread+1) + end if + do i=first_idx+2,last_idx+1 + nxt_val = iaux(i) + iaux(i) = iaux(i-1) + old_val + old_val = nxt_val + end do -! -! -! ld coo impl -! -! + ! -------------------------------------- -subroutine psb_ld_coo_get_diag(a,d,info) - use psb_d_base_mat_mod, psb_protect_name => psb_ld_coo_get_diag - use psb_error_mod - use psb_const_mod - implicit none - class(psb_ld_coo_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(out) :: d(:) - integer(psb_ipk_), intent(out) :: info + !$OMP BARRIER - integer(psb_ipk_) :: err_act - integer(psb_lpk_) :: mnm, i, j - character(len=20) :: name='get_diag' - logical, parameter :: debug=.false. + ! ------------------ Sorting and buffers ------------------- - info = psb_success_ - call psb_erractionsave(err_act) - if (a%is_dev()) call a%sync() + ! Let's use an auxiliary buffer, 'idxaux', to get indices leaving + ! unmodified 'iaux' + do j=first_idx,last_idx + idxaux(j) = iaux(j) + end do - mnm = min(a%get_nrows(),a%get_ncols()) - if (size(d) < mnm) then - info=psb_err_input_asize_invalid_i_ - call psb_errpush(info,name,l_err=(/2_psb_lpk_,size(d,kind=psb_lpk_)/)) - goto 9999 - end if + ! Here we sort data inside the auxiliary buffers + do i=1,nzin + act_row = ia(i) + if ((act_row >= first_idx) .and. (act_row <= last_idx)) then + ias(idxaux(act_row)) = ia(i) + jas(idxaux(act_row)) = ja(i) + vs(idxaux(act_row)) = val(i) - if (a%is_unit()) then - d(1:mnm) = done - else - d(1:mnm) = dzero - do i=1,a%get_nzeros() - j=a%ia(i) - if ((j == a%ja(i)) .and.(j <= mnm ) .and.(j>0)) then - d(j) = a%val(i) - endif - enddo - end if - call psb_erractionrestore(err_act) - return + idxaux(act_row) = idxaux(act_row) + 1 + end if + end do -9999 call psb_error_handler(err_act) + !$OMP BARRIER + + ! Let's sort column indices and values. After that we will store + ! the number of unique values in 'kaux' + do j=first_idx,last_idx + first_elem = iaux(j) + last_elem = iaux(j+1) - 1 + nzl = last_elem - first_elem + 1 + + ! The row has elements? + if (nzl > 0) then + call psi_msort_up(nzl,jas(first_elem:last_elem), & + & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) + if (iret == 0) then + call psb_ip_reord(nzl,vs(first_elem:last_elem),& + & ias(first_elem:last_elem),jas(first_elem:last_elem), & + & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) + end if - return + ! Over each row we count the unique values + kaux(j) = 1 + do i=first_elem+1,last_elem + if (ias(i) == ias(i-1) .and. jas(i) == jas(i-1)) then + cycle + end if + kaux(j) = kaux(j) + 1 + end do + end if + end do -end subroutine psb_ld_coo_get_diag + ! -------------------------------------------------- + ! ---------------- kaux composition ---------------- -subroutine psb_ld_coo_scal(d,a,info,side) - use psb_d_base_mat_mod, psb_protect_name => psb_ld_coo_scal - use psb_error_mod - use psb_const_mod - use psb_string_mod - implicit none - class(psb_ld_coo_sparse_mat), intent(inout) :: a - real(psb_dpk_), intent(in) :: d(:) - integer(psb_ipk_), intent(out) :: info - character, intent(in), optional :: side + !$OMP SINGLE + sum(:) = 0 + sum(1) = 1 + !$OMP END SINGLE - integer(psb_ipk_) :: err_act - integer(psb_lpk_) :: mnm, i, j, m - character(len=20) :: name='scal' - character :: side_ - logical :: left - logical, parameter :: debug=.false. + s = 0 + do i=first_idx,last_idx + s = s + kaux(i) + end do + sum(ithread+2) = s - info = psb_success_ - call psb_erractionsave(err_act) - if (a%is_dev()) call a%sync() + !$OMP BARRIER - if (a%is_unit()) then - call a%make_nonunit() - end if + !$OMP SINGLE + do i=2,nthreads+1 + sum(i) = sum(i) + sum(i-1) + end do + kaux(nr+1) = sum(nthreads+1) + !$OMP END SINGLE - side_ = 'L' - if (present(side)) then - side_ = psb_toupper(side) - end if + if (work > 0) then + saved_elem = kaux(first_idx) + end if + if (ithread == 0) then + kaux(1) = 1 + end if - left = (side_ == 'L') + !$OMP BARRIER - if (left) then - m = a%get_nrows() - if (size(d) < m) then - info=psb_err_input_asize_invalid_i_ - call psb_errpush(info,name,l_err=(/2_psb_lpk_,size(d,kind=psb_lpk_)/)) - goto 9999 + if (work > 0) then + old_val = kaux(first_idx+1) + kaux(first_idx+1) = saved_elem + sum(ithread+1) end if - do i=1,a%get_nzeros() - j = a%ia(i) - a%val(i) = a%val(i) * d(j) - enddo - else - m = a%get_ncols() - if (size(d) < m) then - info=psb_err_input_asize_invalid_i_ - call psb_errpush(info,name,l_err=(/2_psb_lpk_,size(d,kind=psb_lpk_)/)) - goto 9999 - end if + do i=first_idx+2,last_idx+1 + nxt_val = kaux(i) + kaux(i) = kaux(i-1) + old_val + old_val = nxt_val + end do + + !$OMP BARRIER + + ! ------------------------------------------------ + + select case(dupl) + case(psb_dupl_ovwrt_) + !$OMP DO schedule(STATIC) + do j=1,nr + first_elem = iaux(j) + last_elem = iaux(j+1) - 1 + + if (first_elem > last_elem) then + cycle + end if + + k = kaux(j) + + val(k) = vs(first_elem) + ia(k) = ias(first_elem) + ja(k) = jas(first_elem) + + do i=first_elem+1,last_elem + if ((ias(i) == ias(i-1)).and.(jas(i) == jas(i-1))) then + val(k) = vs(i) + else + k = k + 1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + endif + end do + end do + !$OMP END DO + + case(psb_dupl_add_) + !$OMP DO schedule(STATIC) + do j=1,nr + first_elem = iaux(j) + last_elem = iaux(j+1) - 1 + + if (first_elem > last_elem) then + cycle + end if + + k = kaux(j) + + val(k) = vs(first_elem) + ia(k) = ias(first_elem) + ja(k) = jas(first_elem) + + do i=first_elem+1,last_elem + if ((ias(i) == ias(i-1)).and.(jas(i) == jas(i-1))) then + val(k) = val(k) + vs(i) + else + k = k + 1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + endif + end do + end do + !$OMP END DO + + case(psb_dupl_err_) + !$OMP DO schedule(STATIC) + do j=1,nr + first_elem = iaux(j) + last_elem = iaux(j+1) - 1 + + if (first_elem > last_elem) then + cycle + end if + + k = kaux(j) + + val(k) = vs(first_elem) + ia(k) = ias(first_elem) + ja(k) = jas(first_elem) + + do i=first_elem+1,last_elem + if ((ias(i) == ias(i-1)).and.(jas(i) == jas(i-1))) then + err = 1 + else + k = k + 1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + end if + end do + end do + !$OMP END DO + + case default + !$OMP SINGLE + err = 2 + !$OMP END SINGLE + end select + + !$OMP END PARALLEL + + if (err == 1) then + call psb_errpush(psb_err_duplicate_coo,name) + goto 9999 + else if (err == 2) then + write(psb_err_unit,*) 'Error in fix_coo: unsafe dupl',dupl + info =-7 + return + end if + + nzout = kaux(nr+1) - 1 + + deallocate(sum,kaux,idxaux,stat=info) +#else + !if (.not.srt_inp) then + + ip = iaux(1) + iaux(1) = 0 + do i=2, nr + is = iaux(i) + iaux(i) = ip + ip = ip + is + end do + iaux(nr+1) = ip + + do i=1,nzin + irw = ia(i) + ip = iaux(irw) + 1 + ias(ip) = ia(i) + jas(ip) = ja(i) + vs(ip) = val(i) + iaux(irw) = ip + end do + !end if + + select case(dupl) + case(psb_dupl_ovwrt_) + k = 0 + i = 1 + do j=1, nr + + nzl = iaux(j)-i+1 + imx = i+nzl-1 + + if (nzl > 0) then + call psi_msort_up(nzl,jas(i:imx),ix2,iret) + if (iret == 0) & + & call psb_ip_reord(nzl,vs(i:imx),& + & ias(i:imx),jas(i:imx),ix2) + k = k + 1 + ia(k) = ias(i) + ja(k) = jas(i) + val(k) = vs(i) + irw = ia(k) + icl = ja(k) + do + i = i + 1 + if (i > imx) exit + if ((ias(i) == irw).and.(jas(i) == icl)) then + val(k) = vs(i) + else + k = k+1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + irw = ia(k) + icl = ja(k) + endif + enddo + end if + end do + + case(psb_dupl_add_) + k = 0 + i = 1 + do j=1, nr + + nzl = iaux(j)-i+1 + imx = i+nzl-1 + + if (nzl > 0) then + call psi_msort_up(nzl,jas(i:imx),ix2,iret) + if (iret == 0) & + & call psb_ip_reord(nzl,vs(i:imx),& + & ias(i:imx),jas(i:imx),ix2) + k = k + 1 + ia(k) = ias(i) + ja(k) = jas(i) + val(k) = vs(i) + irw = ia(k) + icl = ja(k) + do + i = i + 1 + if (i > imx) exit + if ((ias(i) == irw).and.(jas(i) == icl)) then + val(k) = val(k) + vs(i) + else + k = k+1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + irw = ia(k) + icl = ja(k) + endif + enddo + end if + end do + + case(psb_dupl_err_) + k = 0 + i = 1 + do j=1, nr + + nzl = iaux(j)-i+1 + imx = i+nzl-1 + + if (nzl > 0) then + call psi_msort_up(nzl,jas(i:imx),ix2,iret) + if (iret == 0) & + & call psb_ip_reord(nzl,vs(i:imx),& + & ias(i:imx),jas(i:imx),ix2) + k = k + 1 + ia(k) = ias(i) + ja(k) = jas(i) + val(k) = vs(i) + irw = ia(k) + icl = ja(k) + do + i = i + 1 + if (i > imx) exit + if ((ias(i) == irw).and.(jas(i) == icl)) then + call psb_errpush(psb_err_duplicate_coo,name) + goto 9999 + else + k = k+1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + irw = ia(k) + icl = ja(k) + endif + enddo + end if + end do + + case default + write(psb_err_unit,*) 'Error in fix_coo: unsafe dupl',dupl + info =-7 + return + end select + + nzout = k + + deallocate(ix2, stat=info) +#endif + + deallocate(ias,jas,vs, stat=info) + + else if (.not.use_buffers) then + + ! + ! If we did not have enough memory for buffers, + ! let's try in place. + ! + call psi_msort_up(nzin,ia(1:),iaux(1:),iret) + if (iret == 0) & + & call psb_ip_reord(nzin,val,ia,ja,iaux) +#if defined(OPENMP) + !$OMP PARALLEL default(none) & + !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads) & + !$OMP private(i,j,first_idx,last_idx,nzl,act_row,iret,ithread, & + !$OMP work,first_elem,last_elem) + + !$OMP SINGLE + nthreads = omp_get_num_threads() + !$OMP END SINGLE + + ithread = omp_get_thread_num() + + ! -------- thread-specific workload -------- + + work = nr/nthreads + if (ithread < MOD(nr,nthreads)) then + work = work + 1 + first_idx = ithread*work + 1 + else + first_idx = ithread*work + MOD(nr,nthreads) + 1 + end if + + last_idx = first_idx + work - 1 + + ! --------------------------------------------------- + + first_elem = 0 + last_elem = -1 + act_row = first_idx + do j=1,nzin + if (ia(j) < act_row) then + cycle + else if ((ia(j) > last_idx) .or. (work < 1)) then + exit + else if (ia(j) > act_row) then + nzl = last_elem - first_elem + 1 + + if (nzl > 0) then + call psi_msort_up(nzl,ja(first_elem:),iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) + if (iret == 0) & + & call psb_ip_reord(nzl,val(first_elem:last_elem),& + & ia(first_elem:last_elem),ja(first_elem:last_elem),& + & iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) + end if + + act_row = act_row + 1 + first_elem = 0 + last_elem = -1 + else + if (first_elem == 0) then + first_elem = j + end if + + last_elem = j + end if + end do + !$OMP END PARALLEL +#else + i = 1 + j = i + do while (i <= nzin) + + do while ((ia(j) == ia(i))) + j = j+1 + if (j > nzin) exit + enddo + nzl = j - i + call psi_msort_up(nzl,ja(i:),iaux(1:),iret) + if (iret == 0) & + & call psb_ip_reord(nzl,val(i:i+nzl-1),& + & ia(i:i+nzl-1),ja(i:i+nzl-1),iaux) + i = j + enddo +#endif + + select case(dupl) + case(psb_dupl_ovwrt_) + + i = 1 + irw = ia(i) + icl = ja(i) + j = 1 + do + j = j + 1 + if (j > nzin) exit + if ((ia(j) < 1 .or. ia(j) > nr) .or. (ja(j) < 1 .or. ja(j) > nc)) cycle + if ((ia(j) == irw).and.(ja(j) == icl)) then + val(i) = val(j) + else + i = i+1 + val(i) = val(j) + ia(i) = ia(j) + ja(i) = ja(j) + irw = ia(i) + icl = ja(i) + endif + enddo + + case(psb_dupl_add_) + + i = 1 + irw = ia(i) + icl = ja(i) + j = 1 + do + j = j + 1 + if (j > nzin) exit + if ((ia(j) < 1 .or. ia(j) > nr) .or. (ja(j) < 1 .or. ja(j) > nc)) cycle + if ((ia(j) == irw).and.(ja(j) == icl)) then + val(i) = val(i) + val(j) + else + i = i+1 + val(i) = val(j) + ia(i) = ia(j) + ja(i) = ja(j) + irw = ia(i) + icl = ja(i) + endif + enddo + + case(psb_dupl_err_) + i = 1 + irw = ia(i) + icl = ja(i) + j = 1 + do + j = j + 1 + if (j > nzin) exit + if ((ia(j) < 1 .or. ia(j) > nr) .or. (ja(j) < 1 .or. ja(j) > nc)) cycle + if ((ia(j) == irw).and.(ja(j) == icl)) then + call psb_errpush(psb_err_duplicate_coo,name) + goto 9999 + else + i = i+1 + val(i) = val(j) + ia(i) = ia(j) + ja(i) = ja(j) + irw = ia(i) + icl = ja(i) + endif + enddo + case default + write(psb_err_unit,*) 'Error in fix_coo: unsafe dupl',dupl + info =-7 + end select + + nzout = i + + endif + + if(debug_level >= psb_debug_serial_)& + & write(debug_unit,*) trim(name),': end second loop' + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_fix_coo_inner_rowmajor + +subroutine psb_d_fix_coo_inner_colmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,info) + use psb_const_mod + use psb_error_mod + use psb_d_base_mat_mod, psb_protect_name => psb_d_fix_coo_inner_colmajor + use psb_string_mod + use psb_ip_reord_mod + use psb_sort_mod +#if defined(OPENMP) + use omp_lib +#endif + implicit none + + integer(psb_ipk_), intent(in) :: nr, nc, nzin, dupl + integer(psb_ipk_), intent(inout) :: ia(:), ja(:), iaux(:) + real(psb_dpk_), intent(inout) :: val(:) + integer(psb_ipk_), intent(out) :: nzout, info + !locals + integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:) + real(psb_dpk_), allocatable :: vs(:) + integer(psb_ipk_) :: nza, nzl,iret + integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name = 'psb_fixcoo' + logical :: srt_inp, use_buffers + if (nc <= nzin) then + ! Avoid strange situations with large indices +#if defined(OPENMP) + allocate(ias(nzin),jas(nzin),vs(nzin), stat=info) +#else + allocate(ias(nzin),jas(nzin),vs(nzin),ix2(nzin+2), stat=info) +#endif + use_buffers = (info == 0) + else + use_buffers = .false. + end if + + if (use_buffers) then + iaux(:) = 0 +#if defined(OPENMP) + !$OMP PARALLEL DO default(none) schedule(STATIC) & + !$OMP shared(nzin,ja,nc,iaux) & + !$OMP private(i) & + !$OMP reduction(.and.:use_buffers) + do i=1,nzin + if ((ja(i) < 1).or.(ja(i) > nc)) then + use_buffers = .false. + ! Invalid indices are placed outside the considered range + ja(i) = nc+2 + else + !$OMP ATOMIC UPDATE + iaux(ja(i)) = iaux(ja(i)) + 1 + end if + end do + !$OMP END PARALLEL DO +#else + !srt_inp = .true. + do i=1,nzin + if ((ja(i) < 1).or.(ja(i) > nc)) then + use_buffers = .false. + !ja(i) = nc+2 + !srt_inp = .false. + exit + end if + + iaux(ja(i)) = iaux(ja(i)) + 1 + + !srt_inp = srt_inp .and.(ja(i-1)<=ja(i)) + end do +#endif + end if + + !use_buffers=use_buffers.and.srt_inp + + ! Check again use_buffers. We enter here if nzin >= nc and + ! all the indices are valid + if (use_buffers) then +#if defined(OPENMP) + allocate(kaux(MAX(nzin,nc)+2),idxaux(MAX((nr+2)*maxthreads,nc)),sum(maxthreads+1),stat=info) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + kaux(:) = 0 + sum(:) = 0 + sum(1) = 1 + err = 0 + + ! Here, starting from 'iaux', we apply a fixing in order to obtain the starting + ! index for each column. We do the same on 'kaux' + !$OMP PARALLEL default(none) & + !$OMP shared(idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) & + !$OMP private(s,i,j,k,ithread,first_idx,last_idx,work,nxt_val,old_val,saved_elem, & + !$OMP first_elem,last_elem,nzl,iret,act_col) + + !$OMP SINGLE + nthreads = omp_get_num_threads() + !$OMP END SINGLE + + ithread = omp_get_thread_num() + + ! -------- thread-specific workload -------- + + work = nc/nthreads + if (ithread < MOD(nc,nthreads)) then + work = work + 1 + first_idx = ithread*work + 1 + else + first_idx = ithread*work + MOD(nc,nthreads) + 1 + end if + + last_idx = first_idx + work - 1 + + !------------------------------------------- + ! ---------- iaux composition -------------- + + s = 0 + do i=first_idx,last_idx + s = s + iaux(i) + end do + sum(ithread+2) = s + + !$OMP BARRIER + + !$OMP SINGLE + do i=2,nthreads+1 + sum(i) = sum(i) + sum(i-1) + end do + !$OMP END SINGLE + + if (work > 0) then + saved_elem = iaux(first_idx) + end if + if (ithread == 0) then + iaux(1) = 1 + end if + + !$OMP BARRIER + + if (work > 0) then + old_val = iaux(first_idx+1) + iaux(first_idx+1) = saved_elem + sum(ithread+1) + end if + + do i=first_idx+2,last_idx+1 + nxt_val = iaux(i) + iaux(i) = iaux(i-1) + old_val + old_val = nxt_val + end do + + ! -------------------------------------- + + !$OMP BARRIER + + ! ------------------ Sorting and buffers ------------------- + + ! Let's use an auxiliary buffer to get indices + do j=first_idx,last_idx + idxaux(j) = iaux(j) + end do + + ! Here we sort data inside the auxiliary buffers + do i=1,nzin + act_col = ja(i) + if (act_col >= first_idx .and. act_col <= last_idx) then + ias(idxaux(act_col)) = ia(i) + jas(idxaux(act_col)) = ja(i) + vs(idxaux(act_col)) = val(i) + + idxaux(act_col) = idxaux(act_col) + 1 + end if + end do + + !$OMP BARRIER + + ! Let's sort column indices and values. After that we will store + ! the number of unique values in 'kaux' + do j=first_idx,last_idx + first_elem = iaux(j) + last_elem = iaux(j+1) - 1 + nzl = last_elem - first_elem + 1 + + ! The column has elements? + if (nzl > 0) then + call psi_msort_up(nzl,ias(first_elem:last_elem), & + & idxaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2),iret) + + if (iret == 0) then + call psb_ip_reord(nzl,vs(first_elem:last_elem),& + & ias(first_elem:last_elem),jas(first_elem:last_elem), & + & idxaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2)) + end if + + ! Over each column we count the unique values + kaux(j) = 1 + do i=first_elem+1,last_elem + if (ias(i) == ias(i-1) .and. jas(i) == jas(i-1)) then + cycle + end if + kaux(j) = kaux(j) + 1 + end do + end if + end do + + ! -------------------------------------------------- + + ! ---------------- kaux composition ---------------- + + !$OMP SINGLE + sum(:) = 0 + sum(1) = 1 + !$OMP END SINGLE + + s = 0 + do i=first_idx,last_idx + s = s + kaux(i) + end do + sum(ithread+2) = s + + !$OMP BARRIER + + !$OMP SINGLE + do i=2,nthreads+1 + sum(i) = sum(i) + sum(i-1) + end do + !$OMP END SINGLE + + if (work > 0) then + saved_elem = kaux(first_idx) + end if + if (ithread == 0) then + kaux(1) = 1 + end if + + !$OMP BARRIER + + if (work > 0) then + old_val = kaux(first_idx+1) + kaux(first_idx+1) = saved_elem + sum(ithread+1) + end if + + do i=first_idx+2,last_idx+1 + nxt_val = kaux(i) + kaux(i) = kaux(i-1) + old_val + old_val = nxt_val + end do + + !$OMP BARRIER + + ! ------------------------------------------------ + + select case(dupl) + case(psb_dupl_ovwrt_) + !$OMP DO schedule(STATIC) + do j=1,nc + first_elem = iaux(j) + last_elem = iaux(j+1) - 1 + + if (first_elem > last_elem) then + cycle + end if + + k = kaux(j) + + val(k) = vs(first_elem) + ia(k) = ias(first_elem) + ja(k) = jas(first_elem) + + do i=first_elem+1,last_elem + if ((ias(i) == ias(i-1)).and.(jas(i) == jas(i-1))) then + val(k) = vs(i) + else + k = k + 1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + endif + end do + end do + !$OMP END DO + + case(psb_dupl_add_) + !$OMP DO schedule(STATIC) + do j=1,nc + first_elem = iaux(j) + last_elem = iaux(j+1) - 1 + + if (first_elem > last_elem) then + cycle + end if + + k = kaux(j) + + val(k) = vs(first_elem) + ia(k) = ias(first_elem) + ja(k) = jas(first_elem) + + do i=first_elem+1,last_elem + if ((ias(i) == ias(i-1)).and.(jas(i) == jas(i-1))) then + val(k) = val(k) + vs(i) + else + k = k + 1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + endif + end do + end do + !$OMP END DO + + case(psb_dupl_err_) + !$OMP DO schedule(STATIC) + do j=1,nc + first_elem = iaux(j) + last_elem = iaux(j+1) - 1 + + if (first_elem > last_elem) then + cycle + end if + + k = kaux(j) + + val(k) = vs(first_elem) + ia(k) = ias(first_elem) + ja(k) = jas(first_elem) + + do i=first_elem+1,last_elem + if ((ias(i) == ias(i-1)).and.(jas(i) == jas(i-1))) then + err = 1 + else + k = k + 1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + end if + end do + end do + !$OMP END DO + + case default + !$OMP SINGLE + err = 2 + !$OMP END SINGLE + end select + + !$OMP END PARALLEL + + if (err == 1) then + call psb_errpush(psb_err_duplicate_coo,name) + goto 9999 + else if (err == 2) then + write(psb_err_unit,*) 'Error in fix_coo: unsafe dupl',dupl + info =-7 + return + end if + + nzout = kaux(nc+1) - 1 + + deallocate(sum,kaux,idxaux,stat=info) +#else + !if (.not.srt_inp) then + ip = iaux(1) + iaux(1) = 0 + do i=2, nc + is = iaux(i) + iaux(i) = ip + ip = ip + is + end do + iaux(nc+1) = ip + + do i=1,nzin + icl = ja(i) + ip = iaux(icl) + 1 + ias(ip) = ia(i) + jas(ip) = ja(i) + vs(ip) = val(i) + iaux(icl) = ip + end do + !end if + + select case(dupl) + case(psb_dupl_ovwrt_) + k = 0 + i = 1 + do j=1, nc + nzl = iaux(j)-i+1 + imx = i+nzl-1 + + if (nzl > 0) then + call psi_msort_up(nzl,ias(i:imx),ix2,iret) + if (iret == 0) & + & call psb_ip_reord(nzl,vs(i:imx),& + & ias(i:imx),jas(i:imx),ix2) + k = k + 1 + ia(k) = ias(i) + ja(k) = jas(i) + val(k) = vs(i) + irw = ia(k) + icl = ja(k) + do + i = i + 1 + if (i > imx) exit + if ((ias(i) == irw).and.(jas(i) == icl)) then + val(k) = vs(i) + else + k = k+1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + irw = ia(k) + icl = ja(k) + endif + enddo + end if + end do + + case(psb_dupl_add_) + k = 0 + i = 1 + do j=1, nc + nzl = iaux(j)-i+1 + imx = i+nzl-1 + + if (nzl > 0) then + call psi_msort_up(nzl,ias(i:imx),ix2,iret) + if (iret == 0) & + & call psb_ip_reord(nzl,vs(i:imx),& + & ias(i:imx),jas(i:imx),ix2) + k = k + 1 + ia(k) = ias(i) + ja(k) = jas(i) + val(k) = vs(i) + irw = ia(k) + icl = ja(k) + do + i = i + 1 + if (i > imx) exit + if ((ias(i) == irw).and.(jas(i) == icl)) then + val(k) = val(k) + vs(i) + else + k = k+1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + irw = ia(k) + icl = ja(k) + endif + enddo + end if + end do + + case(psb_dupl_err_) + k = 0 + i = 1 + do j=1, nc + nzl = iaux(j)-i+1 + imx = i+nzl-1 + + if (nzl > 0) then + call psi_msort_up(nzl,ias(i:imx),ix2,iret) + if (iret == 0) & + & call psb_ip_reord(nzl,vs(i:imx),& + & ias(i:imx),jas(i:imx),ix2) + k = k + 1 + ia(k) = ias(i) + ja(k) = jas(i) + val(k) = vs(i) + irw = ia(k) + icl = ja(k) + do + i = i + 1 + if (i > imx) exit + if ((ias(i) == irw).and.(jas(i) == icl)) then + call psb_errpush(psb_err_duplicate_coo,name) + goto 9999 + else + k = k+1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + irw = ia(k) + icl = ja(k) + endif + enddo + end if + end do + + case default + write(psb_err_unit,*) 'Error in fix_coo: unsafe dupl',dupl + info =-7 + return + end select + + nzout = k + + deallocate(ix2, stat=info) +#endif + + deallocate(ias,jas,vs, stat=info) + + else if (.not.use_buffers) then + + call psi_msort_up(nzin,ja(1:),iaux(1:),iret) + if (iret == 0) & + & call psb_ip_reord(nzin,val,ia,ja,iaux) +#if defined(OPENMP) + !$OMP PARALLEL default(none) & + !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads) & + !$OMP private(i,j,first_idx,last_idx,nzl,act_col, & + !$OMP iret,ithread,work,first_elem,last_elem) + + !$OMP SINGLE + nthreads = omp_get_num_threads() + !$OMP END SINGLE + + ithread = omp_get_thread_num() + + ! -------- thread-specific workload -------- + + work = nc/nthreads + if (ithread < MOD(nc,nthreads)) then + work = work + 1 + first_idx = ithread*work + 1 + else + first_idx = ithread*work + MOD(nc,nthreads) + 1 + end if + + last_idx = first_idx + work - 1 + + ! --------------------------------------------------- + + first_elem = 0 + last_elem = -1 + act_col = first_idx + do j=1,nzin + if (ja(j) < act_col) then + cycle + else if ((ja(j) > last_idx) .or. (work < 1)) then + exit + else if (ja(j) > act_col) then + nzl = last_elem - first_elem + 1 + + if (nzl > 0) then + call psi_msort_up(nzl,ia(first_elem:),iaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2),iret) + if (iret == 0) & + & call psb_ip_reord(nzl,val(first_elem:last_elem),& + & ia(first_elem:last_elem),ja(first_elem:last_elem),& + & iaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2)) + end if + + act_col = act_col + 1 + first_elem = 0 + last_elem = -1 + else + if (first_elem == 0) then + first_elem = j + end if + + last_elem = j + end if + end do + !$OMP END PARALLEL +#else + i = 1 + j = i + do while (i <= nzin) + do while ((ja(j) == ja(i))) + j = j+1 + if (j > nzin) exit + enddo + nzl = j - i + call psi_msort_up(nzl,ia(i:),iaux(1:),iret) + if (iret == 0) & + & call psb_ip_reord(nzl,val(i:i+nzl-1),& + & ia(i:i+nzl-1),ja(i:i+nzl-1),iaux) + i = j + enddo +#endif + + i = 1 + irw = ia(i) + icl = ja(i) + j = 1 + + + select case(dupl) + case(psb_dupl_ovwrt_) + do + j = j + 1 + if (j > nzin) exit + if ((ia(j) < 1 .or. ia(j) > nr) .or. (ja(j) < 1 .or. ja(j) > nc)) cycle + if ((ia(j) == irw).and.(ja(j) == icl)) then + val(i) = val(j) + else + i = i+1 + val(i) = val(j) + ia(i) = ia(j) + ja(i) = ja(j) + irw = ia(i) + icl = ja(i) + endif + enddo + + case(psb_dupl_add_) + do + j = j + 1 + if (j > nzin) exit + if ((ia(j) < 1 .or. ia(j) > nr) .or. (ja(j) < 1 .or. ja(j) > nc)) cycle + if ((ia(j) == irw).and.(ja(j) == icl)) then + val(i) = val(i) + val(j) + else + i = i+1 + val(i) = val(j) + ia(i) = ia(j) + ja(i) = ja(j) + irw = ia(i) + icl = ja(i) + endif + enddo + + case(psb_dupl_err_) + do + j = j + 1 + if (j > nzin) exit + if ((ia(j) < 1 .or. ia(j) > nr) .or. (ja(j) < 1 .or. ja(j) > nc)) cycle + if ((ia(j) == irw).and.(ja(j) == icl)) then + call psb_errpush(psb_err_duplicate_coo,name) + goto 9999 + else + i = i+1 + val(i) = val(j) + ia(i) = ia(j) + ja(i) = ja(j) + irw = ia(i) + icl = ja(i) + endif + enddo + case default + write(psb_err_unit,*) 'Error in fix_coo: unsafe dupl',dupl + info =-7 + end select + + nzout = i + + if (debug_level >= psb_debug_serial_)& + & write(debug_unit,*) trim(name),': end second loop' + + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_fix_coo_inner_colmajor + + +subroutine psb_d_cp_coo_to_lcoo(a,b,info) + use psb_error_mod + use psb_d_base_mat_mod, psb_protect_name => psb_d_cp_coo_to_lcoo + implicit none + class(psb_d_coo_sparse_mat), intent(in) :: a + class(psb_ld_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act + integer(psb_lpk_) :: nz + character(len=20) :: name='to_coo' + logical, parameter :: debug=.false. + + + call psb_erractionsave(err_act) + info = psb_success_ + if (a%is_dev()) call a%sync() + + b%psb_lbase_sparse_mat = a%psb_base_sparse_mat + call b%set_sort_status(a%get_sort_status()) + nz = a%get_nzeros() + call b%set_nzeros(nz) + call b%reallocate(nz) + +#if defined(OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, nz + b%ia(i) = a%ia(i) + b%ja(i) = a%ja(i) + b%val(i) = a%val(i) + end do + end block +#else + b%ia(1:nz) = a%ia(1:nz) + b%ja(1:nz) = a%ja(1:nz) + b%val(1:nz) = a%val(1:nz) +#endif + call b%set_host() + + if (.not.b%is_by_rows()) call b%fix(info) + + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cp_coo_to_lcoo + +subroutine psb_d_cp_coo_from_lcoo(a,b,info) + use psb_error_mod + use psb_d_base_mat_mod, psb_protect_name => psb_d_cp_coo_from_lcoo + implicit none + class(psb_d_coo_sparse_mat), intent(inout) :: a + class(psb_ld_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act + character(len=20) :: name='from_coo' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: m,n,nz + + call psb_erractionsave(err_act) + info = psb_success_ + if (b%is_dev()) call b%sync() + a%psb_base_sparse_mat = b%psb_lbase_sparse_mat + call a%set_sort_status(b%get_sort_status()) + nz = b%get_nzeros() + call a%set_nzeros(nz) + call a%reallocate(nz) + +#if defined(OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, nz + a%ia(i) = b%ia(i) + a%ja(i) = b%ja(i) + a%val(i) = b%val(i) + end do + end block +#else + a%ia(1:nz) = b%ia(1:nz) + a%ja(1:nz) = b%ja(1:nz) + a%val(1:nz) = b%val(1:nz) +#endif + call a%set_host() + + if (.not.a%is_by_rows()) call a%fix(info) + + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_errpush(info,name) + + call psb_error_handler(err_act) + + return + +end subroutine psb_d_cp_coo_from_lcoo + + +! +! +! ld coo impl +! +! + +subroutine psb_ld_coo_get_diag(a,d,info) + use psb_d_base_mat_mod, psb_protect_name => psb_ld_coo_get_diag + use psb_error_mod + use psb_const_mod + implicit none + class(psb_ld_coo_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act + integer(psb_lpk_) :: mnm, i, j + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,l_err=(/2_psb_lpk_,size(d,kind=psb_lpk_)/)) + goto 9999 + end if + + if (a%is_unit()) then + d(1:mnm) = done + else + d(1:mnm) = dzero + do i=1,a%get_nzeros() + j=a%ia(i) + if ((j == a%ja(i)) .and.(j <= mnm ) .and.(j>0)) then + d(j) = a%val(i) + endif + enddo + end if + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_ld_coo_get_diag + +subroutine psb_ld_coo_scal(d,a,info,side) + use psb_d_base_mat_mod, psb_protect_name => psb_ld_coo_scal + use psb_error_mod + use psb_const_mod + use psb_string_mod + implicit none + class(psb_ld_coo_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + integer(psb_ipk_) :: err_act + integer(psb_lpk_) :: mnm, i, j, m + character(len=20) :: name='scal' + character :: side_ + logical :: left + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + side_ = 'L' + if (present(side)) then + side_ = psb_toupper(side) + end if + + left = (side_ == 'L') + + if (left) then + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,l_err=(/2_psb_lpk_,size(d,kind=psb_lpk_)/)) + goto 9999 + end if + + do i=1,a%get_nzeros() + j = a%ia(i) + a%val(i) = a%val(i) * d(j) + enddo + else + m = a%get_ncols() + if (size(d) < m) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,l_err=(/2_psb_lpk_,size(d,kind=psb_lpk_)/)) + goto 9999 + end if do i=1,a%get_nzeros() j = a%ja(i) @@ -5182,13 +6530,13 @@ function psb_ld_coo_maxval(a) result(res) implicit none class(psb_ld_coo_sparse_mat), intent(in) :: a real(psb_dpk_) :: res - + integer(psb_lpk_) :: i,j,k,m,n, nnz, ir, jc, nc, info character(len=20) :: name='d_coo_maxval' logical, parameter :: debug=.false. - + if (a%is_dev()) call a%sync() - + if (a%is_unit()) then res = done else @@ -5198,18 +6546,18 @@ function psb_ld_coo_maxval(a) result(res) if (allocated(a%val)) then nnz = min(nnz,size(a%val)) #if defined(OPENMP) - block - integer(psb_ipk_) :: i - !$omp parallel do private(i) reduction(max: res) - do i=1, nnz - res = max(res,abs(a%val(i))) - end do - end block + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) reduction(max: res) + do i=1, nnz + res = max(res,abs(a%val(i))) + end do + end block #else res = maxval(abs(a%val(1:nnz))) #endif end if - + end function psb_ld_coo_maxval function psb_ld_coo_csnmi(a) result(res) @@ -5265,13 +6613,13 @@ function psb_ld_coo_csnmi(a) result(res) vt(i) = vt(i) + abs(a%val(j)) end do #if defined(OPENMP) - block - integer(psb_ipk_) :: i - !$omp parallel do private(i) reduction(max: res) - do i=1, m - res = max(res,abs(vt(i))) - end do - end block + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) reduction(max: res) + do i=1, m + res = max(res,abs(vt(i))) + end do + end block #else res = maxval(vt(1:m)) #endif @@ -5321,7 +6669,7 @@ function psb_ld_coo_csnm1(a) result(res) do i=1, n res = max(res,abs(vt(i))) end do - end block + end block #else res = maxval(vt(1:n)) #endif @@ -6770,7 +8118,7 @@ subroutine psb_ld_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) if (nz < 0) then info = psb_err_iarg_neg_ -3 call psb_errpush(info,name,i_err=(/1_psb_ipk_/)) +3 call psb_errpush(info,name,i_err=(/1_psb_ipk_/)) goto 9999 end if if (size(ia) < nz) then diff --git a/base/serial/impl/psb_s_coo_impl.F90 b/base/serial/impl/psb_s_coo_impl.F90 index 15d8d454..2a2b3d0e 100644 --- a/base/serial/impl/psb_s_coo_impl.F90 +++ b/base/serial/impl/psb_s_coo_impl.F90 @@ -728,7 +728,7 @@ subroutine psb_s_coo_print(iout,a,iv,head,ivr,ivc) character(len=80) :: frmt integer(psb_ipk_) :: i,j, ni, nr, nc, nz - write(iout,'(a)') '%%MatrixMarket matrix coordinate real general' + write(iout,'(a)') '%%MatrixMarket matrix coordinate real general' if (present(head)) write(iout,'(a,a)') '% ',head write(iout,'(a)') '%' write(iout,'(a,a)') '% COO' @@ -3172,9 +3172,9 @@ subroutine psb_s_cp_coo_from_coo(a,b,info) integer(psb_ipk_) :: i !$omp parallel do private(i) do i=1, nz - a%ia(i) = b%ia(i) - a%ja(i) = b%ja(i) - a%val(i) = b%val(i) + a%ia(i) = b%ia(i) + a%ja(i) = b%ja(i) + a%val(i) = b%val(i) end do end block #else @@ -3615,6 +3615,7 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) select case(idir_) case(psb_row_major_) +#if 0 ! Row major order if (nr <= nzin) then ! Avoid strange situations with large indices @@ -3730,7 +3731,7 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) end if if (ithread == 0) then - iaux(1) = 1 + iaux(1) = 1 end if !$OMP BARRIER @@ -3782,11 +3783,11 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) ! The row has elements? if (nzl > 0) then call psi_msort_up(nzl,jas(first_elem:last_elem), & - & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) + & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) if (iret == 0) then - call psb_ip_reord(nzl,vs(first_elem:last_elem),& - & ias(first_elem:last_elem),jas(first_elem:last_elem), & - & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) + call psb_ip_reord(nzl,vs(first_elem:last_elem),& + & ias(first_elem:last_elem),jas(first_elem:last_elem), & + & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) end if ! Over each row we count the unique values @@ -3804,8 +3805,8 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) ! ---------------- kaux composition ---------------- !$OMP SINGLE - sum(:) = 0 - sum(1) = 1 + sum(:) = 0 + sum(1) = 1 !$OMP END SINGLE s = 0 @@ -3957,7 +3958,7 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) deallocate(sum,kaux,idxaux,stat=info) #else !if (.not.srt_inp) then - + ip = iaux(1) iaux(1) = 0 do i=2, nr @@ -4086,7 +4087,7 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) enddo end if end do - + case default write(psb_err_unit,*) 'Error in fix_coo: unsafe dupl',dupl_ info =-7 @@ -4097,7 +4098,7 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) deallocate(ix2, stat=info) #endif - + deallocate(ias,jas,vs, stat=info) else if (.not.use_buffers) then @@ -4261,9 +4262,12 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) if(debug_level >= psb_debug_serial_)& & write(debug_unit,*) trim(name),': end second loop' - +#else + call psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,& + & ia,ja,val,iaux,nzout,info) +#endif case(psb_col_major_) - +#if 0 if (nc <= nzin) then ! Avoid strange situations with large indices #if defined(OPENMP) @@ -4375,7 +4379,7 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) saved_elem = iaux(first_idx) end if if (ithread == 0) then - iaux(1) = 1 + iaux(1) = 1 end if !$OMP BARRIER @@ -4429,9 +4433,9 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) & idxaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2),iret) if (iret == 0) then - call psb_ip_reord(nzl,vs(first_elem:last_elem),& - & ias(first_elem:last_elem),jas(first_elem:last_elem), & - & idxaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2)) + call psb_ip_reord(nzl,vs(first_elem:last_elem),& + & ias(first_elem:last_elem),jas(first_elem:last_elem), & + & idxaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2)) end if ! Over each column we count the unique values @@ -4490,9 +4494,9 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) !$OMP BARRIER - ! ------------------------------------------------ + ! ------------------------------------------------ - select case(dupl_) + select case(dupl_) case(psb_dupl_ovwrt_) !$OMP DO schedule(STATIC) do j=1,nc @@ -4620,7 +4624,7 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) iaux(icl) = ip end do !end if - + select case(dupl_) case(psb_dupl_ovwrt_) k = 0 @@ -4733,7 +4737,7 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) info =-7 return end select - + nzout = k deallocate(ix2, stat=info) @@ -4786,9 +4790,9 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) if (nzl > 0) then call psi_msort_up(nzl,ia(first_elem:),iaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2),iret) if (iret == 0) & - & call psb_ip_reord(nzl,val(first_elem:last_elem),& - & ia(first_elem:last_elem),ja(first_elem:last_elem),& - & iaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2)) + & call psb_ip_reord(nzl,val(first_elem:last_elem),& + & ia(first_elem:last_elem),ja(first_elem:last_elem),& + & iaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2)) end if act_col = act_col + 1 @@ -4889,7 +4893,10 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) & write(debug_unit,*) trim(name),': end second loop' end if - +#else + call psb_s_fix_coo_inner_colmajor(nr,nc,nzin,dupl,& + & ia,ja,val,iaux,nzout,info) +#endif case default write(debug_unit,*) trim(name),': unknown direction ',idir_ info = psb_err_internal_error_ @@ -4908,219 +4915,1560 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) end subroutine psb_s_fix_coo_inner -subroutine psb_s_cp_coo_to_lcoo(a,b,info) +subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,info) + use psb_const_mod use psb_error_mod - use psb_s_base_mat_mod, psb_protect_name => psb_s_cp_coo_to_lcoo + use psb_s_base_mat_mod, psb_protect_name => psb_s_fix_coo_inner_rowmajor + use psb_string_mod + use psb_ip_reord_mod + use psb_sort_mod +#if defined(OPENMP) + use omp_lib +#endif implicit none - class(psb_s_coo_sparse_mat), intent(in) :: a - class(psb_ls_coo_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - - integer(psb_ipk_) :: err_act - integer(psb_lpk_) :: nz - character(len=20) :: name='to_coo' - logical, parameter :: debug=.false. + integer(psb_ipk_), intent(in) :: nr, nc, nzin, dupl + integer(psb_ipk_), intent(inout) :: ia(:), ja(:), iaux(:) + real(psb_spk_), intent(inout) :: val(:) + integer(psb_ipk_), intent(out) :: nzout, info + !locals + integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:) + real(psb_spk_), allocatable :: vs(:) + integer(psb_ipk_) :: nza, nzl,iret + integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name = 'psb_fixcoo' + logical :: srt_inp, use_buffers - call psb_erractionsave(err_act) - info = psb_success_ - if (a%is_dev()) call a%sync() - - b%psb_lbase_sparse_mat = a%psb_base_sparse_mat - call b%set_sort_status(a%get_sort_status()) - nz = a%get_nzeros() - call b%set_nzeros(nz) - call b%reallocate(nz) + ! Row major order + if (nr <= nzin) then + ! Avoid strange situations with large indices +#if defined(OPENMP) + ! We are not going to need 'ix2' because of the presence + ! of 'idxaux' as auxiliary buffer. + allocate(ias(nzin),jas(nzin),vs(nzin), stat=info) +#else + allocate(ias(nzin),jas(nzin),vs(nzin),ix2(nzin+2), stat=info) +#endif + use_buffers = (info == 0) + else + use_buffers = .false. + end if + if (use_buffers) then + iaux(:) = 0 #if defined(OPENMP) - block - integer(psb_ipk_) :: i - !$omp parallel do private(i) - do i=1, nz - b%ia(i) = a%ia(i) - b%ja(i) = a%ja(i) - b%val(i) = a%val(i) + !$OMP PARALLEL DO default(none) schedule(STATIC) & + !$OMP shared(nzin,ia,nr,iaux) & + !$OMP private(i) & + !$OMP reduction(.and.:use_buffers) + do i=1,nzin + if ((ia(i) < 1).or.(ia(i) > nr)) then + use_buffers = .false. + ! Invalid indices are placed outside the considered range + ia(i) = nr+2 + else + !$OMP ATOMIC UPDATE + iaux(ia(i)) = iaux(ia(i)) + 1 + end if end do - end block + !$OMP END PARALLEL DO #else - b%ia(1:nz) = a%ia(1:nz) - b%ja(1:nz) = a%ja(1:nz) - b%val(1:nz) = a%val(1:nz) + !srt_inp = .true. + do i=1,nzin + if ((ia(i) < 1).or.(ia(i) > nr)) then + use_buffers = .false. + !ia(i) = nr+2 + !srt_inp = .false. + exit + end if + + iaux(ia(i)) = iaux(ia(i)) + 1 + + !srt_inp = srt_inp .and.(ia(i-1)<=ia(i)) + end do #endif - call b%set_host() + end if - if (.not.b%is_by_rows()) call b%fix(info) + ! Check again use_buffers. We enter here if nzin >= nr and + ! all the indices are valid + ! Check again use_buffers. + if (use_buffers) then +#if defined(OPENMP) + allocate(kaux(nr+1),idxaux(MAX((nc+2)*maxthreads,nr)),sum(maxthreads+1),stat=info) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if - if (info /= psb_success_) goto 9999 + kaux(:) = 0 + sum(:) = 0 + sum(1) = 1 + err = 0 - call psb_erractionrestore(err_act) - return + ! Here, starting from 'iaux', we apply a fixing in order to obtain the starting + ! index for each row. We do the same on 'kaux' + !$OMP PARALLEL default(none) & + !$OMP shared(idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) & + !$OMP private(s,i,j,k,ithread,first_idx,last_idx,work,nxt_val,old_val,saved_elem, & + !$OMP first_elem,last_elem,nzl,iret,act_row) -9999 call psb_error_handler(err_act) + !$OMP SINGLE + nthreads = omp_get_num_threads() + !$OMP END SINGLE - return + ithread = omp_get_thread_num() -end subroutine psb_s_cp_coo_to_lcoo + ! -------- thread-specific workload -------- -subroutine psb_s_cp_coo_from_lcoo(a,b,info) - use psb_error_mod - use psb_s_base_mat_mod, psb_protect_name => psb_s_cp_coo_from_lcoo - implicit none - class(psb_s_coo_sparse_mat), intent(inout) :: a - class(psb_ls_coo_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info + work = nr/nthreads + if (ithread < MOD(nr,nthreads)) then + work = work + 1 + first_idx = ithread*work + 1 + else + first_idx = ithread*work + MOD(nr,nthreads) + 1 + end if - integer(psb_ipk_) :: err_act - character(len=20) :: name='from_coo' - logical, parameter :: debug=.false. - integer(psb_ipk_) :: m,n,nz + last_idx = first_idx + work - 1 - call psb_erractionsave(err_act) - info = psb_success_ - if (b%is_dev()) call b%sync() - a%psb_base_sparse_mat = b%psb_lbase_sparse_mat - call a%set_sort_status(b%get_sort_status()) - nz = b%get_nzeros() - call a%set_nzeros(nz) - call a%reallocate(nz) + !------------------------------------------- + ! ------------ iaux composition -------------- + ! 'iaux' will have the start index for each row -#if defined(OPENMP) - block - integer(psb_ipk_) :: i - !$omp parallel do private(i) - do i=1, nz - a%ia(i) = b%ia(i) - a%ja(i) = b%ja(i) - a%val(i) = b%val(i) + s = 0 + do i=first_idx,last_idx + s = s + iaux(i) end do - end block -#else - a%ia(1:nz) = b%ia(1:nz) - a%ja(1:nz) = b%ja(1:nz) - a%val(1:nz) = b%val(1:nz) -#endif - call a%set_host() + sum(ithread+2) = s - if (.not.a%is_by_rows()) call a%fix(info) - - if (info /= psb_success_) goto 9999 + !$OMP BARRIER - call psb_erractionrestore(err_act) - return + !$OMP SINGLE + do i=2,nthreads+1 + sum(i) = sum(i) + sum(i-1) + end do + !$OMP END SINGLE -9999 continue - call psb_errpush(info,name) + if (work > 0) then + saved_elem = iaux(first_idx) + end if - call psb_error_handler(err_act) + if (ithread == 0) then + iaux(1) = 1 + end if - return + !$OMP BARRIER -end subroutine psb_s_cp_coo_from_lcoo + if (work > 0) then + old_val = iaux(first_idx+1) + iaux(first_idx+1) = saved_elem + sum(ithread+1) + end if + do i=first_idx+2,last_idx+1 + nxt_val = iaux(i) + iaux(i) = iaux(i-1) + old_val + old_val = nxt_val + end do -! -! -! ls coo impl -! -! + ! -------------------------------------- -subroutine psb_ls_coo_get_diag(a,d,info) - use psb_s_base_mat_mod, psb_protect_name => psb_ls_coo_get_diag - use psb_error_mod - use psb_const_mod - implicit none - class(psb_ls_coo_sparse_mat), intent(in) :: a - real(psb_spk_), intent(out) :: d(:) - integer(psb_ipk_), intent(out) :: info + !$OMP BARRIER - integer(psb_ipk_) :: err_act - integer(psb_lpk_) :: mnm, i, j - character(len=20) :: name='get_diag' - logical, parameter :: debug=.false. + ! ------------------ Sorting and buffers ------------------- - info = psb_success_ - call psb_erractionsave(err_act) - if (a%is_dev()) call a%sync() + ! Let's use an auxiliary buffer, 'idxaux', to get indices leaving + ! unmodified 'iaux' + do j=first_idx,last_idx + idxaux(j) = iaux(j) + end do - mnm = min(a%get_nrows(),a%get_ncols()) - if (size(d) < mnm) then - info=psb_err_input_asize_invalid_i_ - call psb_errpush(info,name,l_err=(/2_psb_lpk_,size(d,kind=psb_lpk_)/)) - goto 9999 - end if + ! Here we sort data inside the auxiliary buffers + do i=1,nzin + act_row = ia(i) + if ((act_row >= first_idx) .and. (act_row <= last_idx)) then + ias(idxaux(act_row)) = ia(i) + jas(idxaux(act_row)) = ja(i) + vs(idxaux(act_row)) = val(i) - if (a%is_unit()) then - d(1:mnm) = sone - else - d(1:mnm) = szero - do i=1,a%get_nzeros() - j=a%ia(i) - if ((j == a%ja(i)) .and.(j <= mnm ) .and.(j>0)) then - d(j) = a%val(i) - endif - enddo - end if - call psb_erractionrestore(err_act) - return + idxaux(act_row) = idxaux(act_row) + 1 + end if + end do -9999 call psb_error_handler(err_act) + !$OMP BARRIER + + ! Let's sort column indices and values. After that we will store + ! the number of unique values in 'kaux' + do j=first_idx,last_idx + first_elem = iaux(j) + last_elem = iaux(j+1) - 1 + nzl = last_elem - first_elem + 1 + + ! The row has elements? + if (nzl > 0) then + call psi_msort_up(nzl,jas(first_elem:last_elem), & + & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) + if (iret == 0) then + call psb_ip_reord(nzl,vs(first_elem:last_elem),& + & ias(first_elem:last_elem),jas(first_elem:last_elem), & + & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) + end if - return + ! Over each row we count the unique values + kaux(j) = 1 + do i=first_elem+1,last_elem + if (ias(i) == ias(i-1) .and. jas(i) == jas(i-1)) then + cycle + end if + kaux(j) = kaux(j) + 1 + end do + end if + end do -end subroutine psb_ls_coo_get_diag + ! -------------------------------------------------- + ! ---------------- kaux composition ---------------- -subroutine psb_ls_coo_scal(d,a,info,side) - use psb_s_base_mat_mod, psb_protect_name => psb_ls_coo_scal - use psb_error_mod - use psb_const_mod - use psb_string_mod - implicit none - class(psb_ls_coo_sparse_mat), intent(inout) :: a - real(psb_spk_), intent(in) :: d(:) - integer(psb_ipk_), intent(out) :: info - character, intent(in), optional :: side + !$OMP SINGLE + sum(:) = 0 + sum(1) = 1 + !$OMP END SINGLE - integer(psb_ipk_) :: err_act - integer(psb_lpk_) :: mnm, i, j, m - character(len=20) :: name='scal' - character :: side_ - logical :: left - logical, parameter :: debug=.false. + s = 0 + do i=first_idx,last_idx + s = s + kaux(i) + end do + sum(ithread+2) = s - info = psb_success_ - call psb_erractionsave(err_act) - if (a%is_dev()) call a%sync() + !$OMP BARRIER - if (a%is_unit()) then - call a%make_nonunit() - end if + !$OMP SINGLE + do i=2,nthreads+1 + sum(i) = sum(i) + sum(i-1) + end do + kaux(nr+1) = sum(nthreads+1) + !$OMP END SINGLE - side_ = 'L' - if (present(side)) then - side_ = psb_toupper(side) - end if + if (work > 0) then + saved_elem = kaux(first_idx) + end if + if (ithread == 0) then + kaux(1) = 1 + end if - left = (side_ == 'L') + !$OMP BARRIER - if (left) then - m = a%get_nrows() - if (size(d) < m) then - info=psb_err_input_asize_invalid_i_ - call psb_errpush(info,name,l_err=(/2_psb_lpk_,size(d,kind=psb_lpk_)/)) - goto 9999 + if (work > 0) then + old_val = kaux(first_idx+1) + kaux(first_idx+1) = saved_elem + sum(ithread+1) end if - do i=1,a%get_nzeros() - j = a%ia(i) - a%val(i) = a%val(i) * d(j) - enddo - else - m = a%get_ncols() - if (size(d) < m) then - info=psb_err_input_asize_invalid_i_ - call psb_errpush(info,name,l_err=(/2_psb_lpk_,size(d,kind=psb_lpk_)/)) - goto 9999 - end if + do i=first_idx+2,last_idx+1 + nxt_val = kaux(i) + kaux(i) = kaux(i-1) + old_val + old_val = nxt_val + end do + + !$OMP BARRIER + + ! ------------------------------------------------ + + select case(dupl) + case(psb_dupl_ovwrt_) + !$OMP DO schedule(STATIC) + do j=1,nr + first_elem = iaux(j) + last_elem = iaux(j+1) - 1 + + if (first_elem > last_elem) then + cycle + end if + + k = kaux(j) + + val(k) = vs(first_elem) + ia(k) = ias(first_elem) + ja(k) = jas(first_elem) + + do i=first_elem+1,last_elem + if ((ias(i) == ias(i-1)).and.(jas(i) == jas(i-1))) then + val(k) = vs(i) + else + k = k + 1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + endif + end do + end do + !$OMP END DO + + case(psb_dupl_add_) + !$OMP DO schedule(STATIC) + do j=1,nr + first_elem = iaux(j) + last_elem = iaux(j+1) - 1 + + if (first_elem > last_elem) then + cycle + end if + + k = kaux(j) + + val(k) = vs(first_elem) + ia(k) = ias(first_elem) + ja(k) = jas(first_elem) + + do i=first_elem+1,last_elem + if ((ias(i) == ias(i-1)).and.(jas(i) == jas(i-1))) then + val(k) = val(k) + vs(i) + else + k = k + 1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + endif + end do + end do + !$OMP END DO + + case(psb_dupl_err_) + !$OMP DO schedule(STATIC) + do j=1,nr + first_elem = iaux(j) + last_elem = iaux(j+1) - 1 + + if (first_elem > last_elem) then + cycle + end if + + k = kaux(j) + + val(k) = vs(first_elem) + ia(k) = ias(first_elem) + ja(k) = jas(first_elem) + + do i=first_elem+1,last_elem + if ((ias(i) == ias(i-1)).and.(jas(i) == jas(i-1))) then + err = 1 + else + k = k + 1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + end if + end do + end do + !$OMP END DO + + case default + !$OMP SINGLE + err = 2 + !$OMP END SINGLE + end select + + !$OMP END PARALLEL + + if (err == 1) then + call psb_errpush(psb_err_duplicate_coo,name) + goto 9999 + else if (err == 2) then + write(psb_err_unit,*) 'Error in fix_coo: unsafe dupl',dupl + info =-7 + return + end if + + nzout = kaux(nr+1) - 1 + + deallocate(sum,kaux,idxaux,stat=info) +#else + !if (.not.srt_inp) then + + ip = iaux(1) + iaux(1) = 0 + do i=2, nr + is = iaux(i) + iaux(i) = ip + ip = ip + is + end do + iaux(nr+1) = ip + + do i=1,nzin + irw = ia(i) + ip = iaux(irw) + 1 + ias(ip) = ia(i) + jas(ip) = ja(i) + vs(ip) = val(i) + iaux(irw) = ip + end do + !end if + + select case(dupl) + case(psb_dupl_ovwrt_) + k = 0 + i = 1 + do j=1, nr + + nzl = iaux(j)-i+1 + imx = i+nzl-1 + + if (nzl > 0) then + call psi_msort_up(nzl,jas(i:imx),ix2,iret) + if (iret == 0) & + & call psb_ip_reord(nzl,vs(i:imx),& + & ias(i:imx),jas(i:imx),ix2) + k = k + 1 + ia(k) = ias(i) + ja(k) = jas(i) + val(k) = vs(i) + irw = ia(k) + icl = ja(k) + do + i = i + 1 + if (i > imx) exit + if ((ias(i) == irw).and.(jas(i) == icl)) then + val(k) = vs(i) + else + k = k+1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + irw = ia(k) + icl = ja(k) + endif + enddo + end if + end do + + case(psb_dupl_add_) + k = 0 + i = 1 + do j=1, nr + + nzl = iaux(j)-i+1 + imx = i+nzl-1 + + if (nzl > 0) then + call psi_msort_up(nzl,jas(i:imx),ix2,iret) + if (iret == 0) & + & call psb_ip_reord(nzl,vs(i:imx),& + & ias(i:imx),jas(i:imx),ix2) + k = k + 1 + ia(k) = ias(i) + ja(k) = jas(i) + val(k) = vs(i) + irw = ia(k) + icl = ja(k) + do + i = i + 1 + if (i > imx) exit + if ((ias(i) == irw).and.(jas(i) == icl)) then + val(k) = val(k) + vs(i) + else + k = k+1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + irw = ia(k) + icl = ja(k) + endif + enddo + end if + end do + + case(psb_dupl_err_) + k = 0 + i = 1 + do j=1, nr + + nzl = iaux(j)-i+1 + imx = i+nzl-1 + + if (nzl > 0) then + call psi_msort_up(nzl,jas(i:imx),ix2,iret) + if (iret == 0) & + & call psb_ip_reord(nzl,vs(i:imx),& + & ias(i:imx),jas(i:imx),ix2) + k = k + 1 + ia(k) = ias(i) + ja(k) = jas(i) + val(k) = vs(i) + irw = ia(k) + icl = ja(k) + do + i = i + 1 + if (i > imx) exit + if ((ias(i) == irw).and.(jas(i) == icl)) then + call psb_errpush(psb_err_duplicate_coo,name) + goto 9999 + else + k = k+1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + irw = ia(k) + icl = ja(k) + endif + enddo + end if + end do + + case default + write(psb_err_unit,*) 'Error in fix_coo: unsafe dupl',dupl + info =-7 + return + end select + + nzout = k + + deallocate(ix2, stat=info) +#endif + + deallocate(ias,jas,vs, stat=info) + + else if (.not.use_buffers) then + + ! + ! If we did not have enough memory for buffers, + ! let's try in place. + ! + call psi_msort_up(nzin,ia(1:),iaux(1:),iret) + if (iret == 0) & + & call psb_ip_reord(nzin,val,ia,ja,iaux) +#if defined(OPENMP) + !$OMP PARALLEL default(none) & + !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads) & + !$OMP private(i,j,first_idx,last_idx,nzl,act_row,iret,ithread, & + !$OMP work,first_elem,last_elem) + + !$OMP SINGLE + nthreads = omp_get_num_threads() + !$OMP END SINGLE + + ithread = omp_get_thread_num() + + ! -------- thread-specific workload -------- + + work = nr/nthreads + if (ithread < MOD(nr,nthreads)) then + work = work + 1 + first_idx = ithread*work + 1 + else + first_idx = ithread*work + MOD(nr,nthreads) + 1 + end if + + last_idx = first_idx + work - 1 + + ! --------------------------------------------------- + + first_elem = 0 + last_elem = -1 + act_row = first_idx + do j=1,nzin + if (ia(j) < act_row) then + cycle + else if ((ia(j) > last_idx) .or. (work < 1)) then + exit + else if (ia(j) > act_row) then + nzl = last_elem - first_elem + 1 + + if (nzl > 0) then + call psi_msort_up(nzl,ja(first_elem:),iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) + if (iret == 0) & + & call psb_ip_reord(nzl,val(first_elem:last_elem),& + & ia(first_elem:last_elem),ja(first_elem:last_elem),& + & iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) + end if + + act_row = act_row + 1 + first_elem = 0 + last_elem = -1 + else + if (first_elem == 0) then + first_elem = j + end if + + last_elem = j + end if + end do + !$OMP END PARALLEL +#else + i = 1 + j = i + do while (i <= nzin) + + do while ((ia(j) == ia(i))) + j = j+1 + if (j > nzin) exit + enddo + nzl = j - i + call psi_msort_up(nzl,ja(i:),iaux(1:),iret) + if (iret == 0) & + & call psb_ip_reord(nzl,val(i:i+nzl-1),& + & ia(i:i+nzl-1),ja(i:i+nzl-1),iaux) + i = j + enddo +#endif + + select case(dupl) + case(psb_dupl_ovwrt_) + + i = 1 + irw = ia(i) + icl = ja(i) + j = 1 + do + j = j + 1 + if (j > nzin) exit + if ((ia(j) < 1 .or. ia(j) > nr) .or. (ja(j) < 1 .or. ja(j) > nc)) cycle + if ((ia(j) == irw).and.(ja(j) == icl)) then + val(i) = val(j) + else + i = i+1 + val(i) = val(j) + ia(i) = ia(j) + ja(i) = ja(j) + irw = ia(i) + icl = ja(i) + endif + enddo + + case(psb_dupl_add_) + + i = 1 + irw = ia(i) + icl = ja(i) + j = 1 + do + j = j + 1 + if (j > nzin) exit + if ((ia(j) < 1 .or. ia(j) > nr) .or. (ja(j) < 1 .or. ja(j) > nc)) cycle + if ((ia(j) == irw).and.(ja(j) == icl)) then + val(i) = val(i) + val(j) + else + i = i+1 + val(i) = val(j) + ia(i) = ia(j) + ja(i) = ja(j) + irw = ia(i) + icl = ja(i) + endif + enddo + + case(psb_dupl_err_) + i = 1 + irw = ia(i) + icl = ja(i) + j = 1 + do + j = j + 1 + if (j > nzin) exit + if ((ia(j) < 1 .or. ia(j) > nr) .or. (ja(j) < 1 .or. ja(j) > nc)) cycle + if ((ia(j) == irw).and.(ja(j) == icl)) then + call psb_errpush(psb_err_duplicate_coo,name) + goto 9999 + else + i = i+1 + val(i) = val(j) + ia(i) = ia(j) + ja(i) = ja(j) + irw = ia(i) + icl = ja(i) + endif + enddo + case default + write(psb_err_unit,*) 'Error in fix_coo: unsafe dupl',dupl + info =-7 + end select + + nzout = i + + endif + + if(debug_level >= psb_debug_serial_)& + & write(debug_unit,*) trim(name),': end second loop' + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_fix_coo_inner_rowmajor + +subroutine psb_s_fix_coo_inner_colmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,info) + use psb_const_mod + use psb_error_mod + use psb_s_base_mat_mod, psb_protect_name => psb_s_fix_coo_inner_colmajor + use psb_string_mod + use psb_ip_reord_mod + use psb_sort_mod +#if defined(OPENMP) + use omp_lib +#endif + implicit none + + integer(psb_ipk_), intent(in) :: nr, nc, nzin, dupl + integer(psb_ipk_), intent(inout) :: ia(:), ja(:), iaux(:) + real(psb_spk_), intent(inout) :: val(:) + integer(psb_ipk_), intent(out) :: nzout, info + !locals + integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:) + real(psb_spk_), allocatable :: vs(:) + integer(psb_ipk_) :: nza, nzl,iret + integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name = 'psb_fixcoo' + logical :: srt_inp, use_buffers + if (nc <= nzin) then + ! Avoid strange situations with large indices +#if defined(OPENMP) + allocate(ias(nzin),jas(nzin),vs(nzin), stat=info) +#else + allocate(ias(nzin),jas(nzin),vs(nzin),ix2(nzin+2), stat=info) +#endif + use_buffers = (info == 0) + else + use_buffers = .false. + end if + + if (use_buffers) then + iaux(:) = 0 +#if defined(OPENMP) + !$OMP PARALLEL DO default(none) schedule(STATIC) & + !$OMP shared(nzin,ja,nc,iaux) & + !$OMP private(i) & + !$OMP reduction(.and.:use_buffers) + do i=1,nzin + if ((ja(i) < 1).or.(ja(i) > nc)) then + use_buffers = .false. + ! Invalid indices are placed outside the considered range + ja(i) = nc+2 + else + !$OMP ATOMIC UPDATE + iaux(ja(i)) = iaux(ja(i)) + 1 + end if + end do + !$OMP END PARALLEL DO +#else + !srt_inp = .true. + do i=1,nzin + if ((ja(i) < 1).or.(ja(i) > nc)) then + use_buffers = .false. + !ja(i) = nc+2 + !srt_inp = .false. + exit + end if + + iaux(ja(i)) = iaux(ja(i)) + 1 + + !srt_inp = srt_inp .and.(ja(i-1)<=ja(i)) + end do +#endif + end if + + !use_buffers=use_buffers.and.srt_inp + + ! Check again use_buffers. We enter here if nzin >= nc and + ! all the indices are valid + if (use_buffers) then +#if defined(OPENMP) + allocate(kaux(MAX(nzin,nc)+2),idxaux(MAX((nr+2)*maxthreads,nc)),sum(maxthreads+1),stat=info) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + kaux(:) = 0 + sum(:) = 0 + sum(1) = 1 + err = 0 + + ! Here, starting from 'iaux', we apply a fixing in order to obtain the starting + ! index for each column. We do the same on 'kaux' + !$OMP PARALLEL default(none) & + !$OMP shared(idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) & + !$OMP private(s,i,j,k,ithread,first_idx,last_idx,work,nxt_val,old_val,saved_elem, & + !$OMP first_elem,last_elem,nzl,iret,act_col) + + !$OMP SINGLE + nthreads = omp_get_num_threads() + !$OMP END SINGLE + + ithread = omp_get_thread_num() + + ! -------- thread-specific workload -------- + + work = nc/nthreads + if (ithread < MOD(nc,nthreads)) then + work = work + 1 + first_idx = ithread*work + 1 + else + first_idx = ithread*work + MOD(nc,nthreads) + 1 + end if + + last_idx = first_idx + work - 1 + + !------------------------------------------- + ! ---------- iaux composition -------------- + + s = 0 + do i=first_idx,last_idx + s = s + iaux(i) + end do + sum(ithread+2) = s + + !$OMP BARRIER + + !$OMP SINGLE + do i=2,nthreads+1 + sum(i) = sum(i) + sum(i-1) + end do + !$OMP END SINGLE + + if (work > 0) then + saved_elem = iaux(first_idx) + end if + if (ithread == 0) then + iaux(1) = 1 + end if + + !$OMP BARRIER + + if (work > 0) then + old_val = iaux(first_idx+1) + iaux(first_idx+1) = saved_elem + sum(ithread+1) + end if + + do i=first_idx+2,last_idx+1 + nxt_val = iaux(i) + iaux(i) = iaux(i-1) + old_val + old_val = nxt_val + end do + + ! -------------------------------------- + + !$OMP BARRIER + + ! ------------------ Sorting and buffers ------------------- + + ! Let's use an auxiliary buffer to get indices + do j=first_idx,last_idx + idxaux(j) = iaux(j) + end do + + ! Here we sort data inside the auxiliary buffers + do i=1,nzin + act_col = ja(i) + if (act_col >= first_idx .and. act_col <= last_idx) then + ias(idxaux(act_col)) = ia(i) + jas(idxaux(act_col)) = ja(i) + vs(idxaux(act_col)) = val(i) + + idxaux(act_col) = idxaux(act_col) + 1 + end if + end do + + !$OMP BARRIER + + ! Let's sort column indices and values. After that we will store + ! the number of unique values in 'kaux' + do j=first_idx,last_idx + first_elem = iaux(j) + last_elem = iaux(j+1) - 1 + nzl = last_elem - first_elem + 1 + + ! The column has elements? + if (nzl > 0) then + call psi_msort_up(nzl,ias(first_elem:last_elem), & + & idxaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2),iret) + + if (iret == 0) then + call psb_ip_reord(nzl,vs(first_elem:last_elem),& + & ias(first_elem:last_elem),jas(first_elem:last_elem), & + & idxaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2)) + end if + + ! Over each column we count the unique values + kaux(j) = 1 + do i=first_elem+1,last_elem + if (ias(i) == ias(i-1) .and. jas(i) == jas(i-1)) then + cycle + end if + kaux(j) = kaux(j) + 1 + end do + end if + end do + + ! -------------------------------------------------- + + ! ---------------- kaux composition ---------------- + + !$OMP SINGLE + sum(:) = 0 + sum(1) = 1 + !$OMP END SINGLE + + s = 0 + do i=first_idx,last_idx + s = s + kaux(i) + end do + sum(ithread+2) = s + + !$OMP BARRIER + + !$OMP SINGLE + do i=2,nthreads+1 + sum(i) = sum(i) + sum(i-1) + end do + !$OMP END SINGLE + + if (work > 0) then + saved_elem = kaux(first_idx) + end if + if (ithread == 0) then + kaux(1) = 1 + end if + + !$OMP BARRIER + + if (work > 0) then + old_val = kaux(first_idx+1) + kaux(first_idx+1) = saved_elem + sum(ithread+1) + end if + + do i=first_idx+2,last_idx+1 + nxt_val = kaux(i) + kaux(i) = kaux(i-1) + old_val + old_val = nxt_val + end do + + !$OMP BARRIER + + ! ------------------------------------------------ + + select case(dupl) + case(psb_dupl_ovwrt_) + !$OMP DO schedule(STATIC) + do j=1,nc + first_elem = iaux(j) + last_elem = iaux(j+1) - 1 + + if (first_elem > last_elem) then + cycle + end if + + k = kaux(j) + + val(k) = vs(first_elem) + ia(k) = ias(first_elem) + ja(k) = jas(first_elem) + + do i=first_elem+1,last_elem + if ((ias(i) == ias(i-1)).and.(jas(i) == jas(i-1))) then + val(k) = vs(i) + else + k = k + 1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + endif + end do + end do + !$OMP END DO + + case(psb_dupl_add_) + !$OMP DO schedule(STATIC) + do j=1,nc + first_elem = iaux(j) + last_elem = iaux(j+1) - 1 + + if (first_elem > last_elem) then + cycle + end if + + k = kaux(j) + + val(k) = vs(first_elem) + ia(k) = ias(first_elem) + ja(k) = jas(first_elem) + + do i=first_elem+1,last_elem + if ((ias(i) == ias(i-1)).and.(jas(i) == jas(i-1))) then + val(k) = val(k) + vs(i) + else + k = k + 1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + endif + end do + end do + !$OMP END DO + + case(psb_dupl_err_) + !$OMP DO schedule(STATIC) + do j=1,nc + first_elem = iaux(j) + last_elem = iaux(j+1) - 1 + + if (first_elem > last_elem) then + cycle + end if + + k = kaux(j) + + val(k) = vs(first_elem) + ia(k) = ias(first_elem) + ja(k) = jas(first_elem) + + do i=first_elem+1,last_elem + if ((ias(i) == ias(i-1)).and.(jas(i) == jas(i-1))) then + err = 1 + else + k = k + 1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + end if + end do + end do + !$OMP END DO + + case default + !$OMP SINGLE + err = 2 + !$OMP END SINGLE + end select + + !$OMP END PARALLEL + + if (err == 1) then + call psb_errpush(psb_err_duplicate_coo,name) + goto 9999 + else if (err == 2) then + write(psb_err_unit,*) 'Error in fix_coo: unsafe dupl',dupl + info =-7 + return + end if + + nzout = kaux(nc+1) - 1 + + deallocate(sum,kaux,idxaux,stat=info) +#else + !if (.not.srt_inp) then + ip = iaux(1) + iaux(1) = 0 + do i=2, nc + is = iaux(i) + iaux(i) = ip + ip = ip + is + end do + iaux(nc+1) = ip + + do i=1,nzin + icl = ja(i) + ip = iaux(icl) + 1 + ias(ip) = ia(i) + jas(ip) = ja(i) + vs(ip) = val(i) + iaux(icl) = ip + end do + !end if + + select case(dupl) + case(psb_dupl_ovwrt_) + k = 0 + i = 1 + do j=1, nc + nzl = iaux(j)-i+1 + imx = i+nzl-1 + + if (nzl > 0) then + call psi_msort_up(nzl,ias(i:imx),ix2,iret) + if (iret == 0) & + & call psb_ip_reord(nzl,vs(i:imx),& + & ias(i:imx),jas(i:imx),ix2) + k = k + 1 + ia(k) = ias(i) + ja(k) = jas(i) + val(k) = vs(i) + irw = ia(k) + icl = ja(k) + do + i = i + 1 + if (i > imx) exit + if ((ias(i) == irw).and.(jas(i) == icl)) then + val(k) = vs(i) + else + k = k+1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + irw = ia(k) + icl = ja(k) + endif + enddo + end if + end do + + case(psb_dupl_add_) + k = 0 + i = 1 + do j=1, nc + nzl = iaux(j)-i+1 + imx = i+nzl-1 + + if (nzl > 0) then + call psi_msort_up(nzl,ias(i:imx),ix2,iret) + if (iret == 0) & + & call psb_ip_reord(nzl,vs(i:imx),& + & ias(i:imx),jas(i:imx),ix2) + k = k + 1 + ia(k) = ias(i) + ja(k) = jas(i) + val(k) = vs(i) + irw = ia(k) + icl = ja(k) + do + i = i + 1 + if (i > imx) exit + if ((ias(i) == irw).and.(jas(i) == icl)) then + val(k) = val(k) + vs(i) + else + k = k+1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + irw = ia(k) + icl = ja(k) + endif + enddo + end if + end do + + case(psb_dupl_err_) + k = 0 + i = 1 + do j=1, nc + nzl = iaux(j)-i+1 + imx = i+nzl-1 + + if (nzl > 0) then + call psi_msort_up(nzl,ias(i:imx),ix2,iret) + if (iret == 0) & + & call psb_ip_reord(nzl,vs(i:imx),& + & ias(i:imx),jas(i:imx),ix2) + k = k + 1 + ia(k) = ias(i) + ja(k) = jas(i) + val(k) = vs(i) + irw = ia(k) + icl = ja(k) + do + i = i + 1 + if (i > imx) exit + if ((ias(i) == irw).and.(jas(i) == icl)) then + call psb_errpush(psb_err_duplicate_coo,name) + goto 9999 + else + k = k+1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + irw = ia(k) + icl = ja(k) + endif + enddo + end if + end do + + case default + write(psb_err_unit,*) 'Error in fix_coo: unsafe dupl',dupl + info =-7 + return + end select + + nzout = k + + deallocate(ix2, stat=info) +#endif + + deallocate(ias,jas,vs, stat=info) + + else if (.not.use_buffers) then + + call psi_msort_up(nzin,ja(1:),iaux(1:),iret) + if (iret == 0) & + & call psb_ip_reord(nzin,val,ia,ja,iaux) +#if defined(OPENMP) + !$OMP PARALLEL default(none) & + !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads) & + !$OMP private(i,j,first_idx,last_idx,nzl,act_col, & + !$OMP iret,ithread,work,first_elem,last_elem) + + !$OMP SINGLE + nthreads = omp_get_num_threads() + !$OMP END SINGLE + + ithread = omp_get_thread_num() + + ! -------- thread-specific workload -------- + + work = nc/nthreads + if (ithread < MOD(nc,nthreads)) then + work = work + 1 + first_idx = ithread*work + 1 + else + first_idx = ithread*work + MOD(nc,nthreads) + 1 + end if + + last_idx = first_idx + work - 1 + + ! --------------------------------------------------- + + first_elem = 0 + last_elem = -1 + act_col = first_idx + do j=1,nzin + if (ja(j) < act_col) then + cycle + else if ((ja(j) > last_idx) .or. (work < 1)) then + exit + else if (ja(j) > act_col) then + nzl = last_elem - first_elem + 1 + + if (nzl > 0) then + call psi_msort_up(nzl,ia(first_elem:),iaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2),iret) + if (iret == 0) & + & call psb_ip_reord(nzl,val(first_elem:last_elem),& + & ia(first_elem:last_elem),ja(first_elem:last_elem),& + & iaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2)) + end if + + act_col = act_col + 1 + first_elem = 0 + last_elem = -1 + else + if (first_elem == 0) then + first_elem = j + end if + + last_elem = j + end if + end do + !$OMP END PARALLEL +#else + i = 1 + j = i + do while (i <= nzin) + do while ((ja(j) == ja(i))) + j = j+1 + if (j > nzin) exit + enddo + nzl = j - i + call psi_msort_up(nzl,ia(i:),iaux(1:),iret) + if (iret == 0) & + & call psb_ip_reord(nzl,val(i:i+nzl-1),& + & ia(i:i+nzl-1),ja(i:i+nzl-1),iaux) + i = j + enddo +#endif + + i = 1 + irw = ia(i) + icl = ja(i) + j = 1 + + + select case(dupl) + case(psb_dupl_ovwrt_) + do + j = j + 1 + if (j > nzin) exit + if ((ia(j) < 1 .or. ia(j) > nr) .or. (ja(j) < 1 .or. ja(j) > nc)) cycle + if ((ia(j) == irw).and.(ja(j) == icl)) then + val(i) = val(j) + else + i = i+1 + val(i) = val(j) + ia(i) = ia(j) + ja(i) = ja(j) + irw = ia(i) + icl = ja(i) + endif + enddo + + case(psb_dupl_add_) + do + j = j + 1 + if (j > nzin) exit + if ((ia(j) < 1 .or. ia(j) > nr) .or. (ja(j) < 1 .or. ja(j) > nc)) cycle + if ((ia(j) == irw).and.(ja(j) == icl)) then + val(i) = val(i) + val(j) + else + i = i+1 + val(i) = val(j) + ia(i) = ia(j) + ja(i) = ja(j) + irw = ia(i) + icl = ja(i) + endif + enddo + + case(psb_dupl_err_) + do + j = j + 1 + if (j > nzin) exit + if ((ia(j) < 1 .or. ia(j) > nr) .or. (ja(j) < 1 .or. ja(j) > nc)) cycle + if ((ia(j) == irw).and.(ja(j) == icl)) then + call psb_errpush(psb_err_duplicate_coo,name) + goto 9999 + else + i = i+1 + val(i) = val(j) + ia(i) = ia(j) + ja(i) = ja(j) + irw = ia(i) + icl = ja(i) + endif + enddo + case default + write(psb_err_unit,*) 'Error in fix_coo: unsafe dupl',dupl + info =-7 + end select + + nzout = i + + if (debug_level >= psb_debug_serial_)& + & write(debug_unit,*) trim(name),': end second loop' + + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_fix_coo_inner_colmajor + + +subroutine psb_s_cp_coo_to_lcoo(a,b,info) + use psb_error_mod + use psb_s_base_mat_mod, psb_protect_name => psb_s_cp_coo_to_lcoo + implicit none + class(psb_s_coo_sparse_mat), intent(in) :: a + class(psb_ls_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act + integer(psb_lpk_) :: nz + character(len=20) :: name='to_coo' + logical, parameter :: debug=.false. + + + call psb_erractionsave(err_act) + info = psb_success_ + if (a%is_dev()) call a%sync() + + b%psb_lbase_sparse_mat = a%psb_base_sparse_mat + call b%set_sort_status(a%get_sort_status()) + nz = a%get_nzeros() + call b%set_nzeros(nz) + call b%reallocate(nz) + +#if defined(OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, nz + b%ia(i) = a%ia(i) + b%ja(i) = a%ja(i) + b%val(i) = a%val(i) + end do + end block +#else + b%ia(1:nz) = a%ia(1:nz) + b%ja(1:nz) = a%ja(1:nz) + b%val(1:nz) = a%val(1:nz) +#endif + call b%set_host() + + if (.not.b%is_by_rows()) call b%fix(info) + + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cp_coo_to_lcoo + +subroutine psb_s_cp_coo_from_lcoo(a,b,info) + use psb_error_mod + use psb_s_base_mat_mod, psb_protect_name => psb_s_cp_coo_from_lcoo + implicit none + class(psb_s_coo_sparse_mat), intent(inout) :: a + class(psb_ls_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act + character(len=20) :: name='from_coo' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: m,n,nz + + call psb_erractionsave(err_act) + info = psb_success_ + if (b%is_dev()) call b%sync() + a%psb_base_sparse_mat = b%psb_lbase_sparse_mat + call a%set_sort_status(b%get_sort_status()) + nz = b%get_nzeros() + call a%set_nzeros(nz) + call a%reallocate(nz) + +#if defined(OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, nz + a%ia(i) = b%ia(i) + a%ja(i) = b%ja(i) + a%val(i) = b%val(i) + end do + end block +#else + a%ia(1:nz) = b%ia(1:nz) + a%ja(1:nz) = b%ja(1:nz) + a%val(1:nz) = b%val(1:nz) +#endif + call a%set_host() + + if (.not.a%is_by_rows()) call a%fix(info) + + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_errpush(info,name) + + call psb_error_handler(err_act) + + return + +end subroutine psb_s_cp_coo_from_lcoo + + +! +! +! ls coo impl +! +! + +subroutine psb_ls_coo_get_diag(a,d,info) + use psb_s_base_mat_mod, psb_protect_name => psb_ls_coo_get_diag + use psb_error_mod + use psb_const_mod + implicit none + class(psb_ls_coo_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act + integer(psb_lpk_) :: mnm, i, j + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,l_err=(/2_psb_lpk_,size(d,kind=psb_lpk_)/)) + goto 9999 + end if + + if (a%is_unit()) then + d(1:mnm) = sone + else + d(1:mnm) = szero + do i=1,a%get_nzeros() + j=a%ia(i) + if ((j == a%ja(i)) .and.(j <= mnm ) .and.(j>0)) then + d(j) = a%val(i) + endif + enddo + end if + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_ls_coo_get_diag + +subroutine psb_ls_coo_scal(d,a,info,side) + use psb_s_base_mat_mod, psb_protect_name => psb_ls_coo_scal + use psb_error_mod + use psb_const_mod + use psb_string_mod + implicit none + class(psb_ls_coo_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + integer(psb_ipk_) :: err_act + integer(psb_lpk_) :: mnm, i, j, m + character(len=20) :: name='scal' + character :: side_ + logical :: left + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + side_ = 'L' + if (present(side)) then + side_ = psb_toupper(side) + end if + + left = (side_ == 'L') + + if (left) then + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,l_err=(/2_psb_lpk_,size(d,kind=psb_lpk_)/)) + goto 9999 + end if + + do i=1,a%get_nzeros() + j = a%ia(i) + a%val(i) = a%val(i) * d(j) + enddo + else + m = a%get_ncols() + if (size(d) < m) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,l_err=(/2_psb_lpk_,size(d,kind=psb_lpk_)/)) + goto 9999 + end if do i=1,a%get_nzeros() j = a%ja(i) @@ -5182,13 +6530,13 @@ function psb_ls_coo_maxval(a) result(res) implicit none class(psb_ls_coo_sparse_mat), intent(in) :: a real(psb_spk_) :: res - + integer(psb_lpk_) :: i,j,k,m,n, nnz, ir, jc, nc, info character(len=20) :: name='s_coo_maxval' logical, parameter :: debug=.false. - + if (a%is_dev()) call a%sync() - + if (a%is_unit()) then res = sone else @@ -5198,18 +6546,18 @@ function psb_ls_coo_maxval(a) result(res) if (allocated(a%val)) then nnz = min(nnz,size(a%val)) #if defined(OPENMP) - block - integer(psb_ipk_) :: i - !$omp parallel do private(i) reduction(max: res) - do i=1, nnz - res = max(res,abs(a%val(i))) - end do - end block + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) reduction(max: res) + do i=1, nnz + res = max(res,abs(a%val(i))) + end do + end block #else res = maxval(abs(a%val(1:nnz))) #endif end if - + end function psb_ls_coo_maxval function psb_ls_coo_csnmi(a) result(res) @@ -5265,13 +6613,13 @@ function psb_ls_coo_csnmi(a) result(res) vt(i) = vt(i) + abs(a%val(j)) end do #if defined(OPENMP) - block - integer(psb_ipk_) :: i - !$omp parallel do private(i) reduction(max: res) - do i=1, m - res = max(res,abs(vt(i))) - end do - end block + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) reduction(max: res) + do i=1, m + res = max(res,abs(vt(i))) + end do + end block #else res = maxval(vt(1:m)) #endif @@ -5321,7 +6669,7 @@ function psb_ls_coo_csnm1(a) result(res) do i=1, n res = max(res,abs(vt(i))) end do - end block + end block #else res = maxval(vt(1:n)) #endif @@ -6770,7 +8118,7 @@ subroutine psb_ls_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) if (nz < 0) then info = psb_err_iarg_neg_ -3 call psb_errpush(info,name,i_err=(/1_psb_ipk_/)) +3 call psb_errpush(info,name,i_err=(/1_psb_ipk_/)) goto 9999 end if if (size(ia) < nz) then diff --git a/base/serial/impl/psb_z_coo_impl.F90 b/base/serial/impl/psb_z_coo_impl.F90 index 99150120..02613b0e 100644 --- a/base/serial/impl/psb_z_coo_impl.F90 +++ b/base/serial/impl/psb_z_coo_impl.F90 @@ -728,7 +728,7 @@ subroutine psb_z_coo_print(iout,a,iv,head,ivr,ivc) character(len=80) :: frmt integer(psb_ipk_) :: i,j, ni, nr, nc, nz - write(iout,'(a)') '%%MatrixMarket matrix coordinate complex general' + write(iout,'(a)') '%%MatrixMarket matrix coordinate complex general' if (present(head)) write(iout,'(a,a)') '% ',head write(iout,'(a)') '%' write(iout,'(a,a)') '% COO' @@ -3172,9 +3172,9 @@ subroutine psb_z_cp_coo_from_coo(a,b,info) integer(psb_ipk_) :: i !$omp parallel do private(i) do i=1, nz - a%ia(i) = b%ia(i) - a%ja(i) = b%ja(i) - a%val(i) = b%val(i) + a%ia(i) = b%ia(i) + a%ja(i) = b%ja(i) + a%val(i) = b%val(i) end do end block #else @@ -3615,6 +3615,7 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) select case(idir_) case(psb_row_major_) +#if 0 ! Row major order if (nr <= nzin) then ! Avoid strange situations with large indices @@ -3730,7 +3731,7 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) end if if (ithread == 0) then - iaux(1) = 1 + iaux(1) = 1 end if !$OMP BARRIER @@ -3782,11 +3783,11 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) ! The row has elements? if (nzl > 0) then call psi_msort_up(nzl,jas(first_elem:last_elem), & - & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) + & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) if (iret == 0) then - call psb_ip_reord(nzl,vs(first_elem:last_elem),& - & ias(first_elem:last_elem),jas(first_elem:last_elem), & - & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) + call psb_ip_reord(nzl,vs(first_elem:last_elem),& + & ias(first_elem:last_elem),jas(first_elem:last_elem), & + & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) end if ! Over each row we count the unique values @@ -3804,8 +3805,8 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) ! ---------------- kaux composition ---------------- !$OMP SINGLE - sum(:) = 0 - sum(1) = 1 + sum(:) = 0 + sum(1) = 1 !$OMP END SINGLE s = 0 @@ -3957,7 +3958,7 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) deallocate(sum,kaux,idxaux,stat=info) #else !if (.not.srt_inp) then - + ip = iaux(1) iaux(1) = 0 do i=2, nr @@ -4086,7 +4087,7 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) enddo end if end do - + case default write(psb_err_unit,*) 'Error in fix_coo: unsafe dupl',dupl_ info =-7 @@ -4097,7 +4098,7 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) deallocate(ix2, stat=info) #endif - + deallocate(ias,jas,vs, stat=info) else if (.not.use_buffers) then @@ -4261,9 +4262,12 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) if(debug_level >= psb_debug_serial_)& & write(debug_unit,*) trim(name),': end second loop' - +#else + call psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,& + & ia,ja,val,iaux,nzout,info) +#endif case(psb_col_major_) - +#if 0 if (nc <= nzin) then ! Avoid strange situations with large indices #if defined(OPENMP) @@ -4375,7 +4379,7 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) saved_elem = iaux(first_idx) end if if (ithread == 0) then - iaux(1) = 1 + iaux(1) = 1 end if !$OMP BARRIER @@ -4429,9 +4433,9 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) & idxaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2),iret) if (iret == 0) then - call psb_ip_reord(nzl,vs(first_elem:last_elem),& - & ias(first_elem:last_elem),jas(first_elem:last_elem), & - & idxaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2)) + call psb_ip_reord(nzl,vs(first_elem:last_elem),& + & ias(first_elem:last_elem),jas(first_elem:last_elem), & + & idxaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2)) end if ! Over each column we count the unique values @@ -4490,9 +4494,9 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) !$OMP BARRIER - ! ------------------------------------------------ + ! ------------------------------------------------ - select case(dupl_) + select case(dupl_) case(psb_dupl_ovwrt_) !$OMP DO schedule(STATIC) do j=1,nc @@ -4620,7 +4624,7 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) iaux(icl) = ip end do !end if - + select case(dupl_) case(psb_dupl_ovwrt_) k = 0 @@ -4733,7 +4737,7 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) info =-7 return end select - + nzout = k deallocate(ix2, stat=info) @@ -4786,9 +4790,9 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) if (nzl > 0) then call psi_msort_up(nzl,ia(first_elem:),iaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2),iret) if (iret == 0) & - & call psb_ip_reord(nzl,val(first_elem:last_elem),& - & ia(first_elem:last_elem),ja(first_elem:last_elem),& - & iaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2)) + & call psb_ip_reord(nzl,val(first_elem:last_elem),& + & ia(first_elem:last_elem),ja(first_elem:last_elem),& + & iaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2)) end if act_col = act_col + 1 @@ -4889,7 +4893,10 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) & write(debug_unit,*) trim(name),': end second loop' end if - +#else + call psb_z_fix_coo_inner_colmajor(nr,nc,nzin,dupl,& + & ia,ja,val,iaux,nzout,info) +#endif case default write(debug_unit,*) trim(name),': unknown direction ',idir_ info = psb_err_internal_error_ @@ -4908,219 +4915,1560 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) end subroutine psb_z_fix_coo_inner -subroutine psb_z_cp_coo_to_lcoo(a,b,info) +subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,info) + use psb_const_mod use psb_error_mod - use psb_z_base_mat_mod, psb_protect_name => psb_z_cp_coo_to_lcoo + use psb_z_base_mat_mod, psb_protect_name => psb_z_fix_coo_inner_rowmajor + use psb_string_mod + use psb_ip_reord_mod + use psb_sort_mod +#if defined(OPENMP) + use omp_lib +#endif implicit none - class(psb_z_coo_sparse_mat), intent(in) :: a - class(psb_lz_coo_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - - integer(psb_ipk_) :: err_act - integer(psb_lpk_) :: nz - character(len=20) :: name='to_coo' - logical, parameter :: debug=.false. + integer(psb_ipk_), intent(in) :: nr, nc, nzin, dupl + integer(psb_ipk_), intent(inout) :: ia(:), ja(:), iaux(:) + complex(psb_dpk_), intent(inout) :: val(:) + integer(psb_ipk_), intent(out) :: nzout, info + !locals + integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:) + complex(psb_dpk_), allocatable :: vs(:) + integer(psb_ipk_) :: nza, nzl,iret + integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name = 'psb_fixcoo' + logical :: srt_inp, use_buffers - call psb_erractionsave(err_act) - info = psb_success_ - if (a%is_dev()) call a%sync() - - b%psb_lbase_sparse_mat = a%psb_base_sparse_mat - call b%set_sort_status(a%get_sort_status()) - nz = a%get_nzeros() - call b%set_nzeros(nz) - call b%reallocate(nz) + ! Row major order + if (nr <= nzin) then + ! Avoid strange situations with large indices +#if defined(OPENMP) + ! We are not going to need 'ix2' because of the presence + ! of 'idxaux' as auxiliary buffer. + allocate(ias(nzin),jas(nzin),vs(nzin), stat=info) +#else + allocate(ias(nzin),jas(nzin),vs(nzin),ix2(nzin+2), stat=info) +#endif + use_buffers = (info == 0) + else + use_buffers = .false. + end if + if (use_buffers) then + iaux(:) = 0 #if defined(OPENMP) - block - integer(psb_ipk_) :: i - !$omp parallel do private(i) - do i=1, nz - b%ia(i) = a%ia(i) - b%ja(i) = a%ja(i) - b%val(i) = a%val(i) + !$OMP PARALLEL DO default(none) schedule(STATIC) & + !$OMP shared(nzin,ia,nr,iaux) & + !$OMP private(i) & + !$OMP reduction(.and.:use_buffers) + do i=1,nzin + if ((ia(i) < 1).or.(ia(i) > nr)) then + use_buffers = .false. + ! Invalid indices are placed outside the considered range + ia(i) = nr+2 + else + !$OMP ATOMIC UPDATE + iaux(ia(i)) = iaux(ia(i)) + 1 + end if end do - end block + !$OMP END PARALLEL DO #else - b%ia(1:nz) = a%ia(1:nz) - b%ja(1:nz) = a%ja(1:nz) - b%val(1:nz) = a%val(1:nz) + !srt_inp = .true. + do i=1,nzin + if ((ia(i) < 1).or.(ia(i) > nr)) then + use_buffers = .false. + !ia(i) = nr+2 + !srt_inp = .false. + exit + end if + + iaux(ia(i)) = iaux(ia(i)) + 1 + + !srt_inp = srt_inp .and.(ia(i-1)<=ia(i)) + end do #endif - call b%set_host() + end if - if (.not.b%is_by_rows()) call b%fix(info) + ! Check again use_buffers. We enter here if nzin >= nr and + ! all the indices are valid + ! Check again use_buffers. + if (use_buffers) then +#if defined(OPENMP) + allocate(kaux(nr+1),idxaux(MAX((nc+2)*maxthreads,nr)),sum(maxthreads+1),stat=info) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if - if (info /= psb_success_) goto 9999 + kaux(:) = 0 + sum(:) = 0 + sum(1) = 1 + err = 0 - call psb_erractionrestore(err_act) - return + ! Here, starting from 'iaux', we apply a fixing in order to obtain the starting + ! index for each row. We do the same on 'kaux' + !$OMP PARALLEL default(none) & + !$OMP shared(idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) & + !$OMP private(s,i,j,k,ithread,first_idx,last_idx,work,nxt_val,old_val,saved_elem, & + !$OMP first_elem,last_elem,nzl,iret,act_row) -9999 call psb_error_handler(err_act) + !$OMP SINGLE + nthreads = omp_get_num_threads() + !$OMP END SINGLE - return + ithread = omp_get_thread_num() -end subroutine psb_z_cp_coo_to_lcoo + ! -------- thread-specific workload -------- -subroutine psb_z_cp_coo_from_lcoo(a,b,info) - use psb_error_mod - use psb_z_base_mat_mod, psb_protect_name => psb_z_cp_coo_from_lcoo - implicit none - class(psb_z_coo_sparse_mat), intent(inout) :: a - class(psb_lz_coo_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info + work = nr/nthreads + if (ithread < MOD(nr,nthreads)) then + work = work + 1 + first_idx = ithread*work + 1 + else + first_idx = ithread*work + MOD(nr,nthreads) + 1 + end if - integer(psb_ipk_) :: err_act - character(len=20) :: name='from_coo' - logical, parameter :: debug=.false. - integer(psb_ipk_) :: m,n,nz + last_idx = first_idx + work - 1 - call psb_erractionsave(err_act) - info = psb_success_ - if (b%is_dev()) call b%sync() - a%psb_base_sparse_mat = b%psb_lbase_sparse_mat - call a%set_sort_status(b%get_sort_status()) - nz = b%get_nzeros() - call a%set_nzeros(nz) - call a%reallocate(nz) + !------------------------------------------- + ! ------------ iaux composition -------------- + ! 'iaux' will have the start index for each row -#if defined(OPENMP) - block - integer(psb_ipk_) :: i - !$omp parallel do private(i) - do i=1, nz - a%ia(i) = b%ia(i) - a%ja(i) = b%ja(i) - a%val(i) = b%val(i) + s = 0 + do i=first_idx,last_idx + s = s + iaux(i) end do - end block -#else - a%ia(1:nz) = b%ia(1:nz) - a%ja(1:nz) = b%ja(1:nz) - a%val(1:nz) = b%val(1:nz) -#endif - call a%set_host() + sum(ithread+2) = s - if (.not.a%is_by_rows()) call a%fix(info) - - if (info /= psb_success_) goto 9999 + !$OMP BARRIER - call psb_erractionrestore(err_act) - return + !$OMP SINGLE + do i=2,nthreads+1 + sum(i) = sum(i) + sum(i-1) + end do + !$OMP END SINGLE -9999 continue - call psb_errpush(info,name) + if (work > 0) then + saved_elem = iaux(first_idx) + end if - call psb_error_handler(err_act) + if (ithread == 0) then + iaux(1) = 1 + end if - return + !$OMP BARRIER -end subroutine psb_z_cp_coo_from_lcoo + if (work > 0) then + old_val = iaux(first_idx+1) + iaux(first_idx+1) = saved_elem + sum(ithread+1) + end if + do i=first_idx+2,last_idx+1 + nxt_val = iaux(i) + iaux(i) = iaux(i-1) + old_val + old_val = nxt_val + end do -! -! -! lz coo impl -! -! + ! -------------------------------------- -subroutine psb_lz_coo_get_diag(a,d,info) - use psb_z_base_mat_mod, psb_protect_name => psb_lz_coo_get_diag - use psb_error_mod - use psb_const_mod - implicit none - class(psb_lz_coo_sparse_mat), intent(in) :: a - complex(psb_dpk_), intent(out) :: d(:) - integer(psb_ipk_), intent(out) :: info + !$OMP BARRIER - integer(psb_ipk_) :: err_act - integer(psb_lpk_) :: mnm, i, j - character(len=20) :: name='get_diag' - logical, parameter :: debug=.false. + ! ------------------ Sorting and buffers ------------------- - info = psb_success_ - call psb_erractionsave(err_act) - if (a%is_dev()) call a%sync() + ! Let's use an auxiliary buffer, 'idxaux', to get indices leaving + ! unmodified 'iaux' + do j=first_idx,last_idx + idxaux(j) = iaux(j) + end do - mnm = min(a%get_nrows(),a%get_ncols()) - if (size(d) < mnm) then - info=psb_err_input_asize_invalid_i_ - call psb_errpush(info,name,l_err=(/2_psb_lpk_,size(d,kind=psb_lpk_)/)) - goto 9999 - end if + ! Here we sort data inside the auxiliary buffers + do i=1,nzin + act_row = ia(i) + if ((act_row >= first_idx) .and. (act_row <= last_idx)) then + ias(idxaux(act_row)) = ia(i) + jas(idxaux(act_row)) = ja(i) + vs(idxaux(act_row)) = val(i) - if (a%is_unit()) then - d(1:mnm) = zone - else - d(1:mnm) = zzero - do i=1,a%get_nzeros() - j=a%ia(i) - if ((j == a%ja(i)) .and.(j <= mnm ) .and.(j>0)) then - d(j) = a%val(i) - endif - enddo - end if - call psb_erractionrestore(err_act) - return + idxaux(act_row) = idxaux(act_row) + 1 + end if + end do -9999 call psb_error_handler(err_act) + !$OMP BARRIER + + ! Let's sort column indices and values. After that we will store + ! the number of unique values in 'kaux' + do j=first_idx,last_idx + first_elem = iaux(j) + last_elem = iaux(j+1) - 1 + nzl = last_elem - first_elem + 1 + + ! The row has elements? + if (nzl > 0) then + call psi_msort_up(nzl,jas(first_elem:last_elem), & + & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) + if (iret == 0) then + call psb_ip_reord(nzl,vs(first_elem:last_elem),& + & ias(first_elem:last_elem),jas(first_elem:last_elem), & + & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) + end if - return + ! Over each row we count the unique values + kaux(j) = 1 + do i=first_elem+1,last_elem + if (ias(i) == ias(i-1) .and. jas(i) == jas(i-1)) then + cycle + end if + kaux(j) = kaux(j) + 1 + end do + end if + end do -end subroutine psb_lz_coo_get_diag + ! -------------------------------------------------- + ! ---------------- kaux composition ---------------- -subroutine psb_lz_coo_scal(d,a,info,side) - use psb_z_base_mat_mod, psb_protect_name => psb_lz_coo_scal - use psb_error_mod - use psb_const_mod - use psb_string_mod - implicit none - class(psb_lz_coo_sparse_mat), intent(inout) :: a - complex(psb_dpk_), intent(in) :: d(:) - integer(psb_ipk_), intent(out) :: info - character, intent(in), optional :: side + !$OMP SINGLE + sum(:) = 0 + sum(1) = 1 + !$OMP END SINGLE - integer(psb_ipk_) :: err_act - integer(psb_lpk_) :: mnm, i, j, m - character(len=20) :: name='scal' - character :: side_ - logical :: left - logical, parameter :: debug=.false. + s = 0 + do i=first_idx,last_idx + s = s + kaux(i) + end do + sum(ithread+2) = s - info = psb_success_ - call psb_erractionsave(err_act) - if (a%is_dev()) call a%sync() + !$OMP BARRIER - if (a%is_unit()) then - call a%make_nonunit() - end if + !$OMP SINGLE + do i=2,nthreads+1 + sum(i) = sum(i) + sum(i-1) + end do + kaux(nr+1) = sum(nthreads+1) + !$OMP END SINGLE - side_ = 'L' - if (present(side)) then - side_ = psb_toupper(side) - end if + if (work > 0) then + saved_elem = kaux(first_idx) + end if + if (ithread == 0) then + kaux(1) = 1 + end if - left = (side_ == 'L') + !$OMP BARRIER - if (left) then - m = a%get_nrows() - if (size(d) < m) then - info=psb_err_input_asize_invalid_i_ - call psb_errpush(info,name,l_err=(/2_psb_lpk_,size(d,kind=psb_lpk_)/)) - goto 9999 + if (work > 0) then + old_val = kaux(first_idx+1) + kaux(first_idx+1) = saved_elem + sum(ithread+1) end if - do i=1,a%get_nzeros() - j = a%ia(i) - a%val(i) = a%val(i) * d(j) - enddo - else - m = a%get_ncols() - if (size(d) < m) then - info=psb_err_input_asize_invalid_i_ - call psb_errpush(info,name,l_err=(/2_psb_lpk_,size(d,kind=psb_lpk_)/)) - goto 9999 - end if + do i=first_idx+2,last_idx+1 + nxt_val = kaux(i) + kaux(i) = kaux(i-1) + old_val + old_val = nxt_val + end do + + !$OMP BARRIER + + ! ------------------------------------------------ + + select case(dupl) + case(psb_dupl_ovwrt_) + !$OMP DO schedule(STATIC) + do j=1,nr + first_elem = iaux(j) + last_elem = iaux(j+1) - 1 + + if (first_elem > last_elem) then + cycle + end if + + k = kaux(j) + + val(k) = vs(first_elem) + ia(k) = ias(first_elem) + ja(k) = jas(first_elem) + + do i=first_elem+1,last_elem + if ((ias(i) == ias(i-1)).and.(jas(i) == jas(i-1))) then + val(k) = vs(i) + else + k = k + 1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + endif + end do + end do + !$OMP END DO + + case(psb_dupl_add_) + !$OMP DO schedule(STATIC) + do j=1,nr + first_elem = iaux(j) + last_elem = iaux(j+1) - 1 + + if (first_elem > last_elem) then + cycle + end if + + k = kaux(j) + + val(k) = vs(first_elem) + ia(k) = ias(first_elem) + ja(k) = jas(first_elem) + + do i=first_elem+1,last_elem + if ((ias(i) == ias(i-1)).and.(jas(i) == jas(i-1))) then + val(k) = val(k) + vs(i) + else + k = k + 1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + endif + end do + end do + !$OMP END DO + + case(psb_dupl_err_) + !$OMP DO schedule(STATIC) + do j=1,nr + first_elem = iaux(j) + last_elem = iaux(j+1) - 1 + + if (first_elem > last_elem) then + cycle + end if + + k = kaux(j) + + val(k) = vs(first_elem) + ia(k) = ias(first_elem) + ja(k) = jas(first_elem) + + do i=first_elem+1,last_elem + if ((ias(i) == ias(i-1)).and.(jas(i) == jas(i-1))) then + err = 1 + else + k = k + 1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + end if + end do + end do + !$OMP END DO + + case default + !$OMP SINGLE + err = 2 + !$OMP END SINGLE + end select + + !$OMP END PARALLEL + + if (err == 1) then + call psb_errpush(psb_err_duplicate_coo,name) + goto 9999 + else if (err == 2) then + write(psb_err_unit,*) 'Error in fix_coo: unsafe dupl',dupl + info =-7 + return + end if + + nzout = kaux(nr+1) - 1 + + deallocate(sum,kaux,idxaux,stat=info) +#else + !if (.not.srt_inp) then + + ip = iaux(1) + iaux(1) = 0 + do i=2, nr + is = iaux(i) + iaux(i) = ip + ip = ip + is + end do + iaux(nr+1) = ip + + do i=1,nzin + irw = ia(i) + ip = iaux(irw) + 1 + ias(ip) = ia(i) + jas(ip) = ja(i) + vs(ip) = val(i) + iaux(irw) = ip + end do + !end if + + select case(dupl) + case(psb_dupl_ovwrt_) + k = 0 + i = 1 + do j=1, nr + + nzl = iaux(j)-i+1 + imx = i+nzl-1 + + if (nzl > 0) then + call psi_msort_up(nzl,jas(i:imx),ix2,iret) + if (iret == 0) & + & call psb_ip_reord(nzl,vs(i:imx),& + & ias(i:imx),jas(i:imx),ix2) + k = k + 1 + ia(k) = ias(i) + ja(k) = jas(i) + val(k) = vs(i) + irw = ia(k) + icl = ja(k) + do + i = i + 1 + if (i > imx) exit + if ((ias(i) == irw).and.(jas(i) == icl)) then + val(k) = vs(i) + else + k = k+1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + irw = ia(k) + icl = ja(k) + endif + enddo + end if + end do + + case(psb_dupl_add_) + k = 0 + i = 1 + do j=1, nr + + nzl = iaux(j)-i+1 + imx = i+nzl-1 + + if (nzl > 0) then + call psi_msort_up(nzl,jas(i:imx),ix2,iret) + if (iret == 0) & + & call psb_ip_reord(nzl,vs(i:imx),& + & ias(i:imx),jas(i:imx),ix2) + k = k + 1 + ia(k) = ias(i) + ja(k) = jas(i) + val(k) = vs(i) + irw = ia(k) + icl = ja(k) + do + i = i + 1 + if (i > imx) exit + if ((ias(i) == irw).and.(jas(i) == icl)) then + val(k) = val(k) + vs(i) + else + k = k+1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + irw = ia(k) + icl = ja(k) + endif + enddo + end if + end do + + case(psb_dupl_err_) + k = 0 + i = 1 + do j=1, nr + + nzl = iaux(j)-i+1 + imx = i+nzl-1 + + if (nzl > 0) then + call psi_msort_up(nzl,jas(i:imx),ix2,iret) + if (iret == 0) & + & call psb_ip_reord(nzl,vs(i:imx),& + & ias(i:imx),jas(i:imx),ix2) + k = k + 1 + ia(k) = ias(i) + ja(k) = jas(i) + val(k) = vs(i) + irw = ia(k) + icl = ja(k) + do + i = i + 1 + if (i > imx) exit + if ((ias(i) == irw).and.(jas(i) == icl)) then + call psb_errpush(psb_err_duplicate_coo,name) + goto 9999 + else + k = k+1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + irw = ia(k) + icl = ja(k) + endif + enddo + end if + end do + + case default + write(psb_err_unit,*) 'Error in fix_coo: unsafe dupl',dupl + info =-7 + return + end select + + nzout = k + + deallocate(ix2, stat=info) +#endif + + deallocate(ias,jas,vs, stat=info) + + else if (.not.use_buffers) then + + ! + ! If we did not have enough memory for buffers, + ! let's try in place. + ! + call psi_msort_up(nzin,ia(1:),iaux(1:),iret) + if (iret == 0) & + & call psb_ip_reord(nzin,val,ia,ja,iaux) +#if defined(OPENMP) + !$OMP PARALLEL default(none) & + !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads) & + !$OMP private(i,j,first_idx,last_idx,nzl,act_row,iret,ithread, & + !$OMP work,first_elem,last_elem) + + !$OMP SINGLE + nthreads = omp_get_num_threads() + !$OMP END SINGLE + + ithread = omp_get_thread_num() + + ! -------- thread-specific workload -------- + + work = nr/nthreads + if (ithread < MOD(nr,nthreads)) then + work = work + 1 + first_idx = ithread*work + 1 + else + first_idx = ithread*work + MOD(nr,nthreads) + 1 + end if + + last_idx = first_idx + work - 1 + + ! --------------------------------------------------- + + first_elem = 0 + last_elem = -1 + act_row = first_idx + do j=1,nzin + if (ia(j) < act_row) then + cycle + else if ((ia(j) > last_idx) .or. (work < 1)) then + exit + else if (ia(j) > act_row) then + nzl = last_elem - first_elem + 1 + + if (nzl > 0) then + call psi_msort_up(nzl,ja(first_elem:),iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) + if (iret == 0) & + & call psb_ip_reord(nzl,val(first_elem:last_elem),& + & ia(first_elem:last_elem),ja(first_elem:last_elem),& + & iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) + end if + + act_row = act_row + 1 + first_elem = 0 + last_elem = -1 + else + if (first_elem == 0) then + first_elem = j + end if + + last_elem = j + end if + end do + !$OMP END PARALLEL +#else + i = 1 + j = i + do while (i <= nzin) + + do while ((ia(j) == ia(i))) + j = j+1 + if (j > nzin) exit + enddo + nzl = j - i + call psi_msort_up(nzl,ja(i:),iaux(1:),iret) + if (iret == 0) & + & call psb_ip_reord(nzl,val(i:i+nzl-1),& + & ia(i:i+nzl-1),ja(i:i+nzl-1),iaux) + i = j + enddo +#endif + + select case(dupl) + case(psb_dupl_ovwrt_) + + i = 1 + irw = ia(i) + icl = ja(i) + j = 1 + do + j = j + 1 + if (j > nzin) exit + if ((ia(j) < 1 .or. ia(j) > nr) .or. (ja(j) < 1 .or. ja(j) > nc)) cycle + if ((ia(j) == irw).and.(ja(j) == icl)) then + val(i) = val(j) + else + i = i+1 + val(i) = val(j) + ia(i) = ia(j) + ja(i) = ja(j) + irw = ia(i) + icl = ja(i) + endif + enddo + + case(psb_dupl_add_) + + i = 1 + irw = ia(i) + icl = ja(i) + j = 1 + do + j = j + 1 + if (j > nzin) exit + if ((ia(j) < 1 .or. ia(j) > nr) .or. (ja(j) < 1 .or. ja(j) > nc)) cycle + if ((ia(j) == irw).and.(ja(j) == icl)) then + val(i) = val(i) + val(j) + else + i = i+1 + val(i) = val(j) + ia(i) = ia(j) + ja(i) = ja(j) + irw = ia(i) + icl = ja(i) + endif + enddo + + case(psb_dupl_err_) + i = 1 + irw = ia(i) + icl = ja(i) + j = 1 + do + j = j + 1 + if (j > nzin) exit + if ((ia(j) < 1 .or. ia(j) > nr) .or. (ja(j) < 1 .or. ja(j) > nc)) cycle + if ((ia(j) == irw).and.(ja(j) == icl)) then + call psb_errpush(psb_err_duplicate_coo,name) + goto 9999 + else + i = i+1 + val(i) = val(j) + ia(i) = ia(j) + ja(i) = ja(j) + irw = ia(i) + icl = ja(i) + endif + enddo + case default + write(psb_err_unit,*) 'Error in fix_coo: unsafe dupl',dupl + info =-7 + end select + + nzout = i + + endif + + if(debug_level >= psb_debug_serial_)& + & write(debug_unit,*) trim(name),': end second loop' + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_fix_coo_inner_rowmajor + +subroutine psb_z_fix_coo_inner_colmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,info) + use psb_const_mod + use psb_error_mod + use psb_z_base_mat_mod, psb_protect_name => psb_z_fix_coo_inner_colmajor + use psb_string_mod + use psb_ip_reord_mod + use psb_sort_mod +#if defined(OPENMP) + use omp_lib +#endif + implicit none + + integer(psb_ipk_), intent(in) :: nr, nc, nzin, dupl + integer(psb_ipk_), intent(inout) :: ia(:), ja(:), iaux(:) + complex(psb_dpk_), intent(inout) :: val(:) + integer(psb_ipk_), intent(out) :: nzout, info + !locals + integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:) + complex(psb_dpk_), allocatable :: vs(:) + integer(psb_ipk_) :: nza, nzl,iret + integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name = 'psb_fixcoo' + logical :: srt_inp, use_buffers + if (nc <= nzin) then + ! Avoid strange situations with large indices +#if defined(OPENMP) + allocate(ias(nzin),jas(nzin),vs(nzin), stat=info) +#else + allocate(ias(nzin),jas(nzin),vs(nzin),ix2(nzin+2), stat=info) +#endif + use_buffers = (info == 0) + else + use_buffers = .false. + end if + + if (use_buffers) then + iaux(:) = 0 +#if defined(OPENMP) + !$OMP PARALLEL DO default(none) schedule(STATIC) & + !$OMP shared(nzin,ja,nc,iaux) & + !$OMP private(i) & + !$OMP reduction(.and.:use_buffers) + do i=1,nzin + if ((ja(i) < 1).or.(ja(i) > nc)) then + use_buffers = .false. + ! Invalid indices are placed outside the considered range + ja(i) = nc+2 + else + !$OMP ATOMIC UPDATE + iaux(ja(i)) = iaux(ja(i)) + 1 + end if + end do + !$OMP END PARALLEL DO +#else + !srt_inp = .true. + do i=1,nzin + if ((ja(i) < 1).or.(ja(i) > nc)) then + use_buffers = .false. + !ja(i) = nc+2 + !srt_inp = .false. + exit + end if + + iaux(ja(i)) = iaux(ja(i)) + 1 + + !srt_inp = srt_inp .and.(ja(i-1)<=ja(i)) + end do +#endif + end if + + !use_buffers=use_buffers.and.srt_inp + + ! Check again use_buffers. We enter here if nzin >= nc and + ! all the indices are valid + if (use_buffers) then +#if defined(OPENMP) + allocate(kaux(MAX(nzin,nc)+2),idxaux(MAX((nr+2)*maxthreads,nc)),sum(maxthreads+1),stat=info) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + kaux(:) = 0 + sum(:) = 0 + sum(1) = 1 + err = 0 + + ! Here, starting from 'iaux', we apply a fixing in order to obtain the starting + ! index for each column. We do the same on 'kaux' + !$OMP PARALLEL default(none) & + !$OMP shared(idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) & + !$OMP private(s,i,j,k,ithread,first_idx,last_idx,work,nxt_val,old_val,saved_elem, & + !$OMP first_elem,last_elem,nzl,iret,act_col) + + !$OMP SINGLE + nthreads = omp_get_num_threads() + !$OMP END SINGLE + + ithread = omp_get_thread_num() + + ! -------- thread-specific workload -------- + + work = nc/nthreads + if (ithread < MOD(nc,nthreads)) then + work = work + 1 + first_idx = ithread*work + 1 + else + first_idx = ithread*work + MOD(nc,nthreads) + 1 + end if + + last_idx = first_idx + work - 1 + + !------------------------------------------- + ! ---------- iaux composition -------------- + + s = 0 + do i=first_idx,last_idx + s = s + iaux(i) + end do + sum(ithread+2) = s + + !$OMP BARRIER + + !$OMP SINGLE + do i=2,nthreads+1 + sum(i) = sum(i) + sum(i-1) + end do + !$OMP END SINGLE + + if (work > 0) then + saved_elem = iaux(first_idx) + end if + if (ithread == 0) then + iaux(1) = 1 + end if + + !$OMP BARRIER + + if (work > 0) then + old_val = iaux(first_idx+1) + iaux(first_idx+1) = saved_elem + sum(ithread+1) + end if + + do i=first_idx+2,last_idx+1 + nxt_val = iaux(i) + iaux(i) = iaux(i-1) + old_val + old_val = nxt_val + end do + + ! -------------------------------------- + + !$OMP BARRIER + + ! ------------------ Sorting and buffers ------------------- + + ! Let's use an auxiliary buffer to get indices + do j=first_idx,last_idx + idxaux(j) = iaux(j) + end do + + ! Here we sort data inside the auxiliary buffers + do i=1,nzin + act_col = ja(i) + if (act_col >= first_idx .and. act_col <= last_idx) then + ias(idxaux(act_col)) = ia(i) + jas(idxaux(act_col)) = ja(i) + vs(idxaux(act_col)) = val(i) + + idxaux(act_col) = idxaux(act_col) + 1 + end if + end do + + !$OMP BARRIER + + ! Let's sort column indices and values. After that we will store + ! the number of unique values in 'kaux' + do j=first_idx,last_idx + first_elem = iaux(j) + last_elem = iaux(j+1) - 1 + nzl = last_elem - first_elem + 1 + + ! The column has elements? + if (nzl > 0) then + call psi_msort_up(nzl,ias(first_elem:last_elem), & + & idxaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2),iret) + + if (iret == 0) then + call psb_ip_reord(nzl,vs(first_elem:last_elem),& + & ias(first_elem:last_elem),jas(first_elem:last_elem), & + & idxaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2)) + end if + + ! Over each column we count the unique values + kaux(j) = 1 + do i=first_elem+1,last_elem + if (ias(i) == ias(i-1) .and. jas(i) == jas(i-1)) then + cycle + end if + kaux(j) = kaux(j) + 1 + end do + end if + end do + + ! -------------------------------------------------- + + ! ---------------- kaux composition ---------------- + + !$OMP SINGLE + sum(:) = 0 + sum(1) = 1 + !$OMP END SINGLE + + s = 0 + do i=first_idx,last_idx + s = s + kaux(i) + end do + sum(ithread+2) = s + + !$OMP BARRIER + + !$OMP SINGLE + do i=2,nthreads+1 + sum(i) = sum(i) + sum(i-1) + end do + !$OMP END SINGLE + + if (work > 0) then + saved_elem = kaux(first_idx) + end if + if (ithread == 0) then + kaux(1) = 1 + end if + + !$OMP BARRIER + + if (work > 0) then + old_val = kaux(first_idx+1) + kaux(first_idx+1) = saved_elem + sum(ithread+1) + end if + + do i=first_idx+2,last_idx+1 + nxt_val = kaux(i) + kaux(i) = kaux(i-1) + old_val + old_val = nxt_val + end do + + !$OMP BARRIER + + ! ------------------------------------------------ + + select case(dupl) + case(psb_dupl_ovwrt_) + !$OMP DO schedule(STATIC) + do j=1,nc + first_elem = iaux(j) + last_elem = iaux(j+1) - 1 + + if (first_elem > last_elem) then + cycle + end if + + k = kaux(j) + + val(k) = vs(first_elem) + ia(k) = ias(first_elem) + ja(k) = jas(first_elem) + + do i=first_elem+1,last_elem + if ((ias(i) == ias(i-1)).and.(jas(i) == jas(i-1))) then + val(k) = vs(i) + else + k = k + 1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + endif + end do + end do + !$OMP END DO + + case(psb_dupl_add_) + !$OMP DO schedule(STATIC) + do j=1,nc + first_elem = iaux(j) + last_elem = iaux(j+1) - 1 + + if (first_elem > last_elem) then + cycle + end if + + k = kaux(j) + + val(k) = vs(first_elem) + ia(k) = ias(first_elem) + ja(k) = jas(first_elem) + + do i=first_elem+1,last_elem + if ((ias(i) == ias(i-1)).and.(jas(i) == jas(i-1))) then + val(k) = val(k) + vs(i) + else + k = k + 1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + endif + end do + end do + !$OMP END DO + + case(psb_dupl_err_) + !$OMP DO schedule(STATIC) + do j=1,nc + first_elem = iaux(j) + last_elem = iaux(j+1) - 1 + + if (first_elem > last_elem) then + cycle + end if + + k = kaux(j) + + val(k) = vs(first_elem) + ia(k) = ias(first_elem) + ja(k) = jas(first_elem) + + do i=first_elem+1,last_elem + if ((ias(i) == ias(i-1)).and.(jas(i) == jas(i-1))) then + err = 1 + else + k = k + 1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + end if + end do + end do + !$OMP END DO + + case default + !$OMP SINGLE + err = 2 + !$OMP END SINGLE + end select + + !$OMP END PARALLEL + + if (err == 1) then + call psb_errpush(psb_err_duplicate_coo,name) + goto 9999 + else if (err == 2) then + write(psb_err_unit,*) 'Error in fix_coo: unsafe dupl',dupl + info =-7 + return + end if + + nzout = kaux(nc+1) - 1 + + deallocate(sum,kaux,idxaux,stat=info) +#else + !if (.not.srt_inp) then + ip = iaux(1) + iaux(1) = 0 + do i=2, nc + is = iaux(i) + iaux(i) = ip + ip = ip + is + end do + iaux(nc+1) = ip + + do i=1,nzin + icl = ja(i) + ip = iaux(icl) + 1 + ias(ip) = ia(i) + jas(ip) = ja(i) + vs(ip) = val(i) + iaux(icl) = ip + end do + !end if + + select case(dupl) + case(psb_dupl_ovwrt_) + k = 0 + i = 1 + do j=1, nc + nzl = iaux(j)-i+1 + imx = i+nzl-1 + + if (nzl > 0) then + call psi_msort_up(nzl,ias(i:imx),ix2,iret) + if (iret == 0) & + & call psb_ip_reord(nzl,vs(i:imx),& + & ias(i:imx),jas(i:imx),ix2) + k = k + 1 + ia(k) = ias(i) + ja(k) = jas(i) + val(k) = vs(i) + irw = ia(k) + icl = ja(k) + do + i = i + 1 + if (i > imx) exit + if ((ias(i) == irw).and.(jas(i) == icl)) then + val(k) = vs(i) + else + k = k+1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + irw = ia(k) + icl = ja(k) + endif + enddo + end if + end do + + case(psb_dupl_add_) + k = 0 + i = 1 + do j=1, nc + nzl = iaux(j)-i+1 + imx = i+nzl-1 + + if (nzl > 0) then + call psi_msort_up(nzl,ias(i:imx),ix2,iret) + if (iret == 0) & + & call psb_ip_reord(nzl,vs(i:imx),& + & ias(i:imx),jas(i:imx),ix2) + k = k + 1 + ia(k) = ias(i) + ja(k) = jas(i) + val(k) = vs(i) + irw = ia(k) + icl = ja(k) + do + i = i + 1 + if (i > imx) exit + if ((ias(i) == irw).and.(jas(i) == icl)) then + val(k) = val(k) + vs(i) + else + k = k+1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + irw = ia(k) + icl = ja(k) + endif + enddo + end if + end do + + case(psb_dupl_err_) + k = 0 + i = 1 + do j=1, nc + nzl = iaux(j)-i+1 + imx = i+nzl-1 + + if (nzl > 0) then + call psi_msort_up(nzl,ias(i:imx),ix2,iret) + if (iret == 0) & + & call psb_ip_reord(nzl,vs(i:imx),& + & ias(i:imx),jas(i:imx),ix2) + k = k + 1 + ia(k) = ias(i) + ja(k) = jas(i) + val(k) = vs(i) + irw = ia(k) + icl = ja(k) + do + i = i + 1 + if (i > imx) exit + if ((ias(i) == irw).and.(jas(i) == icl)) then + call psb_errpush(psb_err_duplicate_coo,name) + goto 9999 + else + k = k+1 + val(k) = vs(i) + ia(k) = ias(i) + ja(k) = jas(i) + irw = ia(k) + icl = ja(k) + endif + enddo + end if + end do + + case default + write(psb_err_unit,*) 'Error in fix_coo: unsafe dupl',dupl + info =-7 + return + end select + + nzout = k + + deallocate(ix2, stat=info) +#endif + + deallocate(ias,jas,vs, stat=info) + + else if (.not.use_buffers) then + + call psi_msort_up(nzin,ja(1:),iaux(1:),iret) + if (iret == 0) & + & call psb_ip_reord(nzin,val,ia,ja,iaux) +#if defined(OPENMP) + !$OMP PARALLEL default(none) & + !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads) & + !$OMP private(i,j,first_idx,last_idx,nzl,act_col, & + !$OMP iret,ithread,work,first_elem,last_elem) + + !$OMP SINGLE + nthreads = omp_get_num_threads() + !$OMP END SINGLE + + ithread = omp_get_thread_num() + + ! -------- thread-specific workload -------- + + work = nc/nthreads + if (ithread < MOD(nc,nthreads)) then + work = work + 1 + first_idx = ithread*work + 1 + else + first_idx = ithread*work + MOD(nc,nthreads) + 1 + end if + + last_idx = first_idx + work - 1 + + ! --------------------------------------------------- + + first_elem = 0 + last_elem = -1 + act_col = first_idx + do j=1,nzin + if (ja(j) < act_col) then + cycle + else if ((ja(j) > last_idx) .or. (work < 1)) then + exit + else if (ja(j) > act_col) then + nzl = last_elem - first_elem + 1 + + if (nzl > 0) then + call psi_msort_up(nzl,ia(first_elem:),iaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2),iret) + if (iret == 0) & + & call psb_ip_reord(nzl,val(first_elem:last_elem),& + & ia(first_elem:last_elem),ja(first_elem:last_elem),& + & iaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2)) + end if + + act_col = act_col + 1 + first_elem = 0 + last_elem = -1 + else + if (first_elem == 0) then + first_elem = j + end if + + last_elem = j + end if + end do + !$OMP END PARALLEL +#else + i = 1 + j = i + do while (i <= nzin) + do while ((ja(j) == ja(i))) + j = j+1 + if (j > nzin) exit + enddo + nzl = j - i + call psi_msort_up(nzl,ia(i:),iaux(1:),iret) + if (iret == 0) & + & call psb_ip_reord(nzl,val(i:i+nzl-1),& + & ia(i:i+nzl-1),ja(i:i+nzl-1),iaux) + i = j + enddo +#endif + + i = 1 + irw = ia(i) + icl = ja(i) + j = 1 + + + select case(dupl) + case(psb_dupl_ovwrt_) + do + j = j + 1 + if (j > nzin) exit + if ((ia(j) < 1 .or. ia(j) > nr) .or. (ja(j) < 1 .or. ja(j) > nc)) cycle + if ((ia(j) == irw).and.(ja(j) == icl)) then + val(i) = val(j) + else + i = i+1 + val(i) = val(j) + ia(i) = ia(j) + ja(i) = ja(j) + irw = ia(i) + icl = ja(i) + endif + enddo + + case(psb_dupl_add_) + do + j = j + 1 + if (j > nzin) exit + if ((ia(j) < 1 .or. ia(j) > nr) .or. (ja(j) < 1 .or. ja(j) > nc)) cycle + if ((ia(j) == irw).and.(ja(j) == icl)) then + val(i) = val(i) + val(j) + else + i = i+1 + val(i) = val(j) + ia(i) = ia(j) + ja(i) = ja(j) + irw = ia(i) + icl = ja(i) + endif + enddo + + case(psb_dupl_err_) + do + j = j + 1 + if (j > nzin) exit + if ((ia(j) < 1 .or. ia(j) > nr) .or. (ja(j) < 1 .or. ja(j) > nc)) cycle + if ((ia(j) == irw).and.(ja(j) == icl)) then + call psb_errpush(psb_err_duplicate_coo,name) + goto 9999 + else + i = i+1 + val(i) = val(j) + ia(i) = ia(j) + ja(i) = ja(j) + irw = ia(i) + icl = ja(i) + endif + enddo + case default + write(psb_err_unit,*) 'Error in fix_coo: unsafe dupl',dupl + info =-7 + end select + + nzout = i + + if (debug_level >= psb_debug_serial_)& + & write(debug_unit,*) trim(name),': end second loop' + + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_fix_coo_inner_colmajor + + +subroutine psb_z_cp_coo_to_lcoo(a,b,info) + use psb_error_mod + use psb_z_base_mat_mod, psb_protect_name => psb_z_cp_coo_to_lcoo + implicit none + class(psb_z_coo_sparse_mat), intent(in) :: a + class(psb_lz_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act + integer(psb_lpk_) :: nz + character(len=20) :: name='to_coo' + logical, parameter :: debug=.false. + + + call psb_erractionsave(err_act) + info = psb_success_ + if (a%is_dev()) call a%sync() + + b%psb_lbase_sparse_mat = a%psb_base_sparse_mat + call b%set_sort_status(a%get_sort_status()) + nz = a%get_nzeros() + call b%set_nzeros(nz) + call b%reallocate(nz) + +#if defined(OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, nz + b%ia(i) = a%ia(i) + b%ja(i) = a%ja(i) + b%val(i) = a%val(i) + end do + end block +#else + b%ia(1:nz) = a%ia(1:nz) + b%ja(1:nz) = a%ja(1:nz) + b%val(1:nz) = a%val(1:nz) +#endif + call b%set_host() + + if (.not.b%is_by_rows()) call b%fix(info) + + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cp_coo_to_lcoo + +subroutine psb_z_cp_coo_from_lcoo(a,b,info) + use psb_error_mod + use psb_z_base_mat_mod, psb_protect_name => psb_z_cp_coo_from_lcoo + implicit none + class(psb_z_coo_sparse_mat), intent(inout) :: a + class(psb_lz_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act + character(len=20) :: name='from_coo' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: m,n,nz + + call psb_erractionsave(err_act) + info = psb_success_ + if (b%is_dev()) call b%sync() + a%psb_base_sparse_mat = b%psb_lbase_sparse_mat + call a%set_sort_status(b%get_sort_status()) + nz = b%get_nzeros() + call a%set_nzeros(nz) + call a%reallocate(nz) + +#if defined(OPENMP) + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) + do i=1, nz + a%ia(i) = b%ia(i) + a%ja(i) = b%ja(i) + a%val(i) = b%val(i) + end do + end block +#else + a%ia(1:nz) = b%ia(1:nz) + a%ja(1:nz) = b%ja(1:nz) + a%val(1:nz) = b%val(1:nz) +#endif + call a%set_host() + + if (.not.a%is_by_rows()) call a%fix(info) + + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_errpush(info,name) + + call psb_error_handler(err_act) + + return + +end subroutine psb_z_cp_coo_from_lcoo + + +! +! +! lz coo impl +! +! + +subroutine psb_lz_coo_get_diag(a,d,info) + use psb_z_base_mat_mod, psb_protect_name => psb_lz_coo_get_diag + use psb_error_mod + use psb_const_mod + implicit none + class(psb_lz_coo_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act + integer(psb_lpk_) :: mnm, i, j + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,l_err=(/2_psb_lpk_,size(d,kind=psb_lpk_)/)) + goto 9999 + end if + + if (a%is_unit()) then + d(1:mnm) = zone + else + d(1:mnm) = zzero + do i=1,a%get_nzeros() + j=a%ia(i) + if ((j == a%ja(i)) .and.(j <= mnm ) .and.(j>0)) then + d(j) = a%val(i) + endif + enddo + end if + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_lz_coo_get_diag + +subroutine psb_lz_coo_scal(d,a,info,side) + use psb_z_base_mat_mod, psb_protect_name => psb_lz_coo_scal + use psb_error_mod + use psb_const_mod + use psb_string_mod + implicit none + class(psb_lz_coo_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + integer(psb_ipk_) :: err_act + integer(psb_lpk_) :: mnm, i, j, m + character(len=20) :: name='scal' + character :: side_ + logical :: left + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + side_ = 'L' + if (present(side)) then + side_ = psb_toupper(side) + end if + + left = (side_ == 'L') + + if (left) then + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,l_err=(/2_psb_lpk_,size(d,kind=psb_lpk_)/)) + goto 9999 + end if + + do i=1,a%get_nzeros() + j = a%ia(i) + a%val(i) = a%val(i) * d(j) + enddo + else + m = a%get_ncols() + if (size(d) < m) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,l_err=(/2_psb_lpk_,size(d,kind=psb_lpk_)/)) + goto 9999 + end if do i=1,a%get_nzeros() j = a%ja(i) @@ -5182,13 +6530,13 @@ function psb_lz_coo_maxval(a) result(res) implicit none class(psb_lz_coo_sparse_mat), intent(in) :: a real(psb_dpk_) :: res - + integer(psb_lpk_) :: i,j,k,m,n, nnz, ir, jc, nc, info character(len=20) :: name='z_coo_maxval' logical, parameter :: debug=.false. - + if (a%is_dev()) call a%sync() - + if (a%is_unit()) then res = done else @@ -5198,18 +6546,18 @@ function psb_lz_coo_maxval(a) result(res) if (allocated(a%val)) then nnz = min(nnz,size(a%val)) #if defined(OPENMP) - block - integer(psb_ipk_) :: i - !$omp parallel do private(i) reduction(max: res) - do i=1, nnz - res = max(res,abs(a%val(i))) - end do - end block + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) reduction(max: res) + do i=1, nnz + res = max(res,abs(a%val(i))) + end do + end block #else res = maxval(abs(a%val(1:nnz))) #endif end if - + end function psb_lz_coo_maxval function psb_lz_coo_csnmi(a) result(res) @@ -5265,13 +6613,13 @@ function psb_lz_coo_csnmi(a) result(res) vt(i) = vt(i) + abs(a%val(j)) end do #if defined(OPENMP) - block - integer(psb_ipk_) :: i - !$omp parallel do private(i) reduction(max: res) - do i=1, m - res = max(res,abs(vt(i))) - end do - end block + block + integer(psb_ipk_) :: i + !$omp parallel do private(i) reduction(max: res) + do i=1, m + res = max(res,abs(vt(i))) + end do + end block #else res = maxval(vt(1:m)) #endif @@ -5321,7 +6669,7 @@ function psb_lz_coo_csnm1(a) result(res) do i=1, n res = max(res,abs(vt(i))) end do - end block + end block #else res = maxval(vt(1:n)) #endif @@ -6770,7 +8118,7 @@ subroutine psb_lz_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) if (nz < 0) then info = psb_err_iarg_neg_ -3 call psb_errpush(info,name,i_err=(/1_psb_ipk_/)) +3 call psb_errpush(info,name,i_err=(/1_psb_ipk_/)) goto 9999 end if if (size(ia) < nz) then