diff --git a/base/comm/Makefile b/base/comm/Makefile index dfa1bed8..b7fcdeea 100644 --- a/base/comm/Makefile +++ b/base/comm/Makefile @@ -8,6 +8,7 @@ OBJS = psb_dgather.o psb_dhalo.o psb_dovrl.o \ psb_zgather.o psb_zhalo.o psb_zovrl.o \ psb_dgather_a.o psb_dhalo_a.o psb_dovrl_a.o \ psb_sgather_a.o psb_shalo_a.o psb_sovrl_a.o \ + psb_i2gather_a.o psb_i2halo_a.o psb_i2ovrl_a.o \ psb_mgather_a.o psb_mhalo_a.o psb_movrl_a.o \ psb_egather_a.o psb_ehalo_a.o psb_eovrl_a.o \ psb_cgather_a.o psb_chalo_a.o psb_covrl_a.o \ @@ -18,7 +19,7 @@ MPFOBJS=psb_dscatter.o psb_zscatter.o \ psb_iscatter.o psb_lscatter.o \ psb_cscatter.o psb_sscatter.o \ psb_dscatter_a.o psb_zscatter_a.o \ - psb_mscatter_a.o psb_escatter_a.o \ + psb_mscatter_a.o psb_escatter_a.o psb_i2scatter_a.o \ psb_cscatter_a.o psb_sscatter_a.o \ psb_dspgather.o psb_sspgather.o \ psb_zspgather.o psb_cspgather.o diff --git a/base/comm/internals/Makefile b/base/comm/internals/Makefile index 6e3a9e76..b3aaf308 100644 --- a/base/comm/internals/Makefile +++ b/base/comm/internals/Makefile @@ -6,6 +6,7 @@ FOBJS = psi_iovrl_restr.o psi_iovrl_save.o psi_iovrl_upd.o \ psi_dovrl_restr.o psi_dovrl_save.o psi_dovrl_upd.o \ psi_covrl_restr.o psi_covrl_save.o psi_covrl_upd.o \ psi_zovrl_restr.o psi_zovrl_save.o psi_zovrl_upd.o \ + psi_i2ovrl_restr_a.o psi_i2ovrl_save_a.o psi_i2ovrl_upd_a.o \ psi_movrl_restr_a.o psi_movrl_save_a.o psi_movrl_upd_a.o \ psi_eovrl_restr_a.o psi_eovrl_save_a.o psi_eovrl_upd_a.o \ psi_sovrl_restr_a.o psi_sovrl_save_a.o psi_sovrl_upd_a.o \ @@ -21,6 +22,7 @@ MPFOBJS = psi_dswapdata.o psi_dswaptran.o\ psi_zswapdata.o psi_zswaptran.o \ psi_dswapdata_a.o psi_dswaptran_a.o \ psi_sswapdata_a.o psi_sswaptran_a.o \ + psi_i2swapdata_a.o psi_i2swaptran_a.o \ psi_mswapdata_a.o psi_mswaptran_a.o \ psi_eswapdata_a.o psi_eswaptran_a.o \ psi_cswapdata_a.o psi_cswaptran_a.o \ diff --git a/base/modules/Makefile b/base/modules/Makefile index 86870bc5..ba665b57 100644 --- a/base/modules/Makefile +++ b/base/modules/Makefile @@ -28,6 +28,7 @@ COMMINT= penv/psi_penv_mod.o \ SERIAL_MODS=serial/psb_s_serial_mod.o serial/psb_d_serial_mod.o \ serial/psb_c_serial_mod.o serial/psb_z_serial_mod.o \ serial/psb_serial_mod.o \ + serial/psb_i2_base_vect_mod.o serial/psb_i2_vect_mod.o\ serial/psb_i_base_vect_mod.o serial/psb_i_vect_mod.o\ serial/psb_l_base_vect_mod.o serial/psb_l_vect_mod.o\ serial/psb_d_base_vect_mod.o serial/psb_d_vect_mod.o\ @@ -35,15 +36,19 @@ SERIAL_MODS=serial/psb_s_serial_mod.o serial/psb_d_serial_mod.o \ serial/psb_c_base_vect_mod.o serial/psb_c_vect_mod.o\ serial/psb_z_base_vect_mod.o serial/psb_z_vect_mod.o\ serial/psb_vect_mod.o\ - auxil/psi_serial_mod.o auxil/psi_m_serial_mod.o auxil/psi_e_serial_mod.o \ + auxil/psi_serial_mod.o \ + auxil/psi_i2_serial_mod.o auxil/psi_m_serial_mod.o auxil/psi_e_serial_mod.o \ auxil/psi_s_serial_mod.o auxil/psi_d_serial_mod.o \ auxil/psi_c_serial_mod.o auxil/psi_z_serial_mod.o \ - psi_mod.o psi_i_mod.o psi_l_mod.o psi_s_mod.o psi_d_mod.o psi_c_mod.o psi_z_mod.o\ + psi_mod.o psi_i2_mod.o psi_i_mod.o psi_l_mod.o psi_s_mod.o psi_d_mod.o psi_c_mod.o psi_z_mod.o\ auxil/psb_ip_reord_mod.o\ auxil/psi_acx_mod.o auxil/psi_alcx_mod.o auxil/psi_lcx_mod.o \ + auxil/psb_i2_ip_reord_mod.o \ auxil/psb_m_ip_reord_mod.o auxil/psb_e_ip_reord_mod.o \ auxil/psb_s_ip_reord_mod.o auxil/psb_d_ip_reord_mod.o \ auxil/psb_c_ip_reord_mod.o auxil/psb_z_ip_reord_mod.o \ + auxil/psb_i2_hsort_mod.o auxil/psb_i2_isort_mod.o \ + auxil/psb_i2_msort_mod.o auxil/psb_i2_qsort_mod.o \ auxil/psb_m_hsort_mod.o auxil/psb_m_isort_mod.o \ auxil/psb_m_msort_mod.o auxil/psb_m_qsort_mod.o \ auxil/psb_e_hsort_mod.o auxil/psb_e_isort_mod.o \ @@ -56,6 +61,7 @@ SERIAL_MODS=serial/psb_s_serial_mod.o serial/psb_d_serial_mod.o \ auxil/psb_c_msort_mod.o auxil/psb_c_qsort_mod.o \ auxil/psb_z_hsort_mod.o auxil/psb_z_isort_mod.o \ auxil/psb_z_msort_mod.o auxil/psb_z_qsort_mod.o \ + auxil/psb_i2_hsort_x_mod.o \ auxil/psb_i_hsort_x_mod.o \ auxil/psb_l_hsort_x_mod.o \ auxil/psb_s_hsort_x_mod.o \ @@ -85,7 +91,7 @@ UTIL_MODS = desc/psb_desc_const_mod.o desc/psb_indx_map_mod.o\ tools/psb_i_tools_mod.o tools/psb_l_tools_mod.o \ tools/psb_s_tools_mod.o tools/psb_d_tools_mod.o\ tools/psb_c_tools_mod.o tools/psb_z_tools_mod.o \ - tools/psb_m_tools_a_mod.o tools/psb_e_tools_a_mod.o \ + tools/psb_i2_tools_a_mod.o tools/psb_m_tools_a_mod.o tools/psb_e_tools_a_mod.o \ tools/psb_s_tools_a_mod.o tools/psb_d_tools_a_mod.o\ tools/psb_c_tools_a_mod.o tools/psb_z_tools_a_mod.o \ tools/psb_tools_mod.o \ @@ -101,7 +107,7 @@ UTIL_MODS = desc/psb_desc_const_mod.o desc/psb_indx_map_mod.o\ comm/psb_s_comm_a_mod.o comm/psb_d_comm_a_mod.o\ comm/psb_c_comm_a_mod.o comm/psb_z_comm_a_mod.o \ comm/psi_e_comm_a_mod.o comm/psi_m_comm_a_mod.o \ - comm/psi_i2_comm_a_mod.o \ + comm/psi_i2_comm_a_mod.o comm/psi_i2_comm_v_mod.o \ comm/psi_s_comm_a_mod.o comm/psi_d_comm_a_mod.o \ comm/psi_c_comm_a_mod.o comm/psi_z_comm_a_mod.o \ comm/psi_i_comm_v_mod.o comm/psi_l_comm_v_mod.o \ @@ -136,6 +142,7 @@ $(LIBDIR)/$(LIBNAME): objs $(OBJS): $(MODULES) psb_error_mod.o: psb_const_mod.o psb_realloc_mod.o \ + auxil/psb_i2_realloc_mod.o \ auxil/psb_m_realloc_mod.o \ auxil/psb_e_realloc_mod.o \ auxil/psb_s_realloc_mod.o \ @@ -148,6 +155,7 @@ penv/psi_collective_mod.o penv/psi_p2p_mod.o: penv/psi_penv_mod.o psb_realloc_mod.o: auxil/psb_m_realloc_mod.o \ auxil/psb_e_realloc_mod.o \ + auxil/psb_i2_realloc_mod.o \ auxil/psb_s_realloc_mod.o \ auxil/psb_d_realloc_mod.o \ auxil/psb_c_realloc_mod.o \ @@ -178,7 +186,8 @@ penv/psi_d_collective_mod.o penv/psi_c_collective_mod.o penv/psi_z_collective_m auxil/psi_acx_mod.o auxil/psi_alcx_mod.o auxil/psi_lcx_mod.o \ -auxil/psb_string_mod.o auxil/psb_m_realloc_mod.o auxil/psb_e_realloc_mod.o auxil/psb_s_realloc_mod.o \ +auxil/psb_string_mod.o auxil/psb_i2_realloc_mod.o auxil/psb_m_realloc_mod.o auxil/psb_e_realloc_mod.o \ +auxil/psb_s_realloc_mod.o \ auxil/psb_d_realloc_mod.o auxil/psb_c_realloc_mod.o auxil/psb_z_realloc_mod.o \ desc/psb_desc_const_mod.o psi_penv_mod.o: psb_const_mod.o @@ -192,6 +201,8 @@ auxil/psb_sort_mod.o: auxil/psb_m_hsort_mod.o auxil/psb_m_isort_mod.o \ auxil/psb_m_msort_mod.o auxil/psb_m_qsort_mod.o \ auxil/psb_e_hsort_mod.o auxil/psb_e_isort_mod.o \ auxil/psb_e_msort_mod.o auxil/psb_e_qsort_mod.o \ + auxil/psb_i2_hsort_mod.o auxil/psb_i2_isort_mod.o \ + auxil/psb_i2_msort_mod.o auxil/psb_i2_qsort_mod.o \ auxil/psb_s_hsort_mod.o auxil/psb_s_isort_mod.o \ auxil/psb_s_msort_mod.o auxil/psb_s_qsort_mod.o \ auxil/psb_d_hsort_mod.o auxil/psb_d_isort_mod.o \ @@ -200,6 +211,7 @@ auxil/psb_sort_mod.o: auxil/psb_m_hsort_mod.o auxil/psb_m_isort_mod.o \ auxil/psb_c_msort_mod.o auxil/psb_c_qsort_mod.o \ auxil/psb_z_hsort_mod.o auxil/psb_z_isort_mod.o \ auxil/psb_z_msort_mod.o auxil/psb_z_qsort_mod.o \ + auxil/psb_i2_hsort_x_mod.o \ auxil/psb_i_hsort_x_mod.o \ auxil/psb_l_hsort_x_mod.o \ auxil/psb_s_hsort_x_mod.o \ @@ -213,6 +225,8 @@ auxil/psb_m_hsort_mod.o auxil/psb_m_isort_mod.o \ auxil/psb_m_msort_mod.o auxil/psb_m_qsort_mod.o \ auxil/psb_e_hsort_mod.o auxil/psb_e_isort_mod.o \ auxil/psb_e_msort_mod.o auxil/psb_e_qsort_mod.o \ +auxil/psb_i2_hsort_mod.o auxil/psb_i2_isort_mod.o \ +auxil/psb_i2_msort_mod.o auxil/psb_i2_qsort_mod.o \ auxil/psb_s_hsort_mod.o auxil/psb_s_isort_mod.o \ auxil/psb_s_msort_mod.o auxil/psb_s_qsort_mod.o \ auxil/psb_d_hsort_mod.o auxil/psb_d_isort_mod.o \ @@ -221,17 +235,19 @@ auxil/psb_c_hsort_mod.o auxil/psb_c_isort_mod.o \ auxil/psb_c_msort_mod.o auxil/psb_c_qsort_mod.o \ auxil/psb_z_hsort_mod.o auxil/psb_z_isort_mod.o \ auxil/psb_z_msort_mod.o auxil/psb_z_qsort_mod.o \ +auxil/psb_i2_hsort_x_mod.o \ auxil/psb_i_hsort_x_mod.o \ auxil/psb_l_hsort_x_mod.o \ auxil/psb_s_hsort_x_mod.o \ auxil/psb_d_hsort_x_mod.o \ auxil/psb_c_hsort_x_mod.o \ auxil/psb_z_hsort_x_mod.o \ -auxil/psb_m_ip_reord_mod.o auxil/psb_e_ip_reord_mod.o \ +auxil/psb_i2_ip_reord_mod.o auxil/psb_m_ip_reord_mod.o auxil/psb_e_ip_reord_mod.o \ auxil/psb_s_ip_reord_mod.o auxil/psb_d_ip_reord_mod.o \ auxil/psb_c_ip_reord_mod.o auxil/psb_z_ip_reord_mod.o : psb_realloc_mod.o psb_const_mod.o +auxil/psb_i_hsort_x_mod.o: auxil/psb_m_hsort_mod.o auxil/psb_e_hsort_mod.o auxil/psb_i2_hsort_mod.o auxil/psb_i_hsort_x_mod.o: auxil/psb_m_hsort_mod.o auxil/psb_e_hsort_mod.o auxil/psb_l_hsort_x_mod.o: auxil/psb_m_hsort_mod.o auxil/psb_e_hsort_mod.o auxil/psb_s_hsort_x_mod.o: auxil/psb_s_hsort_mod.o @@ -239,14 +255,14 @@ auxil/psb_d_hsort_x_mod.o: auxil/psb_d_hsort_mod.o auxil/psb_c_hsort_x_mod.o: auxil/psb_c_hsort_mod.o auxil/psb_z_hsort_x_mod.o: auxil/psb_z_hsort_mod.o -auxil/psi_serial_mod.o: auxil/psi_m_serial_mod.o auxil/psi_e_serial_mod.o \ +auxil/psi_serial_mod.o: auxil/psi_i2_serial_mod.o auxil/psi_m_serial_mod.o auxil/psi_e_serial_mod.o \ auxil/psi_s_serial_mod.o auxil/psi_d_serial_mod.o\ auxil/psi_c_serial_mod.o auxil/psi_z_serial_mod.o \ auxil/psi_acx_mod.o auxil/psi_alcx_mod.o auxil/psi_lcx_mod.o -auxil/psi_m_serial_mod.o auxil/psi_e_serial_mod.o auxil/psi_s_serial_mod.o auxil/psi_d_serial_mod.o auxil/psi_c_serial_mod.o auxil/psi_z_serial_mod.o: psb_const_mod.o +auxil/psi_i2_serial_mod.o auxil/psi_m_serial_mod.o auxil/psi_e_serial_mod.o auxil/psi_s_serial_mod.o auxil/psi_d_serial_mod.o auxil/psi_c_serial_mod.o auxil/psi_z_serial_mod.o: psb_const_mod.o -auxil/psb_ip_reord_mod.o: auxil/psb_m_ip_reord_mod.o auxil/psb_e_ip_reord_mod.o \ +auxil/psb_ip_reord_mod.o: auxil/psb_i2_ip_reord_mod.o auxil/psb_m_ip_reord_mod.o auxil/psb_e_ip_reord_mod.o \ auxil/psb_s_ip_reord_mod.o auxil/psb_d_ip_reord_mod.o \ auxil/psb_c_ip_reord_mod.o auxil/psb_z_ip_reord_mod.o @@ -261,7 +277,8 @@ serial/psb_d_base_mat_mod.o: serial/psb_d_base_vect_mod.o #serial/psb_ld_base_mat_mod.o: serial/psb_d_base_vect_mod.o serial/psb_c_base_mat_mod.o: serial/psb_c_base_vect_mod.o serial/psb_z_base_mat_mod.o: serial/psb_z_base_vect_mod.o -serial/psb_l_base_vect_mod.o: serial/psb_i_base_vect_mod.o +serial/psb_l_base_vect_mod.o: serial/psb_i_base_vect_mod.o +serial/psb_i2_base_vect_mod.o: serial/psb_i_base_vect_mod.o serial/psb_c_base_vect_mod.o serial/psb_s_base_vect_mod.o serial/psb_d_base_vect_mod.o serial/psb_z_base_vect_mod.o: serial/psb_i_base_vect_mod.o serial/psb_l_base_vect_mod.o serial/psb_i_base_vect_mod.o serial/psb_l_base_vect_mod.o serial/psb_c_base_vect_mod.o serial/psb_s_base_vect_mod.o serial/psb_d_base_vect_mod.o serial/psb_z_base_vect_mod.o: auxil/psi_serial_mod.o psb_realloc_mod.o serial/psb_s_mat_mod.o: serial/psb_s_base_mat_mod.o serial/psb_s_csr_mat_mod.o serial/psb_s_csc_mat_mod.o serial/psb_s_vect_mod.o \ @@ -279,6 +296,7 @@ serial/psb_z_csc_mat_mod.o serial/psb_z_csr_mat_mod.o serial/psb_lz_csr_mat_mod. serial/psb_mat_mod.o: serial/psb_vect_mod.o serial/psb_s_mat_mod.o serial/psb_d_mat_mod.o serial/psb_c_mat_mod.o serial/psb_z_mat_mod.o serial/psb_serial_mod.o: serial/psb_s_serial_mod.o serial/psb_d_serial_mod.o serial/psb_c_serial_mod.o serial/psb_z_serial_mod.o auxil/psi_serial_mod.o +serial/psb_i2_vect_mod.o: serial/psb_i2_base_vect_mod.o serial/psb_i_base_vect_mod.o serial/psb_i_vect_mod.o: serial/psb_i_base_vect_mod.o serial/psb_l_vect_mod.o: serial/psb_l_base_vect_mod.o serial/psb_i_vect_mod.o serial/psb_s_vect_mod.o: serial/psb_s_base_vect_mod.o serial/psb_i_vect_mod.o @@ -305,10 +323,12 @@ desc/psb_desc_mod.o: psb_penv_mod.o psb_realloc_mod.o\ desc/psb_repl_map_mod.o desc/psb_gen_block_map_mod.o desc/psb_desc_const_mod.o\ desc/psb_indx_map_mod.o serial/psb_i_vect_mod.o +psi_i2_mod.o: desc/psb_desc_mod.o serial/psb_i2_vect_mod.o comm/psi_e_comm_a_mod.o \ + comm/psi_m_comm_a_mod.o comm/psi_i_comm_v_mod.o comm/psi_i2_comm_a_mod.o comm/psi_i2_comm_v_mod.o psi_i_mod.o: desc/psb_desc_mod.o serial/psb_i_vect_mod.o comm/psi_e_comm_a_mod.o \ - comm/psi_m_comm_a_mod.o comm/psi_i_comm_v_mod.o comm/psi_i2_comm_a_mod.o + comm/psi_m_comm_a_mod.o comm/psi_i_comm_v_mod.o comm/psi_i2_comm_a_mod.o comm/psi_i2_comm_v_mod.o psi_l_mod.o: desc/psb_desc_mod.o serial/psb_l_vect_mod.o comm/psi_e_comm_a_mod.o \ - comm/psi_m_comm_a_mod.o comm/psi_l_comm_v_mod.o comm/psi_i2_comm_a_mod.o + comm/psi_m_comm_a_mod.o comm/psi_l_comm_v_mod.o comm/psi_i2_comm_a_mod.o comm/psi_i2_comm_v_mod.o psi_s_mod.o: desc/psb_desc_mod.o serial/psb_s_vect_mod.o comm/psi_s_comm_a_mod.o \ comm/psi_s_comm_v_mod.o psi_d_mod.o: desc/psb_desc_mod.o serial/psb_d_vect_mod.o comm/psi_d_comm_a_mod.o \ @@ -318,7 +338,7 @@ psi_c_mod.o: desc/psb_desc_mod.o serial/psb_c_vect_mod.o comm/psi_c_comm_a_mod.o psi_z_mod.o: desc/psb_desc_mod.o serial/psb_z_vect_mod.o comm/psi_z_comm_a_mod.o \ comm/psi_z_comm_v_mod.o psi_mod.o: psb_penv_mod.o desc/psb_desc_mod.o auxil/psi_serial_mod.o serial/psb_serial_mod.o\ - psi_i_mod.o psi_l_mod.o psi_s_mod.o psi_d_mod.o psi_c_mod.o psi_z_mod.o + psi_i2_mod.o psi_i_mod.o psi_l_mod.o psi_s_mod.o psi_d_mod.o psi_c_mod.o psi_z_mod.o desc/psb_indx_map_mod.o: desc/psb_desc_const_mod.o psb_error_mod.o psb_penv_mod.o psb_realloc_mod.o auxil/psb_sort_mod.o desc/psb_hash_map_mod.o desc/psb_list_map_mod.o desc/psb_repl_map_mod.o desc/psb_gen_block_map_mod.o:\ @@ -338,24 +358,28 @@ comm/psb_c_linmap_mod.o: comm/psb_base_linmap_mod.o serial/psb_c_mat_mod.o seria comm/psb_z_linmap_mod.o: comm/psb_base_linmap_mod.o serial/psb_z_mat_mod.o serial/psb_z_vect_mod.o comm/psb_base_linmap_mod.o: desc/psb_desc_mod.o serial/psb_serial_mod.o comm/psb_comm_mod.o comm/psb_comm_mod.o: desc/psb_desc_mod.o serial/psb_mat_mod.o -comm/psb_comm_mod.o: comm/psb_i_comm_mod.o comm/psb_l_comm_mod.o \ +comm/psb_comm_mod.o: comm/psb_i2_comm_mod.o comm/psb_i_comm_mod.o comm/psb_l_comm_mod.o \ comm/psb_s_comm_mod.o comm/psb_d_comm_mod.o \ comm/psb_c_comm_mod.o comm/psb_z_comm_mod.o \ + comm/psb_i2_comm_a_mod.o \ comm/psb_m_comm_a_mod.o comm/psb_e_comm_a_mod.o \ comm/psb_s_comm_a_mod.o comm/psb_d_comm_a_mod.o\ comm/psb_c_comm_a_mod.o comm/psb_z_comm_a_mod.o -comm/psb_m_comm_a_mod.o comm/psb_e_comm_a_mod.o \ +comm/psb_i2_comm_a_mod.o comm/psb_m_comm_a_mod.o comm/psb_e_comm_a_mod.o \ comm/psb_s_comm_a_mod.o comm/psb_d_comm_a_mod.o\ comm/psb_c_comm_a_mod.o comm/psb_z_comm_a_mod.o: desc/psb_desc_mod.o -comm/psb_i_comm_mod.o: serial/psb_i_vect_mod.o desc/psb_desc_mod.o +comm/psb_i2_comm_mod.o: serial/psb_i2_vect_mod.o desc/psb_desc_mod.o +comm/psb_i_comm_mod.o: serial/psb_i_vect_mod.o desc/psb_desc_mod.o comm/psb_l_comm_mod.o: serial/psb_l_vect_mod.o desc/psb_desc_mod.o comm/psb_s_comm_mod.o: serial/psb_s_vect_mod.o desc/psb_desc_mod.o serial/psb_mat_mod.o comm/psb_d_comm_mod.o: serial/psb_d_vect_mod.o desc/psb_desc_mod.o serial/psb_mat_mod.o comm/psb_c_comm_mod.o: serial/psb_c_vect_mod.o desc/psb_desc_mod.o serial/psb_mat_mod.o comm/psb_z_comm_mod.o: serial/psb_z_vect_mod.o desc/psb_desc_mod.o serial/psb_mat_mod.o +comm/psi_i2_comm_v_mod.o: serial/psb_i2_vect_mod.o comm/psi_i2_comm_a_mod.o \ + comm/psi_m_comm_a_mod.o comm/psi_i_comm_v_mod.o: serial/psb_i_vect_mod.o comm/psi_e_comm_a_mod.o \ comm/psi_m_comm_a_mod.o comm/psi_l_comm_v_mod.o: serial/psb_l_vect_mod.o comm/psi_e_comm_a_mod.o \ @@ -374,14 +398,15 @@ comm/psi_c_comm_a_mod.o comm/psi_z_comm_a_mod.o: desc/psb_desc_mod.o tools/psb_tools_mod.o: tools/psb_cd_tools_mod.o tools/psb_s_tools_mod.o tools/psb_d_tools_mod.o\ tools/psb_i_tools_mod.o tools/psb_l_tools_mod.o \ tools/psb_c_tools_mod.o tools/psb_z_tools_mod.o \ - tools/psb_m_tools_a_mod.o tools/psb_e_tools_a_mod.o \ + tools/psb_i2_tools_a_mod.o tools/psb_m_tools_a_mod.o tools/psb_e_tools_a_mod.o \ tools/psb_s_tools_a_mod.o tools/psb_d_tools_a_mod.o\ tools/psb_c_tools_a_mod.o tools/psb_z_tools_a_mod.o tools/psb_cd_tools_mod.o tools/psb_i_tools_mod.o tools/psb_l_tools_mod.o \ tools/psb_s_tools_mod.o tools/psb_d_tools_mod.o \ -tools/psb_c_tools_mod.o tools/psb_z_tools_mod.o tools/psb_m_tools_a_mod.o tools/psb_e_tools_a_mod.o \ +tools/psb_c_tools_mod.o tools/psb_z_tools_mod.o \ +tools/psb_i2_tools_a_mod.o tools/psb_m_tools_a_mod.o tools/psb_e_tools_a_mod.o \ tools/psb_s_tools_a_mod.o tools/psb_d_tools_a_mod.o\ tools/psb_c_tools_a_mod.o tools/psb_z_tools_a_mod.o: desc/psb_desc_mod.o psi_mod.o serial/psb_mat_mod.o diff --git a/base/modules/auxil/psb_i2_hsort_x_mod.f90 b/base/modules/auxil/psb_i2_hsort_x_mod.f90 new file mode 100644 index 00000000..1c4d0ea8 --- /dev/null +++ b/base/modules/auxil/psb_i2_hsort_x_mod.f90 @@ -0,0 +1,312 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific prior written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! +! Sorting routines +! References: +! D. Knuth +! The Art of Computer Programming, vol. 3 +! Addison-Wesley +! +! Aho, Hopcroft, Ullman +! Data Structures and Algorithms +! Addison-Wesley +! +module psb_i2_hsort_x_mod + use psb_const_mod + use psb_e_hsort_mod + use psb_m_hsort_mod + use psb_i2_hsort_mod + + type psb_i2_heap + integer(psb_ipk_) :: dir + integer(psb_ipk_) :: last + integer(psb_i2pk_), allocatable :: keys(:) + contains + procedure, pass(heap) :: init => psb_i2_init_heap + procedure, pass(heap) :: howmany => psb_i2_howmany + procedure, pass(heap) :: insert => psb_i2_insert_heap + procedure, pass(heap) :: get_first => psb_i2_heap_get_first + procedure, pass(heap) :: dump => psb_i2_dump_heap + procedure, pass(heap) :: free => psb_i2_free_heap + end type psb_i2_heap + + type psb_i2_idx_heap + integer(psb_ipk_) :: dir + integer(psb_ipk_) :: last + integer(psb_i2pk_), allocatable :: keys(:) + integer(psb_ipk_), allocatable :: idxs(:) + contains + procedure, pass(heap) :: init => psb_i2_idx_init_heap + procedure, pass(heap) :: howmany => psb_i2_idx_howmany + procedure, pass(heap) :: insert => psb_i2_idx_insert_heap + procedure, pass(heap) :: get_first => psb_i2_idx_heap_get_first + procedure, pass(heap) :: dump => psb_i2_idx_dump_heap + procedure, pass(heap) :: free => psb_i2_idx_free_heap + end type psb_i2_idx_heap + + +contains + + subroutine psb_i2_init_heap(heap,info,dir) + use psb_realloc_mod, only : psb_ensure_size + implicit none + class(psb_i2_heap), intent(inout) :: heap + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: dir + + info = psb_success_ + heap%last=0 + if (present(dir)) then + heap%dir = dir + else + heap%dir = psb_sort_up_ + endif + select case(heap%dir) + case (psb_sort_up_,psb_sort_down_,psb_asort_up_,psb_asort_down_) + ! ok, do nothing + case default + write(psb_err_unit,*) 'Invalid direction, defaulting to psb_sort_up_' + heap%dir = psb_sort_up_ + end select + call psb_ensure_size(psb_heap_resize,heap%keys,info) + + return + end subroutine psb_i2_init_heap + + + function psb_i2_howmany(heap) result(res) + implicit none + class(psb_i2_heap), intent(in) :: heap + integer(psb_ipk_) :: res + res = heap%last + end function psb_i2_howmany + + subroutine psb_i2_insert_heap(key,heap,info) + use psb_realloc_mod, only : psb_ensure_size + implicit none + + integer(psb_i2pk_), intent(in) :: key + class(psb_i2_heap), intent(inout) :: heap + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + if (heap%last < 0) then + write(psb_err_unit,*) 'Invalid last in heap ',heap%last + info = heap%last + return + endif + + call psb_ensure_size(heap%last+1,heap%keys,info) + if (info /= psb_success_) then + write(psb_err_unit,*) 'Memory allocation failure in heap_insert' + info = -5 + return + end if + call psi_insert_heap(key,& + & heap%last,heap%keys,heap%dir,info) + + return + end subroutine psb_i2_insert_heap + + subroutine psb_i2_heap_get_first(key,heap,info) + implicit none + + class(psb_i2_heap), intent(inout) :: heap + integer(psb_ipk_), intent(out) :: info + integer(psb_i2pk_), intent(out) :: key + + + info = psb_success_ + + call psi_heap_get_first(key,& + & heap%last,heap%keys,heap%dir,info) + + return + end subroutine psb_i2_heap_get_first + + subroutine psb_i2_dump_heap(iout,heap,info) + + implicit none + class(psb_i2_heap), intent(in) :: heap + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in) :: iout + + info = psb_success_ + if (iout < 0) then + write(psb_err_unit,*) 'Invalid file ' + info =-1 + return + end if + + write(iout,*) 'Heap direction ',heap%dir + write(iout,*) 'Heap size ',heap%last + if ((heap%last > 0).and.((.not.allocated(heap%keys)).or.& + & (size(heap%keys) 0).and.((.not.allocated(heap%keys)).or.& + & (size(heap%keys) 0).and.((.not.allocated(heap%idxs)).or.& + & (size(heap%idxs) \namespace psb_base_mod \class psb_i2_base_vect_type + !! The psb_i2_base_vect_type + !! defines a middle level integer(psb_i2pk_) encapsulated dense vector. + !! The encapsulation is needed, in place of a simple array, to allow + !! for complicated situations, such as GPU programming, where the memory + !! area we are interested in is not easily accessible from the host/Fortran + !! side. It is also meant to be encapsulated in an outer type, to allow + !! runtime switching as per the STATE design pattern, similar to the + !! sparse matrix types. + !! + type psb_i2_base_vect_type + !> Values. + integer(psb_i2pk_), allocatable :: v(:) + integer(psb_i2pk_), allocatable :: combuf(:) + integer(psb_mpk_), allocatable :: comid(:,:) + !> vector bldstate: + !! null: pristine; + !! build: it's being filled with entries; + !! assembled: ready to use in computations; + !! update: accepts coefficients but only + !! in already existing entries. + !! The transitions among the states are detailed in + !! psb_T_vect_mod. + integer(psb_ipk_), private :: bldstate = psb_vect_null_ + integer(psb_ipk_), private :: dupl = psb_dupl_null_ + integer(psb_ipk_), private :: ncfs = 0 + integer(psb_ipk_), allocatable :: iv(:) + contains + ! + ! Constructors/allocators + ! + procedure, pass(x) :: bld_x => i2_base_bld_x + procedure, pass(x) :: bld_mn => i2_base_bld_mn + procedure, pass(x) :: bld_en => i2_base_bld_en + generic, public :: bld => bld_x, bld_mn, bld_en + procedure, pass(x) :: all => i2_base_all + procedure, pass(x) :: mold => i2_base_mold + ! + ! Insert/set. Assembly and free. + ! Assembly does almost nothing here, but is important + ! in derived classes. + ! + procedure, pass(x) :: ins_a => i2_base_ins_a + procedure, pass(x) :: ins_v => i2_base_ins_v + generic, public :: ins => ins_a, ins_v + procedure, pass(x) :: zero => i2_base_zero + procedure, pass(x) :: asb_m => i2_base_asb_m + procedure, pass(x) :: asb_e => i2_base_asb_e + generic, public :: asb => asb_m, asb_e + procedure, pass(x) :: free => i2_base_free + procedure, pass(x) :: reinit => i2_base_reinit + procedure, pass(x) :: set_ncfs => i2_base_set_ncfs + procedure, pass(x) :: get_ncfs => i2_base_get_ncfs + procedure, pass(x) :: set_dupl => i2_base_set_dupl + procedure, pass(x) :: get_dupl => i2_base_get_dupl + procedure, pass(x) :: set_state => i2_base_set_state + procedure, pass(x) :: set_null => i2_base_set_null + procedure, pass(x) :: set_bld => i2_base_set_bld + procedure, pass(x) :: set_upd => i2_base_set_upd + procedure, pass(x) :: set_asb => i2_base_set_asb + procedure, pass(x) :: get_state => i2_base_get_state + procedure, pass(x) :: is_null => i2_base_is_null + procedure, pass(x) :: is_bld => i2_base_is_bld + procedure, pass(x) :: is_upd => i2_base_is_upd + procedure, pass(x) :: is_asb => i2_base_is_asb + procedure, pass(x) :: base_cpy => i2_base_cpy + ! + ! Sync: centerpiece of handling of external storage. + ! Any derived class having extra storage upon sync + ! will guarantee that both fortran/host side and + ! external side contain the same data. The base + ! version is only a placeholder. + ! + procedure, pass(x) :: sync => i2_base_sync + procedure, pass(x) :: is_host => i2_base_is_host + procedure, pass(x) :: is_dev => i2_base_is_dev + procedure, pass(x) :: is_sync => i2_base_is_sync + procedure, pass(x) :: set_host => i2_base_set_host + procedure, pass(x) :: set_dev => i2_base_set_dev + procedure, pass(x) :: set_sync => i2_base_set_sync + + ! + ! These are for handling gather/scatter in new + ! comm internals implementation. + ! + procedure, nopass :: use_buffer => i2_base_use_buffer + procedure, pass(x) :: new_buffer => i2_base_new_buffer + procedure, nopass :: device_wait => i2_base_device_wait + procedure, pass(x) :: maybe_free_buffer => i2_base_maybe_free_buffer + procedure, pass(x) :: free_buffer => i2_base_free_buffer + procedure, pass(x) :: new_comid => i2_base_new_comid + procedure, pass(x) :: free_comid => i2_base_free_comid + + ! + ! Basic info + procedure, pass(x) :: get_nrows => i2_base_get_nrows + procedure, pass(x) :: sizeof => i2_base_sizeof + procedure, nopass :: get_fmt => i2_base_get_fmt + ! + ! Set/get data from/to an external array; also + ! overload assignment. + ! + procedure, pass(x) :: get_vect => i2_base_get_vect + procedure, pass(x) :: set_scal => i2_base_set_scal + procedure, pass(x) :: set_vect => i2_base_set_vect + generic, public :: set => set_vect, set_scal + ! + ! Gather/scatter. These are needed for MPI interfacing. + ! May have to be reworked. + ! + procedure, pass(x) :: gthab => i2_base_gthab + procedure, pass(x) :: gthzv => i2_base_gthzv + procedure, pass(x) :: gthzv_x => i2_base_gthzv_x + procedure, pass(x) :: gthzbuf => i2_base_gthzbuf + generic, public :: gth => gthab, gthzv, gthzv_x, gthzbuf + procedure, pass(y) :: sctb => i2_base_sctb + procedure, pass(y) :: sctb_x => i2_base_sctb_x + procedure, pass(y) :: sctb_buf => i2_base_sctb_buf + generic, public :: sct => sctb, sctb_x, sctb_buf + + procedure, pass(x) :: check_addr => i2_base_check_addr + + + + + + + end type psb_i2_base_vect_type + + public :: psb_i2_base_vect + private :: constructor, size_const + interface psb_i2_base_vect + module procedure constructor, size_const + end interface psb_i2_base_vect + +contains + + ! + ! Constructors. + ! + + !> Function constructor: + !! \brief Constructor from an array + !! \param x(:) input array to be copied + !! + function constructor(x) result(this) + integer(psb_i2pk_) :: x(:) + type(psb_i2_base_vect_type) :: this + integer(psb_ipk_) :: info + + this%v = x + call this%asb(size(x,kind=psb_ipk_),info) + end function constructor + + + !> Function constructor: + !! \brief Constructor from size + !! \param n Size of vector to be built. + !! + function size_const(n) result(this) + integer(psb_ipk_), intent(in) :: n + type(psb_i2_base_vect_type) :: this + integer(psb_ipk_) :: info + + call this%asb(n,info) + + end function size_const + + ! + ! Build from a sample + ! + + !> Function bld_x: + !! \memberof psb_i2_base_vect_type + !! \brief Build method from an array + !! \param x(:) input array to be copied + !! + subroutine i2_base_bld_x(x,this,scratch) + use psb_realloc_mod + implicit none + integer(psb_i2pk_), intent(in) :: this(:) + class(psb_i2_base_vect_type), intent(inout) :: x + logical, intent(in), optional :: scratch + + logical :: scratch_ + integer(psb_ipk_) :: info + integer(psb_ipk_) :: i + + if (present(scratch)) then + scratch_ = scratch + else + scratch_ = .false. + end if + call psb_realloc(size(this),x%v,info) + if (info /= 0) then + call psb_errpush(psb_err_alloc_dealloc_,'base_vect_bld') + return + end if +#if defined (PSB_OPENMP) + !$omp parallel do private(i) + do i = 1, size(this) + x%v(i) = this(i) + end do +#else + x%v(:) = this(:) +#endif + end subroutine i2_base_bld_x + + ! + ! Create with size, but no initialization + ! + + !> Function bld_mn: + !! \memberof psb_i2_base_vect_type + !! \brief Build method with size (uninitialized data) + !! \param n size to be allocated. + !! + subroutine i2_base_bld_mn(x,n,scratch) + use psb_realloc_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_i2_base_vect_type), intent(inout) :: x + logical, intent(in), optional :: scratch + + logical :: scratch_ + integer(psb_ipk_) :: info + + if (present(scratch)) then + scratch_ = scratch + else + scratch_ = .false. + end if + call psb_realloc(n,x%v,info) + call x%asb(n,info,scratch=scratch_) + + end subroutine i2_base_bld_mn + + !> Function bld_en: + !! \memberof psb_i2_base_vect_type + !! \brief Build method with size (uninitialized data) + !! \param n size to be allocated. + !! + subroutine i2_base_bld_en(x,n,scratch) + use psb_realloc_mod + implicit none + integer(psb_epk_), intent(in) :: n + class(psb_i2_base_vect_type), intent(inout) :: x + logical, intent(in), optional :: scratch + + logical :: scratch_ + integer(psb_ipk_) :: info + + if (present(scratch)) then + scratch_ = scratch + else + scratch_ = .false. + end if + call psb_realloc(n,x%v,info) + call x%asb(n,info,scratch=scratch_) + + end subroutine i2_base_bld_en + + !> Function base_all: + !! \memberof psb_i2_base_vect_type + !! \brief Build method with size (uninitialized data) and + !! allocation return code. + !! \param n size to be allocated. + !! \param info return code + !! + subroutine i2_base_all(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: n + class(psb_i2_base_vect_type), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(n,x%v,info) + if (try_newins) then + call psb_realloc(n,x%iv,info) + call x%set_ncfs(0) + end if + + end subroutine i2_base_all + + !> Function base_mold: + !! \memberof psb_i2_base_vect_type + !! \brief Mold method: return a variable with the same dynamic type + !! \param y returned variable + !! \param info return code + !! + subroutine i2_base_mold(x, y, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + class(psb_i2_base_vect_type), intent(in) :: x + class(psb_i2_base_vect_type), intent(out), allocatable :: y + integer(psb_ipk_), intent(out) :: info + + allocate(psb_i2_base_vect_type :: y, stat=info) + + end subroutine i2_base_mold + + subroutine i2_base_reinit(x, info,clear) + use psi_serial_mod + use psb_realloc_mod + implicit none + class(psb_i2_base_vect_type), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + logical, intent(in), optional :: clear + logical :: clear_ + + info = 0 + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (allocated(x%v)) then + if (x%is_dev()) call x%sync() + if (clear_) x%v(:) = i2zero + call x%set_host() + call x%set_upd() + end if + + end subroutine i2_base_reinit + + ! + ! Insert a bunch of values at specified positions. + ! + !> Function base_ins: + !! \memberof psb_i2_base_vect_type + !! \brief Insert coefficients. + !! + !! + !! Given a list of N pairs + !! (IRL(i),VAL(i)) + !! record a new coefficient in X such that + !! X(IRL(1:N)) = VAL(1:N). + !! + !! - the update operation will perform either + !! X(IRL(1:n)) = VAL(1:N) + !! or + !! X(IRL(1:n)) = X(IRL(1:n))+VAL(1:N) + !! according to the value of DUPLICATE. + !! + !! + !! \param n number of pairs in input + !! \param irl(:) the input row indices + !! \param val(:) the input coefficients + !! \param dupl how to treat duplicate entries + !! \param info return code + !! + ! + subroutine i2_base_ins_a(n,irl,val,dupl,x,maxr,info) + use psi_serial_mod + implicit none + class(psb_i2_base_vect_type), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl, maxr + integer(psb_ipk_), intent(in) :: irl(:) + integer(psb_i2pk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, isz, dupl_, ncfs_, k + + info = 0 + if (psb_errstatus_fatal()) return + + if (try_newins) then + if (x%is_bld()) then + ncfs_ = x%get_ncfs() + isz = ncfs_ + n + call psb_ensure_size(isz,x%v,info) + call psb_ensure_size(isz,x%iv,info) + k = ncfs_ + do i = 1, n + !loop over all val's rows + ! row actual block row + if ((1 <= irl(i)).and.(irl(i) <= maxr)) then + k = k + 1 + ! this row belongs to me + ! copy i-th row of block val in x + x%v(k) = val(i) + x%iv(k) = irl(i) + end if + enddo + call x%set_ncfs(k) + + else if (x%is_upd()) then + + dupl_ = x%get_dupl() + if (.not.allocated(x%v)) then + info = psb_err_invalid_vect_state_ + else if (n > min(size(irl),size(val))) then + info = psb_err_invalid_input_ + else + isz = size(x%v) + select case(dupl_) + case(psb_dupl_ovwrt_) + do i = 1, n + !loop over all val's rows + ! row actual block row + if ((1 <= irl(i)).and.(irl(i) <= maxr)) then + ! this row belongs to me + ! copy i-th row of block val in x + x%v(irl(i)) = val(i) + end if + enddo + + case(psb_dupl_add_) + + do i = 1, n + !loop over all val's rows + if ((1 <= irl(i)).and.(irl(i) <= maxr)) then + ! this row belongs to me + ! copy i-th row of block val in x + x%v(irl(i)) = x%v(irl(i)) + val(i) + end if + enddo + + case default + info = 321 + ! !$ call psb_errpush(info,name) + ! !$ goto 9999 + end select + end if + else + info = psb_err_invalid_vect_state_ + end if + else + if (.not.allocated(x%v)) then + info = psb_err_invalid_vect_state_ + else if (n > min(size(irl),size(val))) then + info = psb_err_invalid_input_ + + else + isz = size(x%v) + select case(dupl) + case(psb_dupl_ovwrt_) + do i = 1, n + !loop over all val's rows + ! row actual block row + if ((1 <= irl(i)).and.(irl(i) <= isz)) then + ! this row belongs to me + ! copy i-th row of block val in x + x%v(irl(i)) = val(i) + end if + enddo + + case(psb_dupl_add_) + + do i = 1, n + !loop over all val's rows + if ((1 <= irl(i)).and.(irl(i) <= isz)) then + ! this row belongs to me + ! copy i-th row of block val in x + x%v(irl(i)) = x%v(irl(i)) + val(i) + end if + enddo + + case default + info = 321 + ! !$ call psb_errpush(info,name) + ! !$ goto 9999 + end select + end if + end if + call x%set_host() + if (info /= 0) then + call psb_errpush(info,'base_vect_ins') + return + end if + + end subroutine i2_base_ins_a + + subroutine i2_base_ins_v(n,irl,val,dupl,x,maxr,info) + use psi_serial_mod + implicit none + class(psb_i2_base_vect_type), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl, maxr + class(psb_i_base_vect_type), intent(inout) :: irl + class(psb_i2_base_vect_type), intent(inout) :: val + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: isz + + info = 0 + if (psb_errstatus_fatal()) return + + if (irl%is_dev()) call irl%sync() + if (val%is_dev()) call val%sync() + if (x%is_dev()) call x%sync() + call x%ins(n,irl%v,val%v,dupl,maxr,info) + + if (info /= 0) then + call psb_errpush(info,'base_vect_ins') + return + end if + + end subroutine i2_base_ins_v + + + ! + !> Function base_zero + !! \memberof psb_i2_base_vect_type + !! \brief Zero out contents + !! + ! + subroutine i2_base_zero(x) + use psi_serial_mod + implicit none + class(psb_i2_base_vect_type), intent(inout) :: x + + if (allocated(x%v)) then + !$omp workshare + x%v(:)=i2zero + !$omp end workshare + end if + call x%set_host() + end subroutine i2_base_zero + + + ! + ! Assembly. + ! For derived classes: after this the vector + ! storage is supposed to be in sync. + ! + !> Function base_asb: + !! \memberof psb_i2_base_vect_type + !! \brief Assemble vector: reallocate as necessary. + !! + !! \param n final size + !! \param info return code + !! + ! + + subroutine i2_base_asb_m(n, x, info, scratch) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_i2_base_vect_type), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + logical, intent(in), optional :: scratch + + logical :: scratch_ + integer(psb_ipk_) :: i, ncfs, xvsz + integer(psb_i2pk_), allocatable :: vv(:) + + info = 0 + if (present(scratch)) then + scratch_ = scratch + else + scratch_ = .false. + end if + if (try_newins) then + if (x%is_bld()) then + ncfs = x%get_ncfs() + xvsz = psb_size(x%v) + call psb_realloc(n,vv,info) + vv(:) = i2zero + select case(x%get_dupl()) + case(psb_dupl_add_) + do i=1,ncfs + vv(x%iv(i)) = vv(x%iv(i)) + x%v(i) + end do + case(psb_dupl_ovwrt_) + do i=1,ncfs + vv(x%iv(i)) = x%v(i) + end do + case(psb_dupl_err_) + do i=1,ncfs + if (vv(x%iv(i)).ne. i2zero) then + call psb_errpush(psb_err_duplicate_coo,'vect-asb') + return + else + vv(x%iv(i)) = x%v(i) + end if + end do + case default + write(psb_err_unit,*) 'Error in vect_asb: unsafe dupl',x%get_dupl() + info =-7 + end select + call psb_move_alloc(vv,x%v,info) + if (allocated(x%iv)) deallocate(x%iv,stat=info) + else if (x%is_upd().or.x%is_asb().or.scratch_) then + if (x%get_nrows() < n) & + & call psb_realloc(n,x%v,info) + if (info /= 0) & + & call psb_errpush(psb_err_alloc_dealloc_,'vect_asb') + else + info = psb_err_invalid_vect_state_ + call psb_errpush(info,'vect_asb') + end if + else + if (x%get_nrows() < n) & + & call psb_realloc(n,x%v,info) + if (info /= 0) & + & call psb_errpush(psb_err_alloc_dealloc_,'vect_asb') + end if + call x%set_host() + call x%set_asb() + call x%sync() + end subroutine i2_base_asb_m + + ! + ! Assembly. + ! For derived classes: after this the vector + ! storage is supposed to be in sync. + ! + !> Function base_asb: + !! \memberof psb_i2_base_vect_type + !! \brief Assemble vector: reallocate as necessary. + !! + !! \param n final size + !! \param info return code + !! + ! + + subroutine i2_base_asb_e(n, x, info, scratch) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_epk_), intent(in) :: n + class(psb_i2_base_vect_type), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + logical, intent(in), optional :: scratch + + logical :: scratch_ + integer(psb_ipk_) :: i, ncfs, xvsz + integer(psb_i2pk_), allocatable :: vv(:) + + info = 0 + if (present(scratch)) then + scratch_ = scratch + else + scratch_ = .false. + end if + if (try_newins) then + if (info /= 0) & + & call psb_errpush(psb_err_alloc_dealloc_,'vect_asb unhandled') + if (x%is_bld()) then + call psb_realloc(n,vv,info) + vv(:) = i2zero + select case(x%get_dupl()) + case(psb_dupl_add_) + do i=1,x%get_ncfs() + vv(x%iv(i)) = vv(x%iv(i)) + x%v(i) + end do + case(psb_dupl_ovwrt_) + do i=1,x%get_ncfs() + vv(x%iv(i)) = x%v(i) + end do + case(psb_dupl_err_) + do i=1,x%get_ncfs() + if (vv(x%iv(i)).ne. i2zero) then + call psb_errpush(psb_err_duplicate_coo,'vect_asb') + return + else + vv(x%iv(i)) = x%v(i) + end if + end do + case default + write(psb_err_unit,*) 'Error in vect_asb: unsafe dupl',x%get_dupl() + info =-7 + end select + call psb_move_alloc(vv,x%v,info) + if (allocated(x%iv)) deallocate(x%iv,stat=info) + else if (x%is_upd().or.x%is_asb().or.scratch_) then + if (x%get_nrows() < n) & + & call psb_realloc(n,x%v,info) + if (info /= 0) & + & call psb_errpush(psb_err_alloc_dealloc_,'vect_asb') + else + info = psb_err_invalid_vect_state_ + call psb_errpush(info,'vect_asb') + end if + else + if (x%get_nrows() < n) & + & call psb_realloc(n,x%v,info) + if (info /= 0) & + & call psb_errpush(psb_err_alloc_dealloc_,'vect_asb') + end if + call x%set_host() + call x%set_asb() + call x%sync() + end subroutine i2_base_asb_e + + ! + !> Function base_free: + !! \memberof psb_i2_base_vect_type + !! \brief Free vector + !! + !! \param info return code + !! + ! + subroutine i2_base_free(x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + class(psb_i2_base_vect_type), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (allocated(x%v)) deallocate(x%v, stat=info) + if ((info == 0).and.allocated(x%combuf)) call x%free_buffer(info) + if ((info == 0).and.allocated(x%comid)) call x%free_comid(info) + if ((info == 0).and.allocated(x%iv)) deallocate(x%iv, stat=info) + if (info /= 0) call & + & psb_errpush(psb_err_alloc_dealloc_,'vect_free') + call x%set_null() + end subroutine i2_base_free + + ! + !> Function base_free_buffer: + !! \memberof psb_i2_base_vect_type + !! \brief Free aux buffer + !! + !! \param info return code + !! + ! + subroutine i2_base_free_buffer(x,info) + use psb_realloc_mod + implicit none + class(psb_i2_base_vect_type), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + if (allocated(x%combuf)) & + & deallocate(x%combuf,stat=info) + end subroutine i2_base_free_buffer + + ! + !> Function base_maybe_free_buffer: + !! \memberof psb_i2_base_vect_type + !! \brief Conditionally Free aux buffer. + !! In some derived classes, e.g. GPU, + !! does not really frees to avoid runtime + !! costs + !! + !! \param info return code + !! + ! + subroutine i2_base_maybe_free_buffer(x,info) + use psb_realloc_mod + implicit none + class(psb_i2_base_vect_type), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (psb_get_maybe_free_buffer())& + & call x%free_buffer(info) + + end subroutine i2_base_maybe_free_buffer + + ! + !> Function base_free_comid: + !! \memberof psb_i2_base_vect_type + !! \brief Free aux MPI communication id buffer + !! + !! \param info return code + !! + ! + subroutine i2_base_free_comid(x,info) + use psb_realloc_mod + implicit none + class(psb_i2_base_vect_type), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + if (allocated(x%comid)) & + & deallocate(x%comid,stat=info) + end subroutine i2_base_free_comid + + function i2_base_get_ncfs(x) result(res) + implicit none + class(psb_i2_base_vect_type), intent(in) :: x + integer(psb_ipk_) :: res + res = x%ncfs + end function i2_base_get_ncfs + + function i2_base_get_dupl(x) result(res) + implicit none + class(psb_i2_base_vect_type), intent(in) :: x + integer(psb_ipk_) :: res + res = x%dupl + end function i2_base_get_dupl + + function i2_base_get_state(x) result(res) + implicit none + class(psb_i2_base_vect_type), intent(in) :: x + integer(psb_ipk_) :: res + res = x%bldstate + end function i2_base_get_state + + function i2_base_is_null(x) result(res) + implicit none + class(psb_i2_base_vect_type), intent(in) :: x + logical :: res + res = (x%bldstate == psb_vect_null_) + end function i2_base_is_null + + function i2_base_is_bld(x) result(res) + implicit none + class(psb_i2_base_vect_type), intent(in) :: x + logical :: res + res = (x%bldstate == psb_vect_bld_) + end function i2_base_is_bld + + function i2_base_is_upd(x) result(res) + implicit none + class(psb_i2_base_vect_type), intent(in) :: x + logical :: res + res = (x%bldstate == psb_vect_upd_) + end function i2_base_is_upd + + function i2_base_is_asb(x) result(res) + implicit none + class(psb_i2_base_vect_type), intent(in) :: x + logical :: res + res = (x%bldstate == psb_vect_asb_) + end function i2_base_is_asb + + subroutine i2_base_set_ncfs(n,x) + implicit none + class(psb_i2_base_vect_type), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + x%ncfs = n + end subroutine i2_base_set_ncfs + + subroutine i2_base_set_dupl(n,x) + implicit none + class(psb_i2_base_vect_type), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + x%dupl = n + end subroutine i2_base_set_dupl + + subroutine i2_base_set_state(n,x) + implicit none + class(psb_i2_base_vect_type), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + x%bldstate = n + end subroutine i2_base_set_state + + subroutine i2_base_set_null(x) + implicit none + class(psb_i2_base_vect_type), intent(inout) :: x + + x%bldstate = psb_vect_null_ + end subroutine i2_base_set_null + + subroutine i2_base_set_bld(x) + implicit none + class(psb_i2_base_vect_type), intent(inout) :: x + + x%bldstate = psb_vect_bld_ + end subroutine i2_base_set_bld + + subroutine i2_base_set_upd(x) + implicit none + class(psb_i2_base_vect_type), intent(inout) :: x + + x%bldstate = psb_vect_upd_ + end subroutine i2_base_set_upd + + subroutine i2_base_set_asb(x) + implicit none + class(psb_i2_base_vect_type), intent(inout) :: x + + x%bldstate = psb_vect_asb_ + end subroutine i2_base_set_asb + + ! + ! The base version of SYNC & friends does nothing, it's just + ! a placeholder. + ! + ! + !> Function base_sync: + !! \memberof psb_i2_base_vect_type + !! \brief Sync: base version is a no-op. + !! + ! + subroutine i2_base_sync(x) + implicit none + class(psb_i2_base_vect_type), intent(inout) :: x + + end subroutine i2_base_sync + + ! + !> Function base_set_host: + !! \memberof psb_i2_base_vect_type + !! \brief Set_host: base version is a no-op. + !! + ! + subroutine i2_base_set_host(x) + implicit none + class(psb_i2_base_vect_type), intent(inout) :: x + + end subroutine i2_base_set_host + + ! + !> Function base_set_dev: + !! \memberof psb_i2_base_vect_type + !! \brief Set_dev: base version is a no-op. + !! + ! + subroutine i2_base_set_dev(x) + implicit none + class(psb_i2_base_vect_type), intent(inout) :: x + + end subroutine i2_base_set_dev + + ! + !> Function base_set_sync: + !! \memberof psb_i2_base_vect_type + !! \brief Set_sync: base version is a no-op. + !! + ! + subroutine i2_base_set_sync(x) + implicit none + class(psb_i2_base_vect_type), intent(inout) :: x + + end subroutine i2_base_set_sync + + ! + !> Function base_is_dev: + !! \memberof psb_i2_base_vect_type + !! \brief Is vector on external device . + !! + ! + function i2_base_is_dev(x) result(res) + implicit none + class(psb_i2_base_vect_type), intent(in) :: x + logical :: res + + res = .false. + end function i2_base_is_dev + + ! + !> Function base_is_host + !! \memberof psb_i2_base_vect_type + !! \brief Is vector on standard memory . + !! + ! + function i2_base_is_host(x) result(res) + implicit none + class(psb_i2_base_vect_type), intent(in) :: x + logical :: res + + res = .true. + end function i2_base_is_host + + ! + !> Function base_is_sync + !! \memberof psb_i2_base_vect_type + !! \brief Is vector on sync . + !! + ! + function i2_base_is_sync(x) result(res) + implicit none + class(psb_i2_base_vect_type), intent(in) :: x + logical :: res + + res = .true. + end function i2_base_is_sync + + !> Function base_cpy: + !! \memberof psb_d_base_vect_type + !! \brief base_cpy: copy base contents + !! \param y returned variable + !! + subroutine i2_base_cpy(x, y) + use psi_serial_mod + use psb_realloc_mod + implicit none + class(psb_i2_base_vect_type), intent(in) :: x + class(psb_i2_base_vect_type), intent(out) :: y + + if (allocated(x%v)) call y%bld(x%v) + call y%set_state(x%get_state()) + call y%set_dupl(x%get_dupl()) + call y%set_ncfs(x%get_ncfs()) + if (allocated(x%iv)) y%iv = x%iv + end subroutine i2_base_cpy + + ! + ! Size info. + ! + ! + !> Function base_get_nrows + !! \memberof psb_i2_base_vect_type + !! \brief Number of entries + !! + ! + function i2_base_get_nrows(x) result(res) + implicit none + class(psb_i2_base_vect_type), intent(in) :: x + integer(psb_ipk_) :: res + + res = 0 + if (allocated(x%v)) res = size(x%v) + + end function i2_base_get_nrows + + ! + !> Function base_get_sizeof + !! \memberof psb_i2_base_vect_type + !! \brief Size in bytes + !! + ! + function i2_base_sizeof(x) result(res) + implicit none + class(psb_i2_base_vect_type), intent(in) :: x + integer(psb_epk_) :: res + + ! Force 8-byte integers. + res = (1_psb_epk_ * psb_sizeof_i2p) * x%get_nrows() + + end function i2_base_sizeof + + ! + !> Function base_get_fmt + !! \memberof psb_i2_base_vect_type + !! \brief Format + !! + ! + function i2_base_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'BASE' + end function i2_base_get_fmt + + + ! + ! + ! + !> Function base_get_vect + !! \memberof psb_i2_base_vect_type + !! \brief Extract a copy of the contents + !! + ! + function i2_base_get_vect(x,n) result(res) + class(psb_i2_base_vect_type), intent(inout) :: x + integer(psb_i2pk_), allocatable :: res(:) + integer(psb_ipk_) :: info + integer(psb_ipk_), optional :: n + ! Local variables + integer(psb_ipk_) :: isz, i + + if (.not.allocated(x%v)) return + if (.not.x%is_host()) call x%sync() + isz = x%get_nrows() + if (present(n)) isz = max(0,min(isz,n)) + allocate(res(isz),stat=info) + if (info /= 0) then + call psb_errpush(psb_err_alloc_dealloc_,'base_get_vect') + return + end if + if (.false.) then + res(1:isz) = x%v(1:isz) + else + !$omp parallel do private(i) + do i=1, isz + res(i) = x%v(i) + end do + end if + + end function i2_base_get_vect + + ! + ! Reset all values + ! + ! + !> Function base_set_scal + !! \memberof psb_i2_base_vect_type + !! \brief Set all entries + !! \param val The value to set + !! + subroutine i2_base_set_scal(x,val,first,last) + implicit none + class(psb_i2_base_vect_type), intent(inout) :: x + integer(psb_i2pk_), intent(in) :: val + integer(psb_ipk_), optional :: first, last + + integer(psb_ipk_) :: first_, last_, i + + first_=1 + last_=size(x%v) + if (present(first)) first_ = max(1,first) + if (present(last)) last_ = min(last,last_) + + if (x%is_dev()) call x%sync() +#if defined(PSB_OPENMP) + !$omp parallel do private(i) + do i = first_, last_ + x%v(i) = val + end do +#else + x%v(first_:last_) = val +#endif + call x%set_host() + + end subroutine i2_base_set_scal + + + ! + !> Function base_set_vect + !! \memberof psb_i2_base_vect_type + !! \brief Set all entries + !! \param val(:) The vector to be copied in + !! + subroutine i2_base_set_vect(x,val,first,last) + implicit none + class(psb_i2_base_vect_type), intent(inout) :: x + integer(psb_i2pk_), intent(in) :: val(:) + integer(psb_ipk_), optional :: first, last + + integer(psb_ipk_) :: first_, last_, i, info + + if (.not.allocated(x%v)) then + call psb_realloc(size(val),x%v,info) + end if + + first_ = 1 + if (present(first)) first_ = max(1,first) + last_ = min(psb_size(x%v),first_+size(val)-1) + if (present(last)) last_ = min(last,last_) + + if (x%is_dev()) call x%sync() + +#if defined(PSB_OPENMP) + !$omp parallel do private(i) + do i = first_, last_ + x%v(i) = val(i-first_+1) + end do +#else + x%v(first_:last_) = val(1:last_-first_+1) +#endif + call x%set_host() + + end subroutine i2_base_set_vect + + subroutine i2_base_check_addr(x) + class(psb_i2_base_vect_type), intent(inout) :: x + + write(0,*) 'Check addr: base version, do nothing' + + end subroutine i2_base_check_addr + + + + ! + ! Gather: Y = beta * Y + alpha * X(IDX(:)) + ! + ! + !> Function base_gthab + !! \memberof psb_i2_base_vect_type + !! \brief gather into an array + !! Y = beta * Y + alpha * X(IDX(:)) + !! \param n how many entries to consider + !! \param idx(:) indices + !! \param alpha + !! \param beta + subroutine i2_base_gthab(n,idx,alpha,x,beta,y) + use psi_serial_mod + implicit none + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) + integer(psb_i2pk_) :: alpha, beta, y(:) + class(psb_i2_base_vect_type) :: x + + if (x%is_dev()) call x%sync() + call psi_gth(n,idx,alpha,x%v,beta,y) + + end subroutine i2_base_gthab + ! + ! shortcut alpha=1 beta=0 + ! + !> Function base_gthzv + !! \memberof psb_i2_base_vect_type + !! \brief gather into an array special alpha=1 beta=0 + !! Y = X(IDX(:)) + !! \param n how many entries to consider + !! \param idx(:) indices + subroutine i2_base_gthzv_x(i,n,idx,x,y) + use psi_serial_mod + implicit none + integer(psb_ipk_) :: i + integer(psb_mpk_) :: n + class(psb_i_base_vect_type) :: idx + integer(psb_i2pk_) :: y(:) + class(psb_i2_base_vect_type) :: x + + if (idx%is_dev()) call idx%sync() + call x%gth(n,idx%v(i:),y) + + end subroutine i2_base_gthzv_x + + ! + ! New comm internals impl. + ! + subroutine i2_base_gthzbuf(i,n,idx,x) + use psi_serial_mod + implicit none + integer(psb_ipk_) :: i + integer(psb_mpk_) :: n + class(psb_i_base_vect_type) :: idx + class(psb_i2_base_vect_type) :: x + + if (.not.allocated(x%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_,'gthzbuf') + return + end if + if (idx%is_dev()) call idx%sync() + if (x%is_dev()) call x%sync() + call x%gth(n,idx%v(i:),x%combuf(i:)) + + end subroutine i2_base_gthzbuf + ! + !> Function base_device_wait: + !! \memberof psb_i2_base_vect_type + !! \brief device_wait: base version is a no-op. + !! + ! + subroutine i2_base_device_wait() + implicit none + + end subroutine i2_base_device_wait + + function i2_base_use_buffer() result(res) + logical :: res + + res = .true. + end function i2_base_use_buffer + + subroutine i2_base_new_buffer(n,x,info) + use psb_realloc_mod + implicit none + class(psb_i2_base_vect_type), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(n,x%combuf,info) + end subroutine i2_base_new_buffer + + subroutine i2_base_new_comid(n,x,info) + use psb_realloc_mod + implicit none + class(psb_i2_base_vect_type), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(n,2_psb_ipk_,x%comid,info) + end subroutine i2_base_new_comid + + + ! + ! shortcut alpha=1 beta=0 + ! + !> Function base_gthzv + !! \memberof psb_i2_base_vect_type + !! \brief gather into an array special alpha=1 beta=0 + !! Y = X(IDX(:)) + !! \param n how many entries to consider + !! \param idx(:) indices + subroutine i2_base_gthzv(n,idx,x,y) + use psi_serial_mod + implicit none + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) + integer(psb_i2pk_) :: y(:) + class(psb_i2_base_vect_type) :: x + + if (x%is_dev()) call x%sync() + call psi_gth(n,idx,x%v,y) + + end subroutine i2_base_gthzv + + ! + ! Scatter: + ! Y(IDX(:)) = beta*Y(IDX(:)) + X(:) + ! + ! + !> Function base_sctb + !! \memberof psb_i2_base_vect_type + !! \brief scatter into a class(base_vect) + !! Y(IDX(:)) = beta * Y(IDX(:)) + X(:) + !! \param n how many entries to consider + !! \param idx(:) indices + !! \param beta + !! \param x(:) + subroutine i2_base_sctb(n,idx,x,beta,y) + use psi_serial_mod + implicit none + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) + integer(psb_i2pk_) :: beta, x(:) + class(psb_i2_base_vect_type) :: y + + if (y%is_dev()) call y%sync() + call psi_sct(n,idx,x,beta,y%v) + call y%set_host() + + end subroutine i2_base_sctb + + subroutine i2_base_sctb_x(i,n,idx,x,beta,y) + use psi_serial_mod + implicit none + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i + class(psb_i_base_vect_type) :: idx + integer(psb_i2pk_) :: beta, x(:) + class(psb_i2_base_vect_type) :: y + + if (idx%is_dev()) call idx%sync() + call y%sct(n,idx%v(i:),x,beta) + call y%set_host() + + end subroutine i2_base_sctb_x + + subroutine i2_base_sctb_buf(i,n,idx,beta,y) + use psi_serial_mod + implicit none + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i + class(psb_i_base_vect_type) :: idx + integer(psb_i2pk_) :: beta + class(psb_i2_base_vect_type) :: y + + + if (.not.allocated(y%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_,'sctb_buf') + return + end if + if (y%is_dev()) call y%sync() + if (idx%is_dev()) call idx%sync() + call y%sct(n,idx%v(i:),y%combuf(i:),beta) + call y%set_host() + + end subroutine i2_base_sctb_buf + + +end module psb_i2_base_vect_mod + + +module psb_i2_base_multivect_mod + + use psb_const_mod + use psb_error_mod + use psb_realloc_mod + use psb_i2_base_vect_mod + + !> \namespace psb_base_mod \class psb_i2_base_vect_type + !! The psb_i2_base_vect_type + !! defines a middle level integer(psb_ipk_) encapsulated dense vector. + !! The encapsulation is needed, in place of a simple array, to allow + !! for complicated situations, such as GPU programming, where the memory + !! area we are interested in is not easily accessible from the host/Fortran + !! side. It is also meant to be encapsulated in an outer type, to allow + !! runtime switching as per the STATE design pattern, similar to the + !! sparse matrix types. + !! + private + public :: psb_i2_base_multivect, psb_i2_base_multivect_type + + type psb_i2_base_multivect_type + !> Values. + integer(psb_i2pk_), allocatable :: v(:,:) + integer(psb_i2pk_), allocatable :: combuf(:) + integer(psb_mpk_), allocatable :: comid(:,:) + !> vector bldstate: + !! null: pristine; + !! build: it's being filled with entries; + !! assembled: ready to use in computations; + !! update: accepts coefficients but only + !! in already existing entries. + !! The transitions among the states are detailed in + !! psb_T_vect_mod. + integer(psb_ipk_), private :: bldstate = psb_vect_null_ + integer(psb_ipk_), private :: dupl = psb_dupl_null_ + integer(psb_ipk_), private :: ncfs = 0 + integer(psb_ipk_), allocatable :: iv(:) + contains + ! + ! Constructors/allocators + ! + procedure, pass(x) :: bld_x => i2_base_mlv_bld_x + procedure, pass(x) :: bld_n => i2_base_mlv_bld_n + generic, public :: bld => bld_x, bld_n + procedure, pass(x) :: all => i2_base_mlv_all + procedure, pass(x) :: mold => i2_base_mlv_mold + ! + ! Insert/set. Assembly and free. + ! Assembly does almost nothing here, but is important + ! in derived classes. + ! + procedure, pass(x) :: ins => i2_base_mlv_ins + procedure, pass(x) :: zero => i2_base_mlv_zero + procedure, pass(x) :: asb => i2_base_mlv_asb + procedure, pass(x) :: free => i2_base_mlv_free + procedure, pass(x) :: reinit => i2_base_mlv_reinit + procedure, pass(x) :: set_ncfs => i2_base_mlv_set_ncfs + procedure, pass(x) :: get_ncfs => i2_base_mlv_get_ncfs + procedure, pass(x) :: set_dupl => i2_base_mlv_set_dupl + procedure, pass(x) :: get_dupl => i2_base_mlv_get_dupl + procedure, pass(x) :: set_state => i2_base_mlv_set_state + procedure, pass(x) :: set_null => i2_base_mlv_set_null + procedure, pass(x) :: set_bld => i2_base_mlv_set_bld + procedure, pass(x) :: set_upd => i2_base_mlv_set_upd + procedure, pass(x) :: set_asb => i2_base_mlv_set_asb + procedure, pass(x) :: get_state => i2_base_mlv_get_state + procedure, pass(x) :: is_null => i2_base_mlv_is_null + procedure, pass(x) :: is_bld => i2_base_mlv_is_bld + procedure, pass(x) :: is_upd => i2_base_mlv_is_upd + procedure, pass(x) :: is_asb => i2_base_mlv_is_asb + procedure, pass(x) :: base_cpy => i2_base_mlv_cpy + ! + ! Sync: centerpiece of handling of external storage. + ! Any derived class having extra storage upon sync + ! will guarantee that both fortran/host side and + ! external side contain the same data. The base + ! version is only a placeholder. + ! + procedure, pass(x) :: sync => i2_base_mlv_sync + procedure, pass(x) :: is_host => i2_base_mlv_is_host + procedure, pass(x) :: is_dev => i2_base_mlv_is_dev + procedure, pass(x) :: is_sync => i2_base_mlv_is_sync + procedure, pass(x) :: set_host => i2_base_mlv_set_host + procedure, pass(x) :: set_dev => i2_base_mlv_set_dev + procedure, pass(x) :: set_sync => i2_base_mlv_set_sync + + ! + ! Basic info + procedure, pass(x) :: get_nrows => i2_base_mlv_get_nrows + procedure, pass(x) :: get_ncols => i2_base_mlv_get_ncols + procedure, pass(x) :: sizeof => i2_base_mlv_sizeof + procedure, nopass :: get_fmt => i2_base_mlv_get_fmt + ! + ! Set/get data from/to an external array; also + ! overload assignment. + ! + procedure, pass(x) :: get_vect => i2_base_mlv_get_vect + procedure, pass(x) :: set_scal => i2_base_mlv_set_scal + procedure, pass(x) :: set_vect => i2_base_mlv_set_vect + generic, public :: set => set_vect, set_scal + + + ! + ! These are for handling gather/scatter in new + ! comm internals implementation. + ! + procedure, nopass :: use_buffer => i2_base_mlv_use_buffer + procedure, pass(x) :: new_buffer => i2_base_mlv_new_buffer + procedure, nopass :: device_wait => i2_base_mlv_device_wait + procedure, pass(x) :: maybe_free_buffer => i2_base_mlv_maybe_free_buffer + procedure, pass(x) :: free_buffer => i2_base_mlv_free_buffer + procedure, pass(x) :: new_comid => i2_base_mlv_new_comid + procedure, pass(x) :: free_comid => i2_base_mlv_free_comid + + ! + ! Gather/scatter. These are needed for MPI interfacing. + ! May have to be reworked. + ! + procedure, pass(x) :: gthab => i2_base_mlv_gthab + procedure, pass(x) :: gthzv => i2_base_mlv_gthzv + procedure, pass(x) :: gthzm => i2_base_mlv_gthzm + procedure, pass(x) :: gthzv_x => i2_base_mlv_gthzv_x + procedure, pass(x) :: gthzbuf => i2_base_mlv_gthzbuf + generic, public :: gth => gthab, gthzv, gthzm, gthzv_x, gthzbuf + procedure, pass(y) :: sctb => i2_base_mlv_sctb + procedure, pass(y) :: sctbr2 => i2_base_mlv_sctbr2 + procedure, pass(y) :: sctb_x => i2_base_mlv_sctb_x + procedure, pass(y) :: sctb_buf => i2_base_mlv_sctb_buf + generic, public :: sct => sctb, sctbr2, sctb_x, sctb_buf + end type psb_i2_base_multivect_type + + interface psb_i2_base_multivect + module procedure constructor, size_const + end interface psb_i2_base_multivect + +contains + + ! + ! Constructors. + ! + + !> Function constructor: + !! \brief Constructor from an array + !! \param x(:) input array to be copied + !! + function constructor(x) result(this) + integer(psb_i2pk_) :: x(:,:) + type(psb_i2_base_multivect_type) :: this + integer(psb_ipk_) :: info + + this%v = x + call this%asb(size(x,dim=1,kind=psb_ipk_),& + & size(x,dim=2,kind=psb_ipk_),info) + end function constructor + + + !> Function constructor: + !! \brief Constructor from size + !! \param n Size of vector to be built. + !! + function size_const(m,n) result(this) + integer(psb_ipk_), intent(in) :: m,n + type(psb_i2_base_multivect_type) :: this + integer(psb_ipk_) :: info + + call this%asb(m,n,info) + + end function size_const + + ! + ! Build from a sample + ! + + !> Function bld_x: + !! \memberof psb_i2_base_multivect_type + !! \brief Build method from an array + !! \param x(:) input array to be copied + !! + subroutine i2_base_mlv_bld_x(x,this) + use psb_realloc_mod + integer(psb_i2pk_), intent(in) :: this(:,:) + class(psb_i2_base_multivect_type), intent(inout) :: x + integer(psb_ipk_) :: info + + call psb_realloc(size(this,1),size(this,2),x%v,info) + if (info /= 0) then + call psb_errpush(psb_err_alloc_dealloc_,'base_mlv_vect_bld') + return + end if + x%v(:,:) = this(:,:) + + end subroutine i2_base_mlv_bld_x + + ! + ! Create with size, but no initialization + ! + + !> Function bld_n: + !! \memberof psb_i2_base_multivect_type + !! \brief Build method with size (uninitialized data) + !! \param n size to be allocated. + !! + subroutine i2_base_mlv_bld_n(x,m,n,scratch) + use psb_realloc_mod + integer(psb_ipk_), intent(in) :: m,n + class(psb_i2_base_multivect_type), intent(inout) :: x + integer(psb_ipk_) :: info + logical, intent(in), optional :: scratch + + call psb_realloc(m,n,x%v,info) + call x%asb(m,n,info,scratch) + + end subroutine i2_base_mlv_bld_n + + !> Function base_mlv_all: + !! \memberof psb_i2_base_multivect_type + !! \brief Build method with size (uninitialized data) and + !! allocation return code. + !! \param n size to be allocated. + !! \param info return code + !! + subroutine i2_base_mlv_all(m,n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_i2_base_multivect_type), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(m,n,x%v,info) + if (try_newins) then + call psb_realloc(n,x%iv,info) + call x%set_ncfs(0) + end if + + end subroutine i2_base_mlv_all + + !> Function base_mlv_mold: + !! \memberof psb_i2_base_multivect_type + !! \brief Mold method: return a variable with the same dynamic type + !! \param y returned variable + !! \param info return code + !! + subroutine i2_base_mlv_mold(x, y, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + class(psb_i2_base_multivect_type), intent(in) :: x + class(psb_i2_base_multivect_type), intent(out), allocatable :: y + integer(psb_ipk_), intent(out) :: info + + allocate(psb_i2_base_multivect_type :: y, stat=info) + + end subroutine i2_base_mlv_mold + + subroutine i2_base_mlv_reinit(x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + class(psb_i2_base_multivect_type), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (allocated(x%v)) then + call x%sync() + x%v(:,:) = i2zero + call x%set_host() + call x%set_upd() + end if + + end subroutine i2_base_mlv_reinit + + ! + ! Insert a bunch of values at specified positions. + ! + !> Function base_mlv_ins: + !! \memberof psb_i2_base_multivect_type + !! \brief Insert coefficients. + !! + !! + !! Given a list of N pairs + !! (IRL(i),VAL(i)) + !! record a new coefficient in X such that + !! X(IRL(1:N)) = VAL(1:N). + !! + !! - the update operation will perform either + !! X(IRL(1:n)) = VAL(1:N) + !! or + !! X(IRL(1:n)) = X(IRL(1:n))+VAL(1:N) + !! according to the value of DUPLICATE. + !! + !! + !! \param n number of pairs in input + !! \param irl(:) the input row indices + !! \param val(:) the input coefficients + !! \param dupl how to treat duplicate entries + !! \param info return code + !! + ! + subroutine i2_base_mlv_ins(n,irl,val,dupl,x,maxr,info) + use psi_serial_mod + implicit none + class(psb_i2_base_multivect_type), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl,maxr + integer(psb_ipk_), intent(in) :: irl(:) + integer(psb_i2pk_), intent(in) :: val(:,:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, isz, nc, dupl_, ncfs_, k + + info = 0 + if (psb_errstatus_fatal()) return + + if (try_newins) then + if (x%is_bld()) then + nc = size(x%v,2) + ncfs_ = x%get_ncfs() + isz = ncfs_ + n + call psb_realloc(isz,nc,x%v,info) + call psb_ensure_size(isz,x%iv,info) + k = ncfs_ + do i = 1, n + !loop over all val's rows + ! row actual block row + if ((1 <= irl(i)).and.(irl(i) <= maxr)) then + k = k + 1 + ! this row belongs to me + ! copy i-th row of block val in x + x%v(k,:) = val(i,:) + x%iv(k) = irl(i) + end if + enddo + call x%set_ncfs(k) + + else if (x%is_upd()) then + + dupl_ = x%get_dupl() + if (.not.allocated(x%v)) then + info = psb_err_invalid_vect_state_ + else if (n > min(size(irl),size(val))) then + info = psb_err_invalid_input_ + else + isz = size(x%v,1) + nc = size(x%v,2) + select case(dupl_) + case(psb_dupl_ovwrt_) + do i = 1, n + !loop over all val's rows + ! row actual block row + if ((1 <= irl(i)).and.(irl(i) <= maxr)) then + ! this row belongs to me + ! copy i-th row of block val in x + x%v(irl(i),:) = val(i,:) + end if + enddo + + case(psb_dupl_add_) + + do i = 1, n + !loop over all val's rows + if ((1 <= irl(i)).and.(irl(i) <= maxr)) then + ! this row belongs to me + ! copy i-th row of block val in x + x%v(irl(i),:) = x%v(irl(i),:) + val(i,:) + end if + enddo + + case default + info = 321 + ! !$ call psb_errpush(info,name) + ! !$ goto 9999 + end select + end if + else + info = psb_err_invalid_vect_state_ + end if + else + if (.not.allocated(x%v)) then + info = psb_err_invalid_vect_state_ + else if (n > min(size(irl),size(val))) then + info = psb_err_invalid_input_ + + else + isz = size(x%v,1) + nc = size(x%v,2) + select case(dupl) + case(psb_dupl_ovwrt_) + do i = 1, n + !loop over all val's rows + ! row actual block row + if ((1 <= irl(i)).and.(irl(i) <= isz)) then + ! this row belongs to me + ! copy i-th row of block val in x + x%v(irl(i),:) = val(i,:) + end if + enddo + + case(psb_dupl_add_) + + do i = 1, n + !loop over all val's rows + if ((1 <= irl(i)).and.(irl(i) <= isz)) then + ! this row belongs to me + ! copy i-th row of block val in x + x%v(irl(i),:) = x%v(irl(i),:) + val(i,:) + end if + enddo + + case default + info = 321 + ! !$ call psb_errpush(info,name) + ! !$ goto 9999 + end select + end if + end if + call x%set_host() + if (info /= 0) then + call psb_errpush(info,'base_mlv_vect_ins') + return + end if + + end subroutine i2_base_mlv_ins + + ! + !> Function base_mlv_zero + !! \memberof psb_i2_base_multivect_type + !! \brief Zero out contents + !! + ! + subroutine i2_base_mlv_zero(x) + use psi_serial_mod + implicit none + class(psb_i2_base_multivect_type), intent(inout) :: x + + if (allocated(x%v)) x%v=i2zero + call x%set_host() + + end subroutine i2_base_mlv_zero + + + ! + ! Assembly. + ! For derived classes: after this the vector + ! storage is supposed to be in sync. + ! + !> Function base_mlv_asb: + !! \memberof psb_i2_base_multivect_type + !! \brief Assemble vector: reallocate as necessary. + !! + !! \param n final size + !! \param info return code + !! + ! + + subroutine i2_base_mlv_asb(m,n, x, info, scratch) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_i2_base_multivect_type), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + logical, intent(in), optional :: scratch + + logical :: scratch_ + integer(psb_ipk_) :: i, ncfs, xvsz + integer(psb_i2pk_), allocatable :: vv(:,:) + + info = 0 + if (present(scratch)) then + scratch_ = scratch + else + scratch_ = .false. + end if + if (try_newins) then + if (x%is_bld()) then + ncfs = x%get_ncfs() + xvsz = psb_size(x%v) + call psb_realloc(m,n,vv,info) + vv(:,:) = i2zero + select case(x%get_dupl()) + case(psb_dupl_add_) + do i=1,ncfs + vv(x%iv(i),:) = vv(x%iv(i),:) + x%v(i,:) + end do + case(psb_dupl_ovwrt_) + do i=1,ncfs + vv(x%iv(i),:) = x%v(i,:) + end do + case(psb_dupl_err_) + do i=1,ncfs + if (any(vv(x%iv(i),:).ne.i2zero)) then + call psb_errpush(psb_err_duplicate_coo,'vect-asb') + return + else + vv(x%iv(i),:) = x%v(i,:) + end if + end do + case default + write(psb_err_unit,*) 'Error in vect_asb: unsafe dupl',x%get_dupl() + info =-7 + end select + call psb_move_alloc(vv,x%v,info) + if (allocated(x%iv)) deallocate(x%iv,stat=info) + else if (x%is_upd().or.x%is_asb().or.scratch_) then + if (x%get_nrows() < m) & + & call psb_realloc(m,n,x%v,info) + if (info /= 0) & + & call psb_errpush(psb_err_alloc_dealloc_,'vect_asb') + else + info = psb_err_invalid_vect_state_ + call psb_errpush(info,'vect_asb') + end if + else + if ((x%get_nrows() < m).or.(x%get_ncols() Function base_mlv_free: + !! \memberof psb_i2_base_multivect_type + !! \brief Free vector + !! + !! \param info return code + !! + ! + subroutine i2_base_mlv_free(x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + class(psb_i2_base_multivect_type), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (allocated(x%v)) deallocate(x%v, stat=info) + if (info /= 0) call & + & psb_errpush(psb_err_alloc_dealloc_,'vect_free') + + end subroutine i2_base_mlv_free + + function i2_base_mlv_get_ncfs(x) result(res) + implicit none + class(psb_i2_base_multivect_type), intent(in) :: x + integer(psb_ipk_) :: res + res = x%ncfs + end function i2_base_mlv_get_ncfs + + function i2_base_mlv_get_dupl(x) result(res) + implicit none + class(psb_i2_base_multivect_type), intent(in) :: x + integer(psb_ipk_) :: res + res = x%dupl + end function i2_base_mlv_get_dupl + + function i2_base_mlv_get_state(x) result(res) + implicit none + class(psb_i2_base_multivect_type), intent(in) :: x + integer(psb_ipk_) :: res + res = x%bldstate + end function i2_base_mlv_get_state + + function i2_base_mlv_is_null(x) result(res) + implicit none + class(psb_i2_base_multivect_type), intent(in) :: x + logical :: res + res = (x%bldstate == psb_vect_null_) + end function i2_base_mlv_is_null + + function i2_base_mlv_is_bld(x) result(res) + implicit none + class(psb_i2_base_multivect_type), intent(in) :: x + logical :: res + res = (x%bldstate == psb_vect_bld_) + end function i2_base_mlv_is_bld + + function i2_base_mlv_is_upd(x) result(res) + implicit none + class(psb_i2_base_multivect_type), intent(in) :: x + logical :: res + res = (x%bldstate == psb_vect_upd_) + end function i2_base_mlv_is_upd + + function i2_base_mlv_is_asb(x) result(res) + implicit none + class(psb_i2_base_multivect_type), intent(in) :: x + logical :: res + res = (x%bldstate == psb_vect_asb_) + end function i2_base_mlv_is_asb + + subroutine i2_base_mlv_set_ncfs(n,x) + implicit none + class(psb_i2_base_multivect_type), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + x%ncfs = n + end subroutine i2_base_mlv_set_ncfs + + subroutine i2_base_mlv_set_dupl(n,x) + implicit none + class(psb_i2_base_multivect_type), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + x%dupl = n + end subroutine i2_base_mlv_set_dupl + + subroutine i2_base_mlv_set_state(n,x) + implicit none + class(psb_i2_base_multivect_type), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + x%bldstate = n + end subroutine i2_base_mlv_set_state + + subroutine i2_base_mlv_set_null(x) + implicit none + class(psb_i2_base_multivect_type), intent(inout) :: x + + x%bldstate = psb_vect_null_ + end subroutine i2_base_mlv_set_null + + subroutine i2_base_mlv_set_bld(x) + implicit none + class(psb_i2_base_multivect_type), intent(inout) :: x + + x%bldstate = psb_vect_bld_ + end subroutine i2_base_mlv_set_bld + + subroutine i2_base_mlv_set_upd(x) + implicit none + class(psb_i2_base_multivect_type), intent(inout) :: x + + x%bldstate = psb_vect_upd_ + end subroutine i2_base_mlv_set_upd + + subroutine i2_base_mlv_set_asb(x) + implicit none + class(psb_i2_base_multivect_type), intent(inout) :: x + + x%bldstate = psb_vect_asb_ + end subroutine i2_base_mlv_set_asb + + + ! + ! The base version of SYNC & friends does nothing, it's just + ! a placeholder. + ! + ! + !> Function base_mlv_sync: + !! \memberof psb_i2_base_multivect_type + !! \brief Sync: base version is a no-op. + !! + ! + subroutine i2_base_mlv_sync(x) + implicit none + class(psb_i2_base_multivect_type), intent(inout) :: x + + end subroutine i2_base_mlv_sync + + ! + !> Function base_mlv_set_host: + !! \memberof psb_i2_base_multivect_type + !! \brief Set_host: base version is a no-op. + !! + ! + subroutine i2_base_mlv_set_host(x) + implicit none + class(psb_i2_base_multivect_type), intent(inout) :: x + + end subroutine i2_base_mlv_set_host + + ! + !> Function base_mlv_set_dev: + !! \memberof psb_i2_base_multivect_type + !! \brief Set_dev: base version is a no-op. + !! + ! + subroutine i2_base_mlv_set_dev(x) + implicit none + class(psb_i2_base_multivect_type), intent(inout) :: x + + end subroutine i2_base_mlv_set_dev + + ! + !> Function base_mlv_set_sync: + !! \memberof psb_i2_base_multivect_type + !! \brief Set_sync: base version is a no-op. + !! + ! + subroutine i2_base_mlv_set_sync(x) + implicit none + class(psb_i2_base_multivect_type), intent(inout) :: x + + end subroutine i2_base_mlv_set_sync + + ! + !> Function base_mlv_is_dev: + !! \memberof psb_i2_base_multivect_type + !! \brief Is vector on external device . + !! + ! + function i2_base_mlv_is_dev(x) result(res) + implicit none + class(psb_i2_base_multivect_type), intent(in) :: x + logical :: res + + res = .false. + end function i2_base_mlv_is_dev + + ! + !> Function base_mlv_is_host + !! \memberof psb_i2_base_multivect_type + !! \brief Is vector on standard memory . + !! + ! + function i2_base_mlv_is_host(x) result(res) + implicit none + class(psb_i2_base_multivect_type), intent(in) :: x + logical :: res + + res = .true. + end function i2_base_mlv_is_host + + ! + !> Function base_mlv_is_sync + !! \memberof psb_i2_base_multivect_type + !! \brief Is vector on sync . + !! + ! + function i2_base_mlv_is_sync(x) result(res) + implicit none + class(psb_i2_base_multivect_type), intent(in) :: x + logical :: res + + res = .true. + end function i2_base_mlv_is_sync + + !> Function base_cpy: + !! \memberof psb_d_base_vect_type + !! \brief base_cpy: copy base contents + !! \param y returned variable + !! + subroutine i2_base_mlv_cpy(x, y) + use psi_serial_mod + use psb_realloc_mod + implicit none + class(psb_i2_base_multivect_type), intent(in) :: x + class(psb_i2_base_multivect_type), intent(out) :: y + + if (allocated(x%v)) call y%bld(x%v) + call y%set_state(x%get_state()) + call y%set_dupl(x%get_dupl()) + call y%set_ncfs(x%get_ncfs()) + if (allocated(x%iv)) y%iv = x%iv + end subroutine i2_base_mlv_cpy + + + ! + ! Size info. + ! + ! + !> Function base_mlv_get_nrows + !! \memberof psb_i2_base_multivect_type + !! \brief Number of entries + !! + ! + function i2_base_mlv_get_nrows(x) result(res) + implicit none + class(psb_i2_base_multivect_type), intent(in) :: x + integer(psb_ipk_) :: res + + res = 0 + if (allocated(x%v)) res = size(x%v,1) + + end function i2_base_mlv_get_nrows + + function i2_base_mlv_get_ncols(x) result(res) + implicit none + class(psb_i2_base_multivect_type), intent(in) :: x + integer(psb_ipk_) :: res + + res = 0 + if (allocated(x%v)) res = size(x%v,2) + + end function i2_base_mlv_get_ncols + + ! + !> Function base_mlv_get_sizeof + !! \memberof psb_i2_base_multivect_type + !! \brief Size in bytesa + !! + ! + function i2_base_mlv_sizeof(x) result(res) + implicit none + class(psb_i2_base_multivect_type), intent(in) :: x + integer(psb_epk_) :: res + + ! Force 8-byte integers. + res = (1_psb_epk_ * psb_sizeof_i2p) * x%get_nrows() * x%get_ncols() + + end function i2_base_mlv_sizeof + + ! + !> Function base_mlv_get_fmt + !! \memberof psb_i2_base_multivect_type + !! \brief Format + !! + ! + function i2_base_mlv_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'BASE' + end function i2_base_mlv_get_fmt + + + ! + ! + ! + !> Function base_mlv_get_vect + !! \memberof psb_i2_base_multivect_type + !! \brief Extract a copy of the contents + !! + ! + function i2_base_mlv_get_vect(x) result(res) + implicit none + class(psb_i2_base_multivect_type), intent(inout) :: x + integer(psb_i2pk_), allocatable :: res(:,:) + integer(psb_ipk_) :: info,m,n + m = x%get_nrows() + n = x%get_ncols() + if (.not.allocated(x%v)) return + call x%sync() + allocate(res(m,n),stat=info) + if (info /= 0) then + call psb_errpush(psb_err_alloc_dealloc_,'base_mlv_get_vect') + return + end if + res(1:m,1:n) = x%v(1:m,1:n) + end function i2_base_mlv_get_vect + + ! + ! Reset all values + ! + ! + !> Function base_mlv_set_scal + !! \memberof psb_i2_base_multivect_type + !! \brief Set all entries + !! \param val The value to set + !! + subroutine i2_base_mlv_set_scal(x,val) + implicit none + class(psb_i2_base_multivect_type), intent(inout) :: x + integer(psb_i2pk_), intent(in) :: val + + integer(psb_ipk_) :: info + x%v = val + + end subroutine i2_base_mlv_set_scal + + ! + !> Function base_mlv_set_vect + !! \memberof psb_i2_base_multivect_type + !! \brief Set all entries + !! \param val(:) The vector to be copied in + !! + subroutine i2_base_mlv_set_vect(x,val) + implicit none + class(psb_i2_base_multivect_type), intent(inout) :: x + integer(psb_i2pk_), intent(in) :: val(:,:) + integer(psb_ipk_) :: nr, nc + integer(psb_ipk_) :: info + + if (allocated(x%v)) then + nr = min(size(x%v,1),size(val,1)) + nc = min(size(x%v,2),size(val,2)) + + x%v(1:nr,1:nc) = val(1:nr,1:nc) + else + x%v = val + end if + + end subroutine i2_base_mlv_set_vect + + + function i2_base_mlv_use_buffer() result(res) + implicit none + logical :: res + + res = .true. + end function i2_base_mlv_use_buffer + + subroutine i2_base_mlv_new_buffer(n,x,info) + use psb_realloc_mod + implicit none + class(psb_i2_base_multivect_type), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: nc + nc = x%get_ncols() + call psb_realloc(n*nc,x%combuf,info) + end subroutine i2_base_mlv_new_buffer + + subroutine i2_base_mlv_new_comid(n,x,info) + use psb_realloc_mod + implicit none + class(psb_i2_base_multivect_type), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(n,2_psb_ipk_,x%comid,info) + end subroutine i2_base_mlv_new_comid + + + subroutine i2_base_mlv_maybe_free_buffer(x,info) + use psb_realloc_mod + implicit none + class(psb_i2_base_multivect_type), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + + info = 0 + if (psb_get_maybe_free_buffer())& + & call x%free_buffer(info) + + end subroutine i2_base_mlv_maybe_free_buffer + + subroutine i2_base_mlv_free_buffer(x,info) + use psb_realloc_mod + implicit none + class(psb_i2_base_multivect_type), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + if (allocated(x%combuf)) & + & deallocate(x%combuf,stat=info) + end subroutine i2_base_mlv_free_buffer + + subroutine i2_base_mlv_free_comid(x,info) + use psb_realloc_mod + implicit none + class(psb_i2_base_multivect_type), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + if (allocated(x%comid)) & + & deallocate(x%comid,stat=info) + end subroutine i2_base_mlv_free_comid + + + ! + ! Gather: Y = beta * Y + alpha * X(IDX(:)) + ! + ! + !> Function base_mlv_gthab + !! \memberof psb_i2_base_multivect_type + !! \brief gather into an array + !! Y = beta * Y + alpha * X(IDX(:)) + !! \param n how many entries to consider + !! \param idx(:) indices + !! \param alpha + !! \param beta + subroutine i2_base_mlv_gthab(n,idx,alpha,x,beta,y) + use psi_serial_mod + implicit none + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) + integer(psb_i2pk_) :: alpha, beta, y(:) + class(psb_i2_base_multivect_type) :: x + integer(psb_mpk_) :: nc + + if (x%is_dev()) call x%sync() + if (.not.allocated(x%v)) then + return + end if + nc = psb_size(x%v,2_psb_ipk_) + call psi_gth(n,nc,idx,alpha,x%v,beta,y) + + end subroutine i2_base_mlv_gthab + ! + ! shortcut alpha=1 beta=0 + ! + !> Function base_mlv_gthzv + !! \memberof psb_i2_base_multivect_type + !! \brief gather into an array special alpha=1 beta=0 + !! Y = X(IDX(:)) + !! \param n how many entries to consider + !! \param idx(:) indices + subroutine i2_base_mlv_gthzv_x(i,n,idx,x,y) + use psi_serial_mod + implicit none + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i + class(psb_i_base_vect_type) :: idx + integer(psb_i2pk_) :: y(:) + class(psb_i2_base_multivect_type) :: x + + if (x%is_dev()) call x%sync() + call x%gth(n,idx%v(i:),y) + + end subroutine i2_base_mlv_gthzv_x + + ! + ! shortcut alpha=1 beta=0 + ! + !> Function base_mlv_gthzv + !! \memberof psb_i2_base_multivect_type + !! \brief gather into an array special alpha=1 beta=0 + !! Y = X(IDX(:)) + !! \param n how many entries to consider + !! \param idx(:) indices + subroutine i2_base_mlv_gthzv(n,idx,x,y) + use psi_serial_mod + implicit none + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) + integer(psb_i2pk_) :: y(:) + class(psb_i2_base_multivect_type) :: x + integer(psb_mpk_) :: nc + + if (x%is_dev()) call x%sync() + if (.not.allocated(x%v)) then + return + end if + nc = psb_size(x%v,2_psb_ipk_) + + call psi_gth(n,nc,idx,x%v,y) + + end subroutine i2_base_mlv_gthzv + ! + ! shortcut alpha=1 beta=0 + ! + !> Function base_mlv_gthzv + !! \memberof psb_i2_base_multivect_type + !! \brief gather into an array special alpha=1 beta=0 + !! Y = X(IDX(:)) + !! \param n how many entries to consider + !! \param idx(:) indices + subroutine i2_base_mlv_gthzm(n,idx,x,y) + use psi_serial_mod + implicit none + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) + integer(psb_i2pk_) :: y(:,:) + class(psb_i2_base_multivect_type) :: x + integer(psb_mpk_) :: nc + + if (x%is_dev()) call x%sync() + if (.not.allocated(x%v)) then + return + end if + nc = psb_size(x%v,2_psb_ipk_) + + call psi_gth(n,nc,idx,x%v,y) + + end subroutine i2_base_mlv_gthzm + + ! + ! New comm internals impl. + ! + subroutine i2_base_mlv_gthzbuf(i,ixb,n,idx,x) + use psi_serial_mod + implicit none + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i, ixb + class(psb_i_base_vect_type) :: idx + class(psb_i2_base_multivect_type) :: x + integer(psb_ipk_) :: nc + + if (.not.allocated(x%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_,'gthzbuf') + return + end if + if (idx%is_dev()) call idx%sync() + if (x%is_dev()) call x%sync() + nc = x%get_ncols() + call x%gth(n,idx%v(i:),x%combuf(ixb:)) + + end subroutine i2_base_mlv_gthzbuf + + ! + ! Scatter: + ! Y(IDX(:),:) = beta*Y(IDX(:),:) + X(:) + ! + ! + !> Function base_mlv_sctb + !! \memberof psb_i2_base_multivect_type + !! \brief scatter into a class(base_mlv_vect) + !! Y(IDX(:)) = beta * Y(IDX(:)) + X(:) + !! \param n how many entries to consider + !! \param idx(:) indices + !! \param beta + !! \param x(:) + subroutine i2_base_mlv_sctb(n,idx,x,beta,y) + use psi_serial_mod + implicit none + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) + integer(psb_i2pk_) :: beta, x(:) + class(psb_i2_base_multivect_type) :: y + integer(psb_mpk_) :: nc + + if (y%is_dev()) call y%sync() + nc = psb_size(y%v,2_psb_ipk_) + call psi_sct(n,nc,idx,x,beta,y%v) + call y%set_host() + + end subroutine i2_base_mlv_sctb + + subroutine i2_base_mlv_sctbr2(n,idx,x,beta,y) + use psi_serial_mod + implicit none + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) + integer(psb_i2pk_) :: beta, x(:,:) + class(psb_i2_base_multivect_type) :: y + integer(psb_mpk_) :: nc + + if (y%is_dev()) call y%sync() + nc = y%get_ncols() + call psi_sct(n,nc,idx,x,beta,y%v) + call y%set_host() + + end subroutine i2_base_mlv_sctbr2 + + subroutine i2_base_mlv_sctb_x(i,n,idx,x,beta,y) + use psi_serial_mod + implicit none + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i + class(psb_i_base_vect_type) :: idx + integer( psb_i2pk_) :: beta, x(:) + class(psb_i2_base_multivect_type) :: y + + call y%sct(n,idx%v(i:),x,beta) + + end subroutine i2_base_mlv_sctb_x + + subroutine i2_base_mlv_sctb_buf(i,iyb,n,idx,beta,y) + use psi_serial_mod + implicit none + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i, iyb + class(psb_i_base_vect_type) :: idx + integer(psb_i2pk_) :: beta + class(psb_i2_base_multivect_type) :: y + integer(psb_ipk_) :: nc + + if (.not.allocated(y%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_,'sctb_buf') + return + end if + if (y%is_dev()) call y%sync() + if (idx%is_dev()) call idx%sync() + nc = y%get_ncols() + call y%sct(n,idx%v(i:),y%combuf(iyb:),beta) + call y%set_host() + + end subroutine i2_base_mlv_sctb_buf + + ! + !> Function base_device_wait: + !! \memberof psb_i2_base_vect_type + !! \brief device_wait: base version is a no-op. + !! + ! + subroutine i2_base_mlv_device_wait() + implicit none + + end subroutine i2_base_mlv_device_wait + +end module psb_i2_base_multivect_mod diff --git a/base/modules/serial/psb_i2_vect_mod.F90 b/base/modules/serial/psb_i2_vect_mod.F90 new file mode 100644 index 00000000..db3e4318 --- /dev/null +++ b/base/modules/serial/psb_i2_vect_mod.F90 @@ -0,0 +1,1271 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific prior written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! +! package: psb_i2_vect_mod +! +! This module contains the definition of the psb_i2_vect type which +! is the outer container for dense vectors. +! Therefore all methods simply invoke the corresponding methods of the +! inner component. +! +module psb_i2_vect_mod + + use psb_realloc_mod + use psb_i2_base_vect_mod + use psb_i_vect_mod + + type psb_i2_vect_type + class(psb_i2_base_vect_type), allocatable :: v + integer(psb_ipk_) :: nrmv = 0 + integer(psb_ipk_) :: remote_build=psb_matbld_noremote_ + integer(psb_ipk_) :: dupl = psb_dupl_add_ + integer(psb_i2pk_), allocatable :: rmtv(:) + integer(psb_lpk_), allocatable :: rmidx(:) + contains + procedure, pass(x) :: get_nrows => i2_vect_get_nrows + procedure, pass(x) :: sizeof => i2_vect_sizeof + procedure, pass(x) :: get_fmt => i2_vect_get_fmt + procedure, pass(x) :: is_remote_build => i2_vect_is_remote_build + procedure, pass(x) :: set_remote_build => i2_vect_set_remote_build + procedure, pass(x) :: get_nrmv => i2_vect_get_nrmv + procedure, pass(x) :: set_nrmv => i2_vect_set_nrmv + procedure, pass(x) :: all => i2_vect_all + procedure, pass(x) :: reall => i2_vect_reall + procedure, pass(x) :: zero => i2_vect_zero + procedure, pass(x) :: asb => i2_vect_asb + procedure, pass(x) :: set_dupl => i2_vect_set_dupl + procedure, pass(x) :: get_dupl => i2_vect_get_dupl + procedure, pass(x) :: set_ncfs => i2_vect_set_ncfs + procedure, pass(x) :: get_ncfs => i2_vect_get_ncfs + procedure, pass(x) :: set_state => i2_vect_set_state + procedure, pass(x) :: set_null => i2_vect_set_null + procedure, pass(x) :: set_bld => i2_vect_set_bld + procedure, pass(x) :: set_upd => i2_vect_set_upd + procedure, pass(x) :: set_asb => i2_vect_set_asb + procedure, pass(x) :: get_state => i2_vect_get_state + procedure, pass(x) :: is_null => i2_vect_is_null + procedure, pass(x) :: is_bld => i2_vect_is_bld + procedure, pass(x) :: is_upd => i2_vect_is_upd + procedure, pass(x) :: is_asb => i2_vect_is_asb + procedure, pass(x) :: reinit => i2_vect_reinit + + procedure, pass(x) :: gthab => i2_vect_gthab + procedure, pass(x) :: gthzv => i2_vect_gthzv + generic, public :: gth => gthab, gthzv + procedure, pass(y) :: sctb => i2_vect_sctb + generic, public :: sct => sctb + procedure, pass(x) :: free => i2_vect_free + procedure, pass(x) :: ins_a => i2_vect_ins_a + procedure, pass(x) :: ins_v => i2_vect_ins_v + generic, public :: ins => ins_v, ins_a + procedure, pass(x) :: bld_x => i2_vect_bld_x + procedure, pass(x) :: bld_mn => i2_vect_bld_mn + procedure, pass(x) :: bld_en => i2_vect_bld_en + generic, public :: bld => bld_x, bld_mn, bld_en + procedure, pass(x) :: get_vect => i2_vect_get_vect + procedure, pass(x) :: cnv => i2_vect_cnv + procedure, pass(x) :: set_scal => i2_vect_set_scal + procedure, pass(x) :: set_vect => i2_vect_set_vect + generic, public :: set => set_vect, set_scal + procedure, pass(x) :: clone => i2_vect_clone + + procedure, pass(x) :: sync => i2_vect_sync + procedure, pass(x) :: is_host => i2_vect_is_host + procedure, pass(x) :: is_dev => i2_vect_is_dev + procedure, pass(x) :: is_sync => i2_vect_is_sync + procedure, pass(x) :: set_host => i2_vect_set_host + procedure, pass(x) :: set_dev => i2_vect_set_dev + procedure, pass(x) :: set_sync => i2_vect_set_sync + procedure, pass(x) :: check_addr => i2_vect_check_addr + + + + end type psb_i2_vect_type + + public :: psb_i2_vect + private :: constructor, size_const + interface psb_i2_vect + module procedure constructor, size_const + end interface psb_i2_vect + + private :: i2_vect_get_nrows, i2_vect_sizeof, i2_vect_get_fmt, & + & i2_vect_all, i2_vect_reall, i2_vect_zero, i2_vect_asb, & + & i2_vect_gthab, i2_vect_gthzv, i2_vect_sctb, & + & i2_vect_free, i2_vect_ins_a, i2_vect_ins_v, i2_vect_bld_x, & + & i2_vect_bld_mn, i2_vect_bld_en, i2_vect_get_vect, & + & i2_vect_cnv, i2_vect_set_scal, & + & i2_vect_set_vect, i2_vect_clone, i2_vect_sync, i2_vect_is_host, & + & i2_vect_is_dev, i2_vect_is_sync, i2_vect_set_host, & + & i2_vect_set_dev, i2_vect_set_sync, & + & i2_vect_set_remote_build, i2_is_remote_build, & + & i2_vect_set_dupl, i2_get_dupl, i2_vect_set_nrmv, i2_get_nrmv + + + class(psb_i2_base_vect_type), allocatable, target,& + & save, private :: psb_i2_base_vect_default + + interface psb_set_vect_default + module procedure psb_i2_set_vect_default + end interface psb_set_vect_default + + interface psb_get_vect_default + module procedure psb_i2_get_vect_default + end interface psb_get_vect_default + + +contains + + function i2_vect_get_dupl(x) result(res) + implicit none + class(psb_i2_vect_type), intent(in) :: x + integer(psb_ipk_) :: res + if (allocated(x%v)) then + res = x%v%get_dupl() + else + res = psb_dupl_null_ + end if + end function i2_vect_get_dupl + + subroutine i2_vect_set_dupl(x,val) + implicit none + class(psb_i2_vect_type), intent(inout) :: x + integer(psb_ipk_), intent(in), optional :: val + + if (allocated(x%v)) then + if (present(val)) then + call x%v%set_dupl(val) + else + call x%v%set_dupl(psb_dupl_def_) + end if + end if + end subroutine i2_vect_set_dupl + + function i2_vect_get_ncfs(x) result(res) + implicit none + class(psb_i2_vect_type), intent(in) :: x + integer(psb_ipk_) :: res + if (allocated(x%v)) then + res = x%v%get_ncfs() + else + res = 0 + end if + end function i2_vect_get_ncfs + + subroutine i2_vect_set_ncfs(x,val) + implicit none + class(psb_i2_vect_type), intent(inout) :: x + integer(psb_ipk_), intent(in), optional :: val + + if (allocated(x%v)) then + if (present(val)) then + call x%v%set_ncfs(val) + else + call x%v%set_ncfs(0) + end if + end if + end subroutine i2_vect_set_ncfs + + function i2_vect_get_state(x) result(res) + implicit none + class(psb_i2_vect_type), intent(in) :: x + integer(psb_ipk_) :: res + if (allocated(x%v)) then + res = x%v%get_state() + else + res = psb_vect_null_ + end if + end function i2_vect_get_state + + function i2_vect_is_null(x) result(res) + implicit none + class(psb_i2_vect_type), intent(in) :: x + logical :: res + res = (x%get_state() == psb_vect_null_) + end function i2_vect_is_null + + function i2_vect_is_bld(x) result(res) + implicit none + class(psb_i2_vect_type), intent(in) :: x + logical :: res + res = (x%get_state() == psb_vect_bld_) + end function i2_vect_is_bld + + function i2_vect_is_upd(x) result(res) + implicit none + class(psb_i2_vect_type), intent(in) :: x + logical :: res + res = (x%get_state() == psb_vect_upd_) + end function i2_vect_is_upd + + function i2_vect_is_asb(x) result(res) + implicit none + class(psb_i2_vect_type), intent(in) :: x + logical :: res + res = (x%get_state() == psb_vect_asb_) + end function i2_vect_is_asb + + subroutine i2_vect_set_state(n,x) + implicit none + class(psb_i2_vect_type), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + if (allocated(x%v)) then + call x%v%set_state(n) + end if + end subroutine i2_vect_set_state + + + subroutine i2_vect_set_null(x) + implicit none + class(psb_i2_vect_type), intent(inout) :: x + + call x%set_state(psb_vect_null_) + end subroutine i2_vect_set_null + + subroutine i2_vect_set_bld(x) + implicit none + class(psb_i2_vect_type), intent(inout) :: x + + call x%set_state(psb_vect_bld_) + end subroutine i2_vect_set_bld + + subroutine i2_vect_set_upd(x) + implicit none + class(psb_i2_vect_type), intent(inout) :: x + + call x%set_state(psb_vect_upd_) + end subroutine i2_vect_set_upd + + subroutine i2_vect_set_asb(x) + implicit none + class(psb_i2_vect_type), intent(inout) :: x + + call x%set_state(psb_vect_asb_) + end subroutine i2_vect_set_asb + + function i2_vect_get_nrmv(x) result(res) + implicit none + class(psb_i2_vect_type), intent(in) :: x + integer(psb_ipk_) :: res + res = x%nrmv + end function i2_vect_get_nrmv + + subroutine i2_vect_set_nrmv(x,val) + implicit none + class(psb_i2_vect_type), intent(inout) :: x + integer(psb_ipk_), intent(in) :: val + + x%nrmv = val + end subroutine i2_vect_set_nrmv + + function i2_vect_is_remote_build(x) result(res) + implicit none + class(psb_i2_vect_type), intent(in) :: x + logical :: res + res = (x%remote_build == psb_matbld_remote_) + end function i2_vect_is_remote_build + + subroutine i2_vect_set_remote_build(x,val) + implicit none + class(psb_i2_vect_type), intent(inout) :: x + integer(psb_ipk_), intent(in), optional :: val + + if (present(val)) then + x%remote_build = val + else + x%remote_build = psb_matbld_remote_ + end if + end subroutine i2_vect_set_remote_build + + subroutine psb_i2_set_vect_default(v) + implicit none + class(psb_i2_base_vect_type), intent(in) :: v + + if (allocated(psb_i2_base_vect_default)) then + deallocate(psb_i2_base_vect_default) + end if + allocate(psb_i2_base_vect_default, mold=v) + + end subroutine psb_i2_set_vect_default + + function psb_i2_get_vect_default(v) result(res) + implicit none + class(psb_i2_vect_type), intent(in) :: v + class(psb_i2_base_vect_type), pointer :: res + + res => psb_i2_get_base_vect_default() + + end function psb_i2_get_vect_default + + subroutine psb_i2_clear_vect_default() + implicit none + + if (allocated(psb_i2_base_vect_default)) then + deallocate(psb_i2_base_vect_default) + end if + + end subroutine psb_i2_clear_vect_default + + function psb_i2_get_base_vect_default() result(res) + implicit none + class(psb_i2_base_vect_type), pointer :: res + + if (.not.allocated(psb_i2_base_vect_default)) then + allocate(psb_i2_base_vect_type :: psb_i2_base_vect_default) + end if + + res => psb_i2_base_vect_default + + end function psb_i2_get_base_vect_default + + subroutine i2_vect_clone(x,y,info) + implicit none + class(psb_i2_vect_type), intent(inout) :: x + class(psb_i2_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + call y%free(info) + if ((info==0).and.allocated(x%v)) then + ! + ! Using sourced allocation here creates + ! problems with handling of memory allocated + ! elsewhere (e.g. accelerators), hence delegation + ! to %bld method + ! + call y%bld(x%get_vect(),mold=x%v) + end if + end subroutine i2_vect_clone + + subroutine i2_vect_bld_x(x,invect,mold,scratch) + integer(psb_i2pk_), intent(in) :: invect(:) + class(psb_i2_vect_type), intent(inout) :: x + class(psb_i2_base_vect_type), intent(in), optional :: mold + logical, intent(in), optional :: scratch + + logical :: scratch_ + integer(psb_ipk_) :: info + + if (present(scratch)) then + scratch_ = scratch + else + scratch_ = .false. + end if + + info = psb_success_ + if (allocated(x%v)) & + & call x%free(info) + + if (present(mold)) then + allocate(x%v,stat=info,mold=mold) + else + allocate(x%v,stat=info, mold=psb_i2_get_base_vect_default()) + endif + + if (info == psb_success_) call x%v%bld(invect,scratch=scratch_) + + end subroutine i2_vect_bld_x + + + subroutine i2_vect_bld_mn(x,n,mold,scratch) + integer(psb_mpk_), intent(in) :: n + class(psb_i2_vect_type), intent(inout) :: x + class(psb_i2_base_vect_type), intent(in), optional :: mold + logical, intent(in), optional :: scratch + + logical :: scratch_ + integer(psb_ipk_) :: info + class(psb_i2_base_vect_type), pointer :: mld + if (present(scratch)) then + scratch_ = scratch + else + scratch_ = .false. + end if + + info = psb_success_ + if (allocated(x%v)) & + & call x%free(info) + + if (present(mold)) then + allocate(x%v,stat=info,mold=mold) + else + allocate(x%v,stat=info, mold=psb_i2_get_base_vect_default()) + endif + if (info == psb_success_) call x%v%bld(n,scratch=scratch_) + + end subroutine i2_vect_bld_mn + + subroutine i2_vect_bld_en(x,n,mold,scratch) + integer(psb_epk_), intent(in) :: n + class(psb_i2_vect_type), intent(inout) :: x + class(psb_i2_base_vect_type), intent(in), optional :: mold + logical, intent(in), optional :: scratch + + logical :: scratch_ + integer(psb_ipk_) :: info + + info = psb_success_ + if (present(scratch)) then + scratch_ = scratch + else + scratch_ = .false. + end if + + if (allocated(x%v)) & + & call x%free(info) + + if (present(mold)) then + allocate(x%v,stat=info,mold=mold) + else + allocate(x%v,stat=info, mold=psb_i2_get_base_vect_default()) + endif + if (info == psb_success_) call x%v%bld(n,scratch=scratch_) + + end subroutine i2_vect_bld_en + + function i2_vect_get_vect(x,n) result(res) + class(psb_i2_vect_type), intent(inout) :: x + integer(psb_i2pk_), allocatable :: res(:) + integer(psb_ipk_) :: info + integer(psb_ipk_), optional :: n + + if (allocated(x%v)) then + res = x%v%get_vect(n) + end if + end function i2_vect_get_vect + + subroutine i2_vect_set_scal(x,val,first,last) + class(psb_i2_vect_type), intent(inout) :: x + integer(psb_i2pk_), intent(in) :: val + integer(psb_ipk_), optional :: first, last + + integer(psb_ipk_) :: info + if (allocated(x%v)) call x%v%set(val,first,last) + + end subroutine i2_vect_set_scal + + subroutine i2_vect_set_vect(x,val,first,last) + class(psb_i2_vect_type), intent(inout) :: x + integer(psb_i2pk_), intent(in) :: val(:) + integer(psb_ipk_), optional :: first, last + + integer(psb_ipk_) :: info + if (allocated(x%v)) call x%v%set(val,first,last) + + end subroutine i2_vect_set_vect + + subroutine i2_vect_check_addr(x) + class(psb_i2_vect_type), intent(inout) :: x + + integer(psb_ipk_) :: info + if (allocated(x%v)) call x%v%check_addr() + + end subroutine i2_vect_check_addr + + function constructor(x) result(this) + integer(psb_i2pk_) :: x(:) + type(psb_i2_vect_type) :: this + integer(psb_ipk_) :: info + + call this%bld(x) + call this%asb(size(x,kind=psb_ipk_),info) + + end function constructor + + + function size_const(n) result(this) + integer(psb_ipk_), intent(in) :: n + type(psb_i2_vect_type) :: this + integer(psb_ipk_) :: info + + call this%bld(n) + call this%asb(n,info) + + end function size_const + + function i2_vect_get_nrows(x) result(res) + implicit none + class(psb_i2_vect_type), intent(in) :: x + integer(psb_ipk_) :: res + res = 0 + if (allocated(x%v)) res = x%v%get_nrows() + end function i2_vect_get_nrows + + function i2_vect_sizeof(x) result(res) + implicit none + class(psb_i2_vect_type), intent(in) :: x + integer(psb_epk_) :: res + res = 0 + if (allocated(x%v)) res = x%v%sizeof() + end function i2_vect_sizeof + + function i2_vect_get_fmt(x) result(res) + implicit none + class(psb_i2_vect_type), intent(in) :: x + character(len=5) :: res + res = 'NULL' + if (allocated(x%v)) res = x%v%get_fmt() + end function i2_vect_get_fmt + + subroutine i2_vect_all(n, x, info, mold) + + implicit none + integer(psb_ipk_), intent(in) :: n + class(psb_i2_vect_type), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + class(psb_i2_base_vect_type), intent(in), optional :: mold + + if (allocated(x%v)) & + & call x%free(info) + + if (present(mold)) then + allocate(x%v,stat=info,mold=mold) + else + allocate(psb_i2_base_vect_type :: x%v,stat=info) + endif + if (info == 0) then + call x%v%all(n,info) + else + info = psb_err_alloc_dealloc_ + end if + call x%set_bld() + end subroutine i2_vect_all + + subroutine i2_vect_reinit(x, info, clear) + implicit none + class(psb_i2_vect_type), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + logical, intent(in), optional :: clear + + if (allocated(x%v)) call x%v%reinit(info,clear) + call x%set_upd() + + end subroutine i2_vect_reinit + + subroutine i2_vect_reall(n, x, info) + + implicit none + integer(psb_ipk_), intent(in) :: n + class(psb_i2_vect_type), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (.not.allocated(x%v)) & + & call x%all(n,info) + if (info == 0) & + & call x%asb(n,info) + + end subroutine i2_vect_reall + + subroutine i2_vect_zero(x) + use psi_serial_mod + implicit none + class(psb_i2_vect_type), intent(inout) :: x + + if (allocated(x%v)) call x%v%zero() + + end subroutine i2_vect_zero + + subroutine i2_vect_asb(n, x, info, scratch) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: n + class(psb_i2_vect_type), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + logical, intent(in), optional :: scratch + + if (allocated(x%v)) then + call x%v%asb(n,info,scratch=scratch) + call x%set_asb() + end if + end subroutine i2_vect_asb + + subroutine i2_vect_gthab(n,idx,alpha,x,beta,y) + use psi_serial_mod + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) + integer(psb_i2pk_) :: alpha, beta, y(:) + class(psb_i2_vect_type) :: x + + if (allocated(x%v)) & + & call x%v%gth(n,idx,alpha,beta,y) + + end subroutine i2_vect_gthab + + subroutine i2_vect_gthzv(n,idx,x,y) + use psi_serial_mod + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) + integer(psb_i2pk_) :: y(:) + class(psb_i2_vect_type) :: x + + if (allocated(x%v)) & + & call x%v%gth(n,idx,y) + + end subroutine i2_vect_gthzv + + subroutine i2_vect_sctb(n,idx,x,beta,y) + use psi_serial_mod + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) + integer(psb_i2pk_) :: beta, x(:) + class(psb_i2_vect_type) :: y + + if (allocated(y%v)) & + & call y%v%sct(n,idx,x,beta) + + end subroutine i2_vect_sctb + + subroutine i2_vect_free(x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + class(psb_i2_vect_type), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (allocated(x%v)) then + call x%v%free(info) + if (info == 0) deallocate(x%v,stat=info) + end if + + end subroutine i2_vect_free + + subroutine i2_vect_ins_a(n,irl,val,x,maxr,info) + use psi_serial_mod + implicit none + class(psb_i2_vect_type), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, maxr + integer(psb_ipk_), intent(in) :: irl(:) + integer(psb_i2pk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, dupl + + info = 0 + if (.not.allocated(x%v)) then + info = psb_err_invalid_vect_state_ + return + end if + dupl = x%get_dupl() + call x%v%ins(n,irl,val,dupl,maxr,info) + + end subroutine i2_vect_ins_a + + subroutine i2_vect_ins_v(n,irl,val,x,maxr,info) + use psi_serial_mod + implicit none + class(psb_i2_vect_type), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, maxr + class(psb_i_vect_type), intent(inout) :: irl + class(psb_i2_vect_type), intent(inout) :: val + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, dupl + + info = 0 + if (.not.(allocated(x%v).and.allocated(irl%v).and.allocated(val%v))) then + info = psb_err_invalid_vect_state_ + return + end if + dupl = x%get_dupl() + call x%v%ins(n,irl%v,val%v,dupl,maxr,info) + + end subroutine i2_vect_ins_v + + + subroutine i2_vect_cnv(x,mold) + class(psb_i2_vect_type), intent(inout) :: x + class(psb_i2_base_vect_type), intent(in), optional :: mold + class(psb_i2_base_vect_type), allocatable :: tmp + + integer(psb_ipk_) :: info + + info = psb_success_ + if (present(mold)) then + allocate(tmp,stat=info,mold=mold) + else + allocate(tmp,stat=info,mold=psb_i2_get_base_vect_default()) + end if + if (allocated(x%v)) then + if (allocated(x%v%v)) then + call x%v%sync() + if (info == psb_success_) call tmp%bld(x%v%v) + call x%v%base_cpy(tmp) + call x%v%free(info) + endif + end if + call move_alloc(tmp,x%v) + + end subroutine i2_vect_cnv + + + subroutine i2_vect_sync(x) + implicit none + class(psb_i2_vect_type), intent(inout) :: x + + if (allocated(x%v)) & + & call x%v%sync() + + end subroutine i2_vect_sync + + subroutine i2_vect_set_sync(x) + implicit none + class(psb_i2_vect_type), intent(inout) :: x + + if (allocated(x%v)) & + & call x%v%set_sync() + + end subroutine i2_vect_set_sync + + subroutine i2_vect_set_host(x) + implicit none + class(psb_i2_vect_type), intent(inout) :: x + + if (allocated(x%v)) & + & call x%v%set_host() + + end subroutine i2_vect_set_host + + subroutine i2_vect_set_dev(x) + implicit none + class(psb_i2_vect_type), intent(inout) :: x + + if (allocated(x%v)) & + & call x%v%set_dev() + + end subroutine i2_vect_set_dev + + function i2_vect_is_sync(x) result(res) + implicit none + logical :: res + class(psb_i2_vect_type), intent(inout) :: x + + res = .true. + if (allocated(x%v)) & + & res = x%v%is_sync() + + end function i2_vect_is_sync + + function i2_vect_is_host(x) result(res) + implicit none + logical :: res + class(psb_i2_vect_type), intent(inout) :: x + + res = .true. + if (allocated(x%v)) & + & res = x%v%is_host() + + end function i2_vect_is_host + + function i2_vect_is_dev(x) result(res) + implicit none + logical :: res + class(psb_i2_vect_type), intent(inout) :: x + + res = .false. + if (allocated(x%v)) & + & res = x%v%is_dev() + + end function i2_vect_is_dev + + + + +end module psb_i2_vect_mod + + +module psb_i2_multivect_mod + + use psb_i2_base_multivect_mod + use psb_const_mod + use psb_i_vect_mod + + + !private + + type psb_i2_multivect_type + class(psb_i2_base_multivect_type), allocatable :: v + integer(psb_ipk_) :: nrmv = 0 + integer(psb_ipk_) :: remote_build=psb_matbld_noremote_ + integer(psb_ipk_) :: dupl = psb_dupl_add_ + integer(psb_i2pk_), allocatable :: rmtv(:,:) + contains + procedure, pass(x) :: get_nrows => i2_mvect_get_nrows + procedure, pass(x) :: get_ncols => i2_mvect_get_ncols + procedure, pass(x) :: sizeof => i2_mvect_sizeof + procedure, pass(x) :: get_fmt => i2_mvect_get_fmt + procedure, pass(x) :: is_remote_build => i2_mvect_is_remote_build + procedure, pass(x) :: set_remote_build => i2_mvect_set_remote_build + procedure, pass(x) :: get_dupl => i2_mvect_get_dupl + procedure, pass(x) :: set_dupl => i2_mvect_set_dupl + + procedure, pass(x) :: all => i2_mvect_all + procedure, pass(x) :: reall => i2_mvect_reall + procedure, pass(x) :: zero => i2_mvect_zero + procedure, pass(x) :: asb => i2_mvect_asb + procedure, pass(x) :: sync => i2_mvect_sync + procedure, pass(x) :: free => i2_mvect_free + procedure, pass(x) :: ins => i2_mvect_ins + procedure, pass(x) :: bld_x => i2_mvect_bld_x + procedure, pass(x) :: bld_n => i2_mvect_bld_n + generic, public :: bld => bld_x, bld_n + procedure, pass(x) :: get_vect => i2_mvect_get_vect + procedure, pass(x) :: cnv => i2_mvect_cnv + procedure, pass(x) :: set_scal => i2_mvect_set_scal + procedure, pass(x) :: set_vect => i2_mvect_set_vect + generic, public :: set => set_vect, set_scal + procedure, pass(x) :: clone => i2_mvect_clone + procedure, pass(x) :: gthab => i2_mvect_gthab + procedure, pass(x) :: gthzv => i2_mvect_gthzv + procedure, pass(x) :: gthzv_x => i2_mvect_gthzv_x + generic, public :: gth => gthab, gthzv + procedure, pass(y) :: sctb => i2_mvect_sctb + procedure, pass(y) :: sctb_x => i2_mvect_sctb_x + generic, public :: sct => sctb, sctb_x + end type psb_i2_multivect_type + + public :: psb_i2_multivect, psb_i2_multivect_type,& + & psb_set_multivect_default, psb_get_multivect_default, & + & psb_i2_base_multivect_type + + private + interface psb_i2_multivect + module procedure constructor, size_const + end interface psb_i2_multivect + + class(psb_i2_base_multivect_type), allocatable, target,& + & save, private :: psb_i2_base_multivect_default + + interface psb_set_multivect_default + module procedure psb_i2_set_multivect_default + end interface psb_set_multivect_default + + interface psb_get_multivect_default + module procedure psb_i2_get_multivect_default + end interface psb_get_multivect_default + + +contains + + + function i2_mvect_get_dupl(x) result(res) + implicit none + class(psb_i2_multivect_type), intent(in) :: x + integer(psb_ipk_) :: res + res = x%dupl + end function i2_mvect_get_dupl + + subroutine i2_mvect_set_dupl(x,val) + implicit none + class(psb_i2_multivect_type), intent(inout) :: x + integer(psb_ipk_), intent(in), optional :: val + + if (present(val)) then + x%dupl = val + else + x%dupl = psb_dupl_def_ + end if + end subroutine i2_mvect_set_dupl + + + function i2_mvect_is_remote_build(x) result(res) + implicit none + class(psb_i2_multivect_type), intent(in) :: x + logical :: res + res = (x%remote_build == psb_matbld_remote_) + end function i2_mvect_is_remote_build + + subroutine i2_mvect_set_remote_build(x,val) + implicit none + class(psb_i2_multivect_type), intent(inout) :: x + integer(psb_ipk_), intent(in), optional :: val + + if (present(val)) then + x%remote_build = val + else + x%remote_build = psb_matbld_remote_ + end if + end subroutine i2_mvect_set_remote_build + + + subroutine psb_i2_set_multivect_default(v) + implicit none + class(psb_i2_base_multivect_type), intent(in) :: v + + if (allocated(psb_i2_base_multivect_default)) then + deallocate(psb_i2_base_multivect_default) + end if + allocate(psb_i2_base_multivect_default, mold=v) + + end subroutine psb_i2_set_multivect_default + + function psb_i2_get_multivect_default(v) result(res) + implicit none + class(psb_i2_multivect_type), intent(in) :: v + class(psb_i2_base_multivect_type), pointer :: res + + res => psb_i2_get_base_multivect_default() + + end function psb_i2_get_multivect_default + + + function psb_i2_get_base_multivect_default() result(res) + implicit none + class(psb_i2_base_multivect_type), pointer :: res + + if (.not.allocated(psb_i2_base_multivect_default)) then + allocate(psb_i2_base_multivect_type :: psb_i2_base_multivect_default) + end if + + res => psb_i2_base_multivect_default + + end function psb_i2_get_base_multivect_default + + + subroutine i2_mvect_clone(x,y,info) + implicit none + class(psb_i2_multivect_type), intent(inout) :: x + class(psb_i2_multivect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + call y%free(info) + if ((info==0).and.allocated(x%v)) then + call y%bld_x(x%get_vect(),mold=x%v) + end if + end subroutine i2_mvect_clone + + subroutine i2_mvect_bld_x(x,invect,mold) + integer(psb_i2pk_), intent(in) :: invect(:,:) + class(psb_i2_multivect_type), intent(out) :: x + class(psb_i2_base_multivect_type), intent(in), optional :: mold + integer(psb_ipk_) :: info + class(psb_i2_base_multivect_type), pointer :: mld + + info = psb_success_ + if (present(mold)) then + allocate(x%v,stat=info,mold=mold) + else + allocate(x%v,stat=info, mold=psb_i2_get_base_multivect_default()) + endif + + if (info == psb_success_) call x%v%bld(invect) + + end subroutine i2_mvect_bld_x + + + subroutine i2_mvect_bld_n(x,m,n,mold) + integer(psb_ipk_), intent(in) :: m,n + class(psb_i2_multivect_type), intent(out) :: x + class(psb_i2_base_multivect_type), intent(in), optional :: mold + integer(psb_ipk_) :: info + + info = psb_success_ + if (present(mold)) then + allocate(x%v,stat=info,mold=mold) + else + allocate(x%v,stat=info, mold=psb_i2_get_base_multivect_default()) + endif + if (info == psb_success_) call x%v%bld(m,n) + + end subroutine i2_mvect_bld_n + + function i2_mvect_get_vect(x) result(res) + class(psb_i2_multivect_type), intent(inout) :: x + integer(psb_i2pk_), allocatable :: res(:,:) + integer(psb_ipk_) :: info + + if (allocated(x%v)) then + res = x%v%get_vect() + end if + end function i2_mvect_get_vect + + subroutine i2_mvect_set_scal(x,val) + class(psb_i2_multivect_type), intent(inout) :: x + integer(psb_i2pk_), intent(in) :: val + + integer(psb_ipk_) :: info + if (allocated(x%v)) call x%v%set(val) + + end subroutine i2_mvect_set_scal + + subroutine i2_mvect_set_vect(x,val) + class(psb_i2_multivect_type), intent(inout) :: x + integer(psb_i2pk_), intent(in) :: val(:,:) + + integer(psb_ipk_) :: info + if (allocated(x%v)) call x%v%set(val) + + end subroutine i2_mvect_set_vect + + + function constructor(x) result(this) + integer(psb_i2pk_) :: x(:,:) + type(psb_i2_multivect_type) :: this + integer(psb_ipk_) :: info + + call this%bld_x(x) + call this%asb(size(x,dim=1,kind=psb_ipk_),size(x,dim=2,kind=psb_ipk_),info) + + end function constructor + + + function size_const(m,n) result(this) + integer(psb_ipk_), intent(in) :: m,n + type(psb_i2_multivect_type) :: this + integer(psb_ipk_) :: info + + call this%bld_n(m,n) + call this%asb(m,n,info) + + end function size_const + + function i2_mvect_get_nrows(x) result(res) + implicit none + class(psb_i2_multivect_type), intent(in) :: x + integer(psb_ipk_) :: res + res = 0 + if (allocated(x%v)) res = x%v%get_nrows() + end function i2_mvect_get_nrows + + function i2_mvect_get_ncols(x) result(res) + implicit none + class(psb_i2_multivect_type), intent(in) :: x + integer(psb_ipk_) :: res + res = 0 + if (allocated(x%v)) res = x%v%get_ncols() + end function i2_mvect_get_ncols + + function i2_mvect_sizeof(x) result(res) + implicit none + class(psb_i2_multivect_type), intent(in) :: x + integer(psb_epk_) :: res + res = 0 + if (allocated(x%v)) res = x%v%sizeof() + end function i2_mvect_sizeof + + function i2_mvect_get_fmt(x) result(res) + implicit none + class(psb_i2_multivect_type), intent(in) :: x + character(len=5) :: res + res = 'NULL' + if (allocated(x%v)) res = x%v%get_fmt() + end function i2_mvect_get_fmt + + subroutine i2_mvect_all(m,n, x, info, mold) + + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_i2_multivect_type), intent(out) :: x + class(psb_i2_base_multivect_type), intent(in), optional :: mold + integer(psb_ipk_), intent(out) :: info + + if (present(mold)) then + allocate(x%v,stat=info,mold=mold) + else + allocate(psb_i2_base_multivect_type :: x%v,stat=info) + endif + if (info == 0) then + call x%v%all(m,n,info) + else + info = psb_err_alloc_dealloc_ + end if + + end subroutine i2_mvect_all + + subroutine i2_mvect_reall(m,n, x, info) + + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_i2_multivect_type), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (.not.allocated(x%v)) & + & call x%all(m,n,info) + if (info == 0) & + & call x%asb(m,n,info) + + end subroutine i2_mvect_reall + + subroutine i2_mvect_zero(x) + use psi_serial_mod + implicit none + class(psb_i2_multivect_type), intent(inout) :: x + + if (allocated(x%v)) call x%v%zero() + + end subroutine i2_mvect_zero + + subroutine i2_mvect_asb(m,n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_i2_multivect_type), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + if (allocated(x%v)) & + & call x%v%asb(m,n,info) + + end subroutine i2_mvect_asb + + subroutine i2_mvect_sync(x) + implicit none + class(psb_i2_multivect_type), intent(inout) :: x + + if (allocated(x%v)) & + & call x%v%sync() + + end subroutine i2_mvect_sync + + subroutine i2_mvect_gthab(n,idx,alpha,x,beta,y) + use psi_serial_mod + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) + integer(psb_i2pk_) :: alpha, beta, y(:) + class(psb_i2_multivect_type) :: x + + if (allocated(x%v)) & + & call x%v%gth(n,idx,alpha,beta,y) + + end subroutine i2_mvect_gthab + + subroutine i2_mvect_gthzv(n,idx,x,y) + use psi_serial_mod + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) + integer(psb_i2pk_) :: y(:) + class(psb_i2_multivect_type) :: x + + if (allocated(x%v)) & + & call x%v%gth(n,idx,y) + + end subroutine i2_mvect_gthzv + + subroutine i2_mvect_gthzv_x(i,n,idx,x,y) + use psi_serial_mod + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i + class(psb_i_base_vect_type) :: idx + integer(psb_i2pk_) :: y(:) + class(psb_i2_multivect_type) :: x + + if (allocated(x%v)) & + & call x%v%gth(i,n,idx,y) + + end subroutine i2_mvect_gthzv_x + + subroutine i2_mvect_sctb(n,idx,x,beta,y) + use psi_serial_mod + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) + integer(psb_i2pk_) :: beta, x(:) + class(psb_i2_multivect_type) :: y + + if (allocated(y%v)) & + & call y%v%sct(n,idx,x,beta) + + end subroutine i2_mvect_sctb + + subroutine i2_mvect_sctb_x(i,n,idx,x,beta,y) + use psi_serial_mod + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i + class(psb_i_base_vect_type) :: idx + integer(psb_i2pk_) :: beta, x(:) + class(psb_i2_multivect_type) :: y + + if (allocated(y%v)) & + & call y%v%sct(i,n,idx,x,beta) + + end subroutine i2_mvect_sctb_x + + subroutine i2_mvect_free(x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + class(psb_i2_multivect_type), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (allocated(x%v)) then + call x%v%free(info) + if (info == 0) deallocate(x%v,stat=info) + end if + + end subroutine i2_mvect_free + + subroutine i2_mvect_ins(n,irl,val,x,maxr,info) + use psi_serial_mod + implicit none + class(psb_i2_multivect_type), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n,maxr + integer(psb_ipk_), intent(in) :: irl(:) + integer(psb_i2pk_), intent(in) :: val(:,:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, dupl + + info = 0 + if (.not.allocated(x%v)) then + info = psb_err_invalid_vect_state_ + return + end if + dupl = x%get_dupl() + call x%v%ins(n,irl,val,dupl,maxr,info) + + end subroutine i2_mvect_ins + + + subroutine i2_mvect_cnv(x,mold) + class(psb_i2_multivect_type), intent(inout) :: x + class(psb_i2_base_multivect_type), intent(in), optional :: mold + class(psb_i2_base_multivect_type), allocatable :: tmp + integer(psb_ipk_) :: info + + if (present(mold)) then + allocate(tmp,stat=info,mold=mold) + else + allocate(tmp,stat=info, mold=psb_i2_get_base_multivect_default()) + endif + if (allocated(x%v)) then + call x%v%sync() + if (info == psb_success_) call tmp%bld(x%v%v) + call x%v%free(info) + end if + call move_alloc(tmp,x%v) + end subroutine i2_mvect_cnv + + +end module psb_i2_multivect_mod diff --git a/base/modules/serial/psb_vect_mod.f90 b/base/modules/serial/psb_vect_mod.f90 index 80e7da77..d82eeffb 100644 --- a/base/modules/serial/psb_vect_mod.f90 +++ b/base/modules/serial/psb_vect_mod.f90 @@ -1,4 +1,5 @@ module psb_vect_mod + use psb_i2_vect_mod use psb_i_vect_mod use psb_l_vect_mod use psb_s_vect_mod diff --git a/base/modules/tools/psb_tools_mod.f90 b/base/modules/tools/psb_tools_mod.f90 index 1fdd3057..ca2020ee 100644 --- a/base/modules/tools/psb_tools_mod.f90 +++ b/base/modules/tools/psb_tools_mod.f90 @@ -31,6 +31,7 @@ ! module psb_tools_mod use psb_cd_tools_mod + use psb_i2_tools_a_mod use psb_e_tools_a_mod use psb_m_tools_a_mod use psb_s_tools_a_mod diff --git a/base/serial/Makefile b/base/serial/Makefile index 57e4bfa1..7e0431aa 100644 --- a/base/serial/Makefile +++ b/base/serial/Makefile @@ -1,7 +1,7 @@ include ../../Make.inc -FOBJS = psb_lsame.o psi_m_serial_impl.o psi_e_serial_impl.o \ +FOBJS = psb_lsame.o psi_m_serial_impl.o psi_e_serial_impl.o psi_i2_serial_impl.o \ psi_s_serial_impl.o psi_d_serial_impl.o \ psi_c_serial_impl.o psi_z_serial_impl.o \ psb_srwextd.o psb_drwextd.o psb_crwextd.o psb_zrwextd.o \ diff --git a/base/serial/sort/Makefile b/base/serial/sort/Makefile index ff8fd620..9a462839 100644 --- a/base/serial/sort/Makefile +++ b/base/serial/sort/Makefile @@ -3,6 +3,7 @@ include ../../../Make.inc # # The object files # +I2OBJS=psb_i2_hsort_impl.o psb_i2_isort_impl.o psb_i2_msort_impl.o psb_i2_qsort_impl.o IOBJS=psb_m_hsort_impl.o psb_m_isort_impl.o psb_m_msort_impl.o psb_m_qsort_impl.o LOBJS=psb_e_hsort_impl.o psb_e_isort_impl.o psb_e_msort_impl.o psb_e_qsort_impl.o SOBJS=psb_s_hsort_impl.o psb_s_isort_impl.o psb_s_msort_impl.o psb_s_qsort_impl.o @@ -10,7 +11,7 @@ DOBJS=psb_d_hsort_impl.o psb_d_isort_impl.o psb_d_msort_impl.o psb_d_qsort_impl. COBJS=psb_c_hsort_impl.o psb_c_isort_impl.o psb_c_msort_impl.o psb_c_qsort_impl.o ZOBJS=psb_z_hsort_impl.o psb_z_isort_impl.o psb_z_msort_impl.o psb_z_qsort_impl.o -OBJS=$(SOBJS) $(DOBJS) $(COBJS) $(ZOBJS) $(IOBJS) $(LOBJS) +OBJS=$(SOBJS) $(DOBJS) $(COBJS) $(ZOBJS) $(I2OBJS) $(IOBJS) $(LOBJS) # # Where the library should go, and how it is called. diff --git a/base/serial/sort/psb_i2_hsort_impl.f90 b/base/serial/sort/psb_i2_hsort_impl.f90 new file mode 100644 index 00000000..06fff6ba --- /dev/null +++ b/base/serial/sort/psb_i2_hsort_impl.f90 @@ -0,0 +1,721 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific prior written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! +! The merge-sort and quicksort routines are implemented in the +! serial/aux directory +! References: +! D. Knuth +! The Art of Computer Programming, vol. 3 +! Addison-Wesley +! +! Aho, Hopcroft, Ullman +! Data Structures and Algorithms +! Addison-Wesley +! +subroutine psb_i2hsort(x,ix,dir,flag,reord) + use psb_sort_mod, psb_protect_name => psb_i2hsort + use psb_error_mod + implicit none + integer(psb_i2pk_), intent(inout) :: x(:) + integer(psb_ipk_), optional, intent(in) :: dir, flag,reord + integer(psb_ipk_), optional, intent(inout) :: ix(:) + + integer(psb_ipk_) :: flag_, err_act, info, reord_ + integer(psb_ipk_) :: n, i, l, dir_ + integer(psb_i2pk_) :: key + integer(psb_ipk_) :: index + integer(psb_i2pk_), allocatable :: tx(:) + + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name + + name='psb_hsort' + call psb_erractionsave(err_act) + + + if (present(reord)) then + reord_ = reord + else + reord_= psb_sort_reord_x_ + end if + + if (present(flag)) then + flag_ = flag + else + flag_ = psb_sort_ovw_idx_ + end if + select case(flag_) + case( psb_sort_ovw_idx_, psb_sort_keep_idx_) + ! OK keep going + case default + ierr(1) = 4; ierr(2) = flag_; + call psb_errpush(psb_err_input_value_invalid_i_,name,i_err=ierr) + goto 9999 + end select + + if (present(dir)) then + dir_ = dir + else + dir_= psb_sort_up_ + end if + + select case(dir_) + case(psb_sort_up_,psb_sort_down_) + ! OK + case (psb_asort_up_,psb_asort_down_) + ! OK + case default + ierr(1) = 3; ierr(2) = dir_; + call psb_errpush(psb_err_input_value_invalid_i_,name,i_err=ierr) + goto 9999 + end select + + n = size(x) + + ! + ! Dirty trick to sort with heaps: if we want + ! to sort in place upwards, first we set up a heap so that + ! we can easily get the LARGEST element, then we take it out + ! and put it in the last entry, and so on. + ! So, we invert dir_ + ! + dir_ = -dir_ + + if (present(ix)) then + if (size(ix) < n) then + ierr(1) = 2; ierr(2) = size(ix); + call psb_errpush(psb_err_input_asize_invalid_i_,name,i_err=ierr) + goto 9999 + end if + if (flag_ == psb_sort_ovw_idx_) then + do i=1, n + ix(i) = i + end do + end if + select case(reord_) + case (psb_sort_reord_x_) + + l = 0 + do i=1, n + key = x(i) + index = ix(i) + call psi_idx_insert_heap(key,index,l,x,ix,dir_,info) + if (l /= i) then + write(psb_err_unit,*) 'Mismatch while heapifying ! ' + end if + end do + do i=n, 2, -1 + call psi_idx_heap_get_first(key,index,l,x,ix,dir_,info) + if (l /= i-1) then + write(psb_err_unit,*) 'Mismatch while pulling out of heap ',l,i + end if + x(i) = key + ix(i) = index + end do + case(psb_sort_noreord_x_) + tx = x + + l = 0 + do i=1, n + key = tx(i) + index = ix(i) + call psi_idx_insert_heap(key,index,l,tx,ix,dir_,info) + if (l /= i) then + write(psb_err_unit,*) 'Mismatch while heapifying ! ' + end if + end do + do i=n, 2, -1 + call psi_idx_heap_get_first(key,index,l,tx,ix,dir_,info) + if (l /= i-1) then + write(psb_err_unit,*) 'Mismatch while pulling out of heap ',l,i + end if + tx(i) = key + ix(i) = index + end do + end select + else if (.not.present(ix)) then + select case(reord_) + case (psb_sort_reord_x_) + !OK + case default + ierr(1) = 5; ierr(2) = reord_; + call psb_errpush(psb_err_input_value_invalid_i_,name,i_err=ierr) + goto 9999 + end select + + l = 0 + do i=1, n + key = x(i) + call psi_insert_heap(key,l,x,dir_,info) + if (l /= i) then + write(psb_err_unit,*) 'Mismatch while heapifying ! ',l,i + end if + end do + do i=n, 2, -1 + call psi_i2_heap_get_first(key,l,x,dir_,info) + if (l /= i-1) then + write(psb_err_unit,*) 'Mismatch while pulling out of heap ',l,i + end if + x(i) = key + end do + end if + + + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_i2hsort + + + +! +! These are packaged so that they can be used to implement +! a heapsort. +! +! +! Programming note: +! In the implementation of the heap_get_first function +! we have code like this +! +! if ( ( heap(2*i) < heap(2*i+1) ) .or.& +! & (2*i == last)) then +! j = 2*i +! else +! j = 2*i + 1 +! end if +! +! It looks like the 2*i+1 could overflow the array, but this +! is not true because there is a guard statement +! if (i>last/2) exit +! and because last has just been reduced by 1 when defining the return value, +! therefore 2*i+1 may be greater than the current value of last, +! but cannot be greater than the value of last when the routine was entered +! hence it is safe. +! +! +! + +subroutine psi_i2_insert_heap(key,last,heap,dir,info) + use psb_sort_mod, psb_protect_name => psi_i2_insert_heap + implicit none + + ! + ! Input: + ! key: the new value + ! last: pointer to the last occupied element in heap + ! heap: the heap + ! dir: sorting direction + + integer(psb_i2pk_), intent(in) :: key + integer(psb_ipk_), intent(in) :: dir + integer(psb_i2pk_), intent(inout) :: heap(:) + integer(psb_ipk_), intent(inout) :: last + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, i2 + integer(psb_i2pk_) :: temp + + info = psb_success_ + if (last < 0) then + write(psb_err_unit,*) 'Invalid last in heap ',last + info = last + return + endif + last = last + 1 + if (last > size(heap)) then + write(psb_err_unit,*) 'out of bounds ' + info = -1 + return + end if + i = last + heap(i) = key + + select case(dir) + case (psb_sort_up_) + + do + if (i<=1) exit + i2 = i/2 + if (heap(i) < heap(i2)) then + temp = heap(i) + heap(i) = heap(i2) + heap(i2) = temp + i = i2 + else + exit + end if + end do + + + case (psb_sort_down_) + + do + if (i<=1) exit + i2 = i/2 + if (heap(i) > heap(i2)) then + temp = heap(i) + heap(i) = heap(i2) + heap(i2) = temp + i = i2 + else + exit + end if + end do + + case (psb_asort_up_) + + do + if (i<=1) exit + i2 = i/2 + if (abs(heap(i)) < abs(heap(i2))) then + temp = heap(i) + heap(i) = heap(i2) + heap(i2) = temp + i = i2 + else + exit + end if + end do + + + case (psb_asort_down_) + + do + if (i<=1) exit + i2 = i/2 + if (abs(heap(i)) > abs(heap(i2))) then + temp = heap(i) + heap(i) = heap(i2) + heap(i2) = temp + i = i2 + else + exit + end if + end do + + + case default + write(psb_err_unit,*) 'Invalid direction in heap ',dir + end select + + return +end subroutine psi_i2_insert_heap + + +subroutine psi_i2_heap_get_first(key,last,heap,dir,info) + use psb_sort_mod, psb_protect_name => psi_i2_heap_get_first + implicit none + + integer(psb_i2pk_), intent(inout) :: key + integer(psb_ipk_), intent(inout) :: last + integer(psb_ipk_), intent(in) :: dir + integer(psb_i2pk_), intent(inout) :: heap(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, j + integer(psb_i2pk_) :: temp + + + info = psb_success_ + if (last <= 0) then + key = 0 + info = -1 + return + endif + + key = heap(1) + heap(1) = heap(last) + last = last - 1 + + select case(dir) + case (psb_sort_up_) + + i = 1 + do + if (i > (last/2)) exit + if ( (heap(2*i) < heap(2*i+1)) .or.& + & (2*i == last)) then + j = 2*i + else + j = 2*i + 1 + end if + + if (heap(i) > heap(j)) then + temp = heap(i) + heap(i) = heap(j) + heap(j) = temp + i = j + else + exit + end if + end do + + + case (psb_sort_down_) + + i = 1 + do + if (i > (last/2)) exit + if ( (heap(2*i) > heap(2*i+1)) .or.& + & (2*i == last)) then + j = 2*i + else + j = 2*i + 1 + end if + + if (heap(i) < heap(j)) then + temp = heap(i) + heap(i) = heap(j) + heap(j) = temp + i = j + else + exit + end if + end do + + case (psb_asort_up_) + + i = 1 + do + if (i > (last/2)) exit + if ( (abs(heap(2*i)) < abs(heap(2*i+1))) .or.& + & (2*i == last)) then + j = 2*i + else + j = 2*i + 1 + end if + + if (abs(heap(i)) > abs(heap(j))) then + temp = heap(i) + heap(i) = heap(j) + heap(j) = temp + i = j + else + exit + end if + end do + + + case (psb_asort_down_) + + i = 1 + do + if (i > (last/2)) exit + if ( (abs(heap(2*i)) > abs(heap(2*i+1))) .or.& + & (2*i == last)) then + j = 2*i + else + j = 2*i + 1 + end if + + if (abs(heap(i)) < abs(heap(j))) then + temp = heap(i) + heap(i) = heap(j) + heap(j) = temp + i = j + else + exit + end if + end do + + case default + write(psb_err_unit,*) 'Invalid direction in heap ',dir + end select + + return +end subroutine psi_i2_heap_get_first + + +subroutine psi_i2_idx_insert_heap(key,index,last,heap,idxs,dir,info) + use psb_sort_mod, psb_protect_name => psi_i2_idx_insert_heap + + implicit none + ! + ! Input: + ! key: the new value + ! index: the new index + ! last: pointer to the last occupied element in heap + ! heap: the heap + ! idxs: the indices + ! dir: sorting direction + + integer(psb_i2pk_), intent(in) :: key + integer(psb_ipk_), intent(in) :: index,dir + integer(psb_i2pk_), intent(inout) :: heap(:) + integer(psb_ipk_), intent(inout) :: idxs(:),last + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, i2, itemp + integer(psb_i2pk_) :: temp + + info = psb_success_ + if (last < 0) then + write(psb_err_unit,*) 'Invalid last in heap ',last + info = last + return + endif + + last = last + 1 + if (last > size(heap)) then + write(psb_err_unit,*) 'out of bounds ' + info = -1 + return + end if + + i = last + heap(i) = key + idxs(i) = index + + select case(dir) + case (psb_sort_up_) + + do + if (i<=1) exit + i2 = i/2 + if (heap(i) < heap(i2)) then + itemp = idxs(i) + idxs(i) = idxs(i2) + idxs(i2) = itemp + temp = heap(i) + heap(i) = heap(i2) + heap(i2) = temp + i = i2 + else + exit + end if + end do + + + case (psb_sort_down_) + + do + if (i<=1) exit + i2 = i/2 + if (heap(i) > heap(i2)) then + itemp = idxs(i) + idxs(i) = idxs(i2) + idxs(i2) = itemp + temp = heap(i) + heap(i) = heap(i2) + heap(i2) = temp + i = i2 + else + exit + end if + end do + + case (psb_asort_up_) + + do + if (i<=1) exit + i2 = i/2 + if (abs(heap(i)) < abs(heap(i2))) then + itemp = idxs(i) + idxs(i) = idxs(i2) + idxs(i2) = itemp + temp = heap(i) + heap(i) = heap(i2) + heap(i2) = temp + i = i2 + else + exit + end if + end do + + + case (psb_asort_down_) + + do + if (i<=1) exit + i2 = i/2 + if (abs(heap(i)) > abs(heap(i2))) then + itemp = idxs(i) + idxs(i) = idxs(i2) + idxs(i2) = itemp + temp = heap(i) + heap(i) = heap(i2) + heap(i2) = temp + i = i2 + else + exit + end if + end do + + + case default + write(psb_err_unit,*) 'Invalid direction in heap ',dir + end select + + return +end subroutine psi_i2_idx_insert_heap + +subroutine psi_i2_idx_heap_get_first(key,index,last,heap,idxs,dir,info) + use psb_sort_mod, psb_protect_name => psi_i2_idx_heap_get_first + implicit none + + integer(psb_i2pk_), intent(inout) :: key + integer(psb_i2pk_), intent(inout) :: heap(:) + integer(psb_ipk_), intent(out) :: index + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(inout) :: last,idxs(:) + integer(psb_ipk_), intent(in) :: dir + + integer(psb_ipk_) :: i, j,itemp + integer(psb_i2pk_) :: temp + + info = psb_success_ + if (last <= 0) then + key = 0 + index = 0 + info = -1 + return + endif + + key = heap(1) + index = idxs(1) + heap(1) = heap(last) + idxs(1) = idxs(last) + last = last - 1 + + select case(dir) + case (psb_sort_up_) + + i = 1 + do + if (i > (last/2)) exit + if ( (heap(2*i) < heap(2*i+1)) .or.& + & (2*i == last)) then + j = 2*i + else + j = 2*i + 1 + end if + + if (heap(i) > heap(j)) then + itemp = idxs(i) + idxs(i) = idxs(j) + idxs(j) = itemp + temp = heap(i) + heap(i) = heap(j) + heap(j) = temp + i = j + else + exit + end if + end do + + + case (psb_sort_down_) + + i = 1 + do + if (i > (last/2)) exit + if ( (heap(2*i) > heap(2*i+1)) .or.& + & (2*i == last)) then + j = 2*i + else + j = 2*i + 1 + end if + + if (heap(i) < heap(j)) then + itemp = idxs(i) + idxs(i) = idxs(j) + idxs(j) = itemp + temp = heap(i) + heap(i) = heap(j) + heap(j) = temp + i = j + else + exit + end if + end do + + case (psb_asort_up_) + + i = 1 + do + if (i > (last/2)) exit + if ( (abs(heap(2*i)) < abs(heap(2*i+1))) .or.& + & (2*i == last)) then + j = 2*i + else + j = 2*i + 1 + end if + + if (abs(heap(i)) > abs(heap(j))) then + itemp = idxs(i) + idxs(i) = idxs(j) + idxs(j) = itemp + temp = heap(i) + heap(i) = heap(j) + heap(j) = temp + i = j + else + exit + end if + end do + + + case (psb_asort_down_) + + i = 1 + do + if (i > (last/2)) exit + if ( (abs(heap(2*i)) > abs(heap(2*i+1))) .or.& + & (2*i == last)) then + j = 2*i + else + j = 2*i + 1 + end if + + if (abs(heap(i)) < abs(heap(j))) then + itemp = idxs(i) + idxs(i) = idxs(j) + idxs(j) = itemp + temp = heap(i) + heap(i) = heap(j) + heap(j) = temp + i = j + else + exit + end if + end do + + case default + write(psb_err_unit,*) 'Invalid direction in heap ',dir + end select + + return +end subroutine psi_i2_idx_heap_get_first + + + + diff --git a/base/serial/sort/psb_i2_isort_impl.f90 b/base/serial/sort/psb_i2_isort_impl.f90 new file mode 100644 index 00000000..f582d268 --- /dev/null +++ b/base/serial/sort/psb_i2_isort_impl.f90 @@ -0,0 +1,378 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific prior written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! +! The insertion sort routines +! References: +! D. Knuth +! The Art of Computer Programming, vol. 3 +! Addison-Wesley +! +! Aho, Hopcroft, Ullman +! Data Structures and Algorithms +! Addison-Wesley +! +subroutine psb_i2isort(x,ix,dir,flag,reord) + use psb_sort_mod, psb_protect_name => psb_i2isort + use psb_error_mod + implicit none + integer(psb_i2pk_), intent(inout) :: x(:) + integer(psb_ipk_), optional, intent(in) :: dir, flag,reord + integer(psb_ipk_), optional, intent(inout) :: ix(:) + + integer(psb_ipk_) :: dir_, flag_, err_act, reord_ + integer(psb_ipk_) :: n, i + integer(psb_i2pk_), allocatable :: tx(:) + + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name + + name='psb_i2isort' + call psb_erractionsave(err_act) + + if (present(reord)) then + reord_ = reord + else + reord_= psb_sort_reord_x_ + end if + + if (present(flag)) then + flag_ = flag + else + flag_ = psb_sort_ovw_idx_ + end if + select case(flag_) + case( psb_sort_ovw_idx_, psb_sort_keep_idx_) + ! OK keep going + case default + ierr(1) = 4; ierr(2) = flag_; + call psb_errpush(psb_err_input_value_invalid_i_,name,i_err=ierr) + goto 9999 + end select + + if (present(dir)) then + dir_ = dir + else + dir_= psb_sort_up_ + end if + + n = size(x) + + if (present(ix)) then + if (size(ix) < n) then + ierr(1) = 2; ierr(2) = size(ix); + call psb_errpush(psb_err_input_asize_invalid_i_,name,i_err=ierr) + goto 9999 + end if + if (flag_==psb_sort_ovw_idx_) then + do i=1,n + ix(i) = i + end do + end if + select case(reord_) + case (psb_sort_reord_x_) + select case(dir_) + case (psb_sort_up_) + call psi_i2isrx_up(n,x,ix) + case (psb_sort_down_) + call psi_i2isrx_dw(n,x,ix) + case (psb_asort_up_) + call psi_i2aisrx_up(n,x,ix) + case (psb_asort_down_) + call psi_i2aisrx_dw(n,x,ix) + case default + ierr(1) = 3; ierr(2) = dir_; + call psb_errpush(psb_err_input_value_invalid_i_,name,i_err=ierr) + goto 9999 + end select + case(psb_sort_noreord_x_) + tx = x + select case(dir_) + case (psb_sort_up_) + call psi_i2isrx_up(n,tx,ix) + case (psb_sort_down_) + call psi_i2isrx_dw(n,tx,ix) + case (psb_asort_up_) + call psi_i2aisrx_up(n,tx,ix) + case (psb_asort_down_) + call psi_i2aisrx_dw(n,tx,ix) + case default + ierr(1) = 3; ierr(2) = dir_; + call psb_errpush(psb_err_input_value_invalid_i_,name,i_err=ierr) + goto 9999 + end select + case default + ierr(1) = 5; ierr(2) = reord_; + call psb_errpush(psb_err_input_value_invalid_i_,name,i_err=ierr) + goto 9999 + end select + else + select case(reord_) + case (psb_sort_reord_x_) + !OK + case default + ierr(1) = 5; ierr(2) = reord_; + call psb_errpush(psb_err_input_value_invalid_i_,name,i_err=ierr) + goto 9999 + end select + select case(dir_) + case (psb_sort_up_) + call psi_i2isr_up(n,x) + case (psb_sort_down_) + call psi_i2isr_dw(n,x) + case (psb_asort_up_) + call psi_i2aisr_up(n,x) + case (psb_asort_down_) + call psi_i2aisr_dw(n,x) + case default + ierr(1) = 3; ierr(2) = dir_; + call psb_errpush(psb_err_input_value_invalid_i_,name,i_err=ierr) + goto 9999 + end select + + end if + + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_i2isort + +subroutine psi_i2isrx_up(n,x,idx) + use psb_sort_mod, psb_protect_name => psi_i2isrx_up + use psb_error_mod + implicit none + integer(psb_i2pk_), intent(inout) :: x(:) + integer(psb_ipk_), intent(inout) :: idx(:) + integer(psb_ipk_), intent(in) :: n + integer(psb_ipk_) :: i,j,ix + integer(psb_i2pk_) :: xx + + do j=n-1,1,-1 + if (x(j+1) < x(j)) then + xx = x(j) + ix = idx(j) + i=j+1 + do + x(i-1) = x(i) + idx(i-1) = idx(i) + i = i+1 + if (i>n) exit + if (x(i) >= xx) exit + end do + x(i-1) = xx + idx(i-1) = ix + endif + enddo +end subroutine psi_i2isrx_up + +subroutine psi_i2isrx_dw(n,x,idx) + use psb_sort_mod, psb_protect_name => psi_i2isrx_dw + use psb_error_mod + implicit none + integer(psb_i2pk_), intent(inout) :: x(:) + integer(psb_ipk_), intent(inout) :: idx(:) + integer(psb_ipk_), intent(in) :: n + integer(psb_ipk_) :: i,j,ix + integer(psb_i2pk_) :: xx + + do j=n-1,1,-1 + if (x(j+1) > x(j)) then + xx = x(j) + ix = idx(j) + i=j+1 + do + x(i-1) = x(i) + idx(i-1) = idx(i) + i = i+1 + if (i>n) exit + if (x(i) <= xx) exit + end do + x(i-1) = xx + idx(i-1) = ix + endif + enddo +end subroutine psi_i2isrx_dw + + +subroutine psi_i2isr_up(n,x) + use psb_sort_mod, psb_protect_name => psi_i2isr_up + use psb_error_mod + implicit none + integer(psb_i2pk_), intent(inout) :: x(:) + integer(psb_ipk_), intent(in) :: n + integer(psb_ipk_) :: i,j + integer(psb_i2pk_) :: xx + + do j=n-1,1,-1 + if (x(j+1) < x(j)) then + xx = x(j) + i=j+1 + do + x(i-1) = x(i) + i = i+1 + if (i>n) exit + if (x(i) >= xx) exit + end do + x(i-1) = xx + endif + enddo +end subroutine psi_i2isr_up + +subroutine psi_i2isr_dw(n,x) + use psb_sort_mod, psb_protect_name => psi_i2isr_dw + use psb_error_mod + implicit none + integer(psb_i2pk_), intent(inout) :: x(:) + integer(psb_ipk_), intent(in) :: n + integer(psb_ipk_) :: i,j + integer(psb_i2pk_) :: xx + + do j=n-1,1,-1 + if (x(j+1) > x(j)) then + xx = x(j) + i=j+1 + do + x(i-1) = x(i) + i = i+1 + if (i>n) exit + if (x(i) <= xx) exit + end do + x(i-1) = xx + endif + enddo +end subroutine psi_i2isr_dw + +subroutine psi_i2aisrx_up(n,x,idx) + use psb_sort_mod, psb_protect_name => psi_i2aisrx_up + use psb_error_mod + implicit none + integer(psb_i2pk_), intent(inout) :: x(:) + integer(psb_ipk_), intent(inout) :: idx(:) + integer(psb_ipk_), intent(in) :: n + integer(psb_ipk_) :: i,j,ix + integer(psb_i2pk_) :: xx + + do j=n-1,1,-1 + if (abs(x(j+1)) < abs(x(j))) then + xx = x(j) + ix = idx(j) + i=j+1 + do + x(i-1) = x(i) + idx(i-1) = idx(i) + i = i+1 + if (i>n) exit + if (abs(x(i)) >= abs(xx)) exit + end do + x(i-1) = xx + idx(i-1) = ix + endif + enddo +end subroutine psi_i2aisrx_up + +subroutine psi_i2aisrx_dw(n,x,idx) + use psb_sort_mod, psb_protect_name => psi_i2aisrx_dw + use psb_error_mod + implicit none + integer(psb_i2pk_), intent(inout) :: x(:) + integer(psb_ipk_), intent(inout) :: idx(:) + integer(psb_ipk_), intent(in) :: n + integer(psb_ipk_) :: i,j,ix + integer(psb_i2pk_) :: xx + + do j=n-1,1,-1 + if (abs(x(j+1)) > abs(x(j))) then + xx = x(j) + ix = idx(j) + i=j+1 + do + x(i-1) = x(i) + idx(i-1) = idx(i) + i = i+1 + if (i>n) exit + if (abs(x(i)) <= abs(xx)) exit + end do + x(i-1) = xx + idx(i-1) = ix + endif + enddo +end subroutine psi_i2aisrx_dw + +subroutine psi_i2aisr_up(n,x) + use psb_sort_mod, psb_protect_name => psi_i2aisr_up + use psb_error_mod + implicit none + integer(psb_i2pk_), intent(inout) :: x(:) + integer(psb_ipk_), intent(in) :: n + integer(psb_ipk_) :: i,j + integer(psb_i2pk_) :: xx + + do j=n-1,1,-1 + if (abs(x(j+1)) < abs(x(j))) then + xx = x(j) + i=j+1 + do + x(i-1) = x(i) + i = i+1 + if (i>n) exit + if (abs(x(i)) >= abs(xx)) exit + end do + x(i-1) = xx + endif + enddo +end subroutine psi_i2aisr_up + +subroutine psi_i2aisr_dw(n,x) + use psb_sort_mod, psb_protect_name => psi_i2aisr_dw + use psb_error_mod + implicit none + integer(psb_i2pk_), intent(inout) :: x(:) + integer(psb_ipk_), intent(in) :: n + integer(psb_ipk_) :: i,j + integer(psb_i2pk_) :: xx + + do j=n-1,1,-1 + if (abs(x(j+1)) > abs(x(j))) then + xx = x(j) + i=j+1 + do + x(i-1) = x(i) + i = i+1 + if (i>n) exit + if (abs(x(i)) <= abs(xx)) exit + end do + x(i-1) = xx + endif + enddo +end subroutine psi_i2aisr_dw + diff --git a/base/serial/sort/psb_i2_msort_impl.f90 b/base/serial/sort/psb_i2_msort_impl.f90 new file mode 100644 index 00000000..67835351 --- /dev/null +++ b/base/serial/sort/psb_i2_msort_impl.f90 @@ -0,0 +1,667 @@ + ! + ! Parallel Sparse BLAS version 3.5 + ! (C) Copyright 2006-2018 + ! Salvatore Filippone + ! Alfredo Buttari + ! + ! Redistribution and use in source and binary forms, with or without + ! modification, are permitted provided that the following conditions + ! are met: + ! 1. Redistributions of source code must retain the above copyright + ! notice, this list of conditions and the following disclaimer. + ! 2. Redistributions in binary form must reproduce the above copyright + ! notice, this list of conditions, and the following disclaimer in the + ! documentation and/or other materials provided with the distribution. + ! 3. The name of the PSBLAS group or the names of its contributors may + ! not be used to endorse or promote products derived from this + ! software without specific prior written permission. + ! + ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + ! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS + ! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + ! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + ! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + ! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + ! POSSIBILITY OF SUCH DAMAGE. + ! + ! + ! + ! The merge-sort routines + ! References: + ! D. Knuth + ! The Art of Computer Programming, vol. 3 + ! Addison-Wesley + ! + ! Aho, Hopcroft, Ullman + ! Data Structures and Algorithms + ! Addison-Wesley + ! +logical function psb_i2isaperm(n,eip) + use psb_sort_mod, psb_protect_name => psb_i2isaperm + implicit none + + integer(psb_i2pk_), intent(in) :: n + integer(psb_i2pk_), intent(in) :: eip(n) + integer(psb_i2pk_), allocatable :: ip(:) + integer(psb_i2pk_) :: i,j,m, info + + + psb_i2isaperm = .true. + if (n <= 0) return + allocate(ip(n), stat=info) + if (info /= psb_success_) return + ! + ! sanity check first + ! + do i=1, n + ip(i) = eip(i) + if ((ip(i) < 1).or.(ip(i) > n)) then + write(psb_err_unit,*) 'Out of bounds in isaperm' ,ip(i), n + psb_i2isaperm = .false. + return + endif + enddo + + ! + ! now work through the cycles, by marking each successive item as negative. + ! no cycle should intersect with any other, hence the >= 1 check. + ! + do m = 1, n + i = ip(m) + if (i < 0) then + ip(m) = -i + else if (i /= m) then + j = ip(i) + ip(i) = -j + i = j + do while ((j >= 1).and.(j /= m)) + j = ip(i) + ip(i) = -j + i = j + enddo + ip(m) = abs(ip(m)) + if (j /= m) then + psb_i2isaperm = .false. + goto 9999 + endif + end if + enddo +9999 continue + + return +end function psb_i2isaperm + + +subroutine psb_i2msort_u(x,nout,dir) + use psb_sort_mod, psb_protect_name => psb_i2msort_u + use psb_error_mod + implicit none + integer(psb_i2pk_), intent(inout) :: x(:) + integer(psb_ipk_), intent(out) :: nout + integer(psb_ipk_), optional, intent(in) :: dir + + integer(psb_ipk_) :: n, k + integer(psb_ipk_) :: err_act + + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name + + name='psb_msort_u' + call psb_erractionsave(err_act) + + n = size(x) + + call psb_msort(x,dir=dir) + nout = min(1,n) + do k=2,n + if (x(k) /= x(nout)) then + nout = nout + 1 + x(nout) = x(k) + endif + enddo + + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_i2msort_u + +subroutine psb_i2msort(x,ix,dir,flag,reord) + use psb_sort_mod, psb_protect_name => psb_i2msort + use psb_error_mod + use psb_ip_reord_mod + implicit none + integer(psb_i2pk_), intent(inout) :: x(:) + integer(psb_ipk_), optional, intent(in) :: dir, flag, reord + integer(psb_ipk_), optional, intent(inout) :: ix(:) + + integer(psb_ipk_) :: dir_, flag_, n, err_act, reord_ + + integer(psb_ipk_), allocatable :: iaux(:) + integer(psb_ipk_) :: iret, info, i + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name + + name='psb_i2msort' + call psb_erractionsave(err_act) + + if (present(reord)) then + reord_ = reord + else + reord_= psb_sort_reord_x_ + end if + if (present(dir)) then + dir_ = dir + else + dir_= psb_sort_up_ + end if + select case(dir_) + case( psb_sort_up_, psb_sort_down_, psb_asort_up_, psb_asort_down_) + ! OK keep going + case default + ierr(1) = 3; ierr(2) = dir_; + call psb_errpush(psb_err_input_value_invalid_i_,name,i_err=ierr) + goto 9999 + end select + + n = size(x) + + if (present(ix)) then + if (size(ix) < n) then + ierr(1) = 2; ierr(2) = size(ix); + call psb_errpush(psb_err_input_asize_invalid_i_,name,i_err=ierr) + goto 9999 + end if + if (present(flag)) then + flag_ = flag + else + flag_ = psb_sort_ovw_idx_ + end if + select case(flag_) + case(psb_sort_ovw_idx_) + do i=1,n + ix(i) = i + end do + case (psb_sort_keep_idx_) + ! OK keep going + case default + ierr(1) = 4; ierr(2) = flag_; + call psb_errpush(psb_err_input_value_invalid_i_,name,i_err=ierr) + goto 9999 + end select + + end if + + allocate(iaux(0:n+1),stat=info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,r_name='psb_i2_msort') + goto 9999 + endif + + select case(dir_) + case (psb_sort_up_) + call psi_i2_msort_up(n,x,iaux,iret) + case (psb_sort_down_) + call psi_i2_msort_dw(n,x,iaux,iret) + case (psb_asort_up_) + call psi_i2_amsort_up(n,x,iaux,iret) + case (psb_asort_down_) + call psi_i2_amsort_dw(n,x,iaux,iret) + end select + ! + ! Do the actual reordering, since the inner routines + ! only provide linked pointers. + ! + if (iret == 0 ) then + select case(reord_) + case(psb_sort_reord_x_) + if (present(ix)) then + call psb_ip_reord(n,x,ix,iaux) + else + call psb_ip_reord(n,x,iaux) + end if + case(psb_sort_noreord_x_) + if (present(ix)) then + call psb_ip_reord(n,ix,iaux) + else + call psb_errpush(psb_err_no_optional_arg_,name,a_err="ix") + goto 9999 + end if + case default + ierr(1) = 5; ierr(2) = reord_; + call psb_errpush(psb_err_input_value_invalid_i_,name,i_err=ierr) + goto 9999 + end select + end if + + return + +9999 call psb_error_handler(err_act) + + return + + +end subroutine psb_i2msort + +subroutine psi_i2_msort_up(n,k,l,iret) + use psb_const_mod + implicit none + integer(psb_ipk_) :: n, iret + integer(psb_i2pk_) :: k(n) + integer(psb_ipk_) :: l(0:n+1) + ! + integer(psb_ipk_) :: p,q,s,t + ! .. + iret = 0 + ! first step: we are preparing ordered sublists, exploiting + ! what order was already in the input data; negative links + ! mark the end of the sublists + l(0) = 1 + t = n + 1 + do p = 1,n - 1 + if (k(p) <= k(p+1)) then + l(p) = p + 1 + else + l(t) = - (p+1) + t = p + end if + end do + l(t) = 0 + l(n) = 0 + ! see if the input was already sorted + if (l(n+1) == 0) then + iret = 1 + return + else + l(n+1) = abs(l(n+1)) + end if + + mergepass: do + ! otherwise, begin a pass through the list. + ! throughout all the subroutine we have: + ! p, q: pointing to the sublists being merged + ! s: pointing to the most recently processed record + ! t: pointing to the end of previously completed sublist + s = 0 + t = n + 1 + p = l(s) + q = l(t) + if (q == 0) exit mergepass + + outer: do + + if (k(p) > k(q)) then + + l(s) = sign(q,l(s)) + s = q + q = l(q) + if (q > 0) then + do + if (k(p) <= k(q)) cycle outer + s = q + q = l(q) + if (q <= 0) exit + end do + end if + l(s) = p + s = t + do + t = p + p = l(p) + if (p <= 0) exit + end do + + else + + l(s) = sign(p,l(s)) + s = p + p = l(p) + if (p>0) then + do + if (k(p) > k(q)) cycle outer + s = p + p = l(p) + if (p <= 0) exit + end do + end if + ! otherwise, one sublist ended, and we append to it the rest + ! of the other one. + l(s) = q + s = t + do + t = q + q = l(q) + if (q <= 0) exit + end do + end if + + p = -p + q = -q + if (q == 0) then + l(s) = sign(p,l(s)) + l(t) = 0 + exit outer + end if + end do outer + end do mergepass + +end subroutine psi_i2_msort_up + +subroutine psi_i2_msort_dw(n,k,l,iret) + use psb_const_mod + implicit none + integer(psb_ipk_) :: n, iret + integer(psb_i2pk_) :: k(n) + integer(psb_ipk_) :: l(0:n+1) + ! + integer(psb_ipk_) :: p,q,s,t + ! .. + iret = 0 + ! first step: we are preparing ordered sublists, exploiting + ! what order was already in the input data; negative links + ! mark the end of the sublists + l(0) = 1 + t = n + 1 + do p = 1,n - 1 + if (k(p) >= k(p+1)) then + l(p) = p + 1 + else + l(t) = - (p+1) + t = p + end if + end do + l(t) = 0 + l(n) = 0 + ! see if the input was already sorted + if (l(n+1) == 0) then + iret = 1 + return + else + l(n+1) = abs(l(n+1)) + end if + + mergepass: do + ! otherwise, begin a pass through the list. + ! throughout all the subroutine we have: + ! p, q: pointing to the sublists being merged + ! s: pointing to the most recently processed record + ! t: pointing to the end of previously completed sublist + s = 0 + t = n + 1 + p = l(s) + q = l(t) + if (q == 0) exit mergepass + + outer: do + + if (k(p) < k(q)) then + + l(s) = sign(q,l(s)) + s = q + q = l(q) + if (q > 0) then + do + if (k(p) >= k(q)) cycle outer + s = q + q = l(q) + if (q <= 0) exit + end do + end if + l(s) = p + s = t + do + t = p + p = l(p) + if (p <= 0) exit + end do + + else + + l(s) = sign(p,l(s)) + s = p + p = l(p) + if (p>0) then + do + if (k(p) < k(q)) cycle outer + s = p + p = l(p) + if (p <= 0) exit + end do + end if + ! otherwise, one sublist ended, and we append to it the rest + ! of the other one. + l(s) = q + s = t + do + t = q + q = l(q) + if (q <= 0) exit + end do + end if + + p = -p + q = -q + if (q == 0) then + l(s) = sign(p,l(s)) + l(t) = 0 + exit outer + end if + end do outer + end do mergepass + +end subroutine psi_i2_msort_dw + +subroutine psi_i2_amsort_up(n,k,l,iret) + use psb_const_mod + implicit none + integer(psb_ipk_) :: n, iret + integer(psb_i2pk_) :: k(n) + integer(psb_ipk_) :: l(0:n+1) + ! + integer(psb_ipk_) :: p,q,s,t + ! .. + iret = 0 + ! first step: we are preparing ordered sublists, exploiting + ! what order was already in the input data; negative links + ! mark the end of the sublists + l(0) = 1 + t = n + 1 + do p = 1,n - 1 + if (abs(k(p)) <= abs(k(p+1))) then + l(p) = p + 1 + else + l(t) = - (p+1) + t = p + end if + end do + l(t) = 0 + l(n) = 0 + ! see if the input was already sorted + if (l(n+1) == 0) then + iret = 1 + return + else + l(n+1) = abs(l(n+1)) + end if + + mergepass: do + ! otherwise, begin a pass through the list. + ! throughout all the subroutine we have: + ! p, q: pointing to the sublists being merged + ! s: pointing to the most recently processed record + ! t: pointing to the end of previously completed sublist + s = 0 + t = n + 1 + p = l(s) + q = l(t) + if (q == 0) exit mergepass + + outer: do + + if (abs(k(p)) > abs(k(q))) then + + l(s) = sign(q,l(s)) + s = q + q = l(q) + if (q > 0) then + do + if (abs(k(p)) <= abs(k(q))) cycle outer + s = q + q = l(q) + if (q <= 0) exit + end do + end if + l(s) = p + s = t + do + t = p + p = l(p) + if (p <= 0) exit + end do + + else + + l(s) = sign(p,l(s)) + s = p + p = l(p) + if (p>0) then + do + if (abs(k(p)) > abs(k(q))) cycle outer + s = p + p = l(p) + if (p <= 0) exit + end do + end if + ! otherwise, one sublist ended, and we append to it the rest + ! of the other one. + l(s) = q + s = t + do + t = q + q = l(q) + if (q <= 0) exit + end do + end if + + p = -p + q = -q + if (q == 0) then + l(s) = sign(p,l(s)) + l(t) = 0 + exit outer + end if + end do outer + end do mergepass + +end subroutine psi_i2_amsort_up + +subroutine psi_i2_amsort_dw(n,k,l,iret) + use psb_const_mod + implicit none + integer(psb_ipk_) :: n, iret + integer(psb_i2pk_) :: k(n) + integer(psb_ipk_) :: l(0:n+1) + ! + integer(psb_ipk_) :: p,q,s,t + ! .. + iret = 0 + ! first step: we are preparing ordered sublists, exploiting + ! what order was already in the input data; negative links + ! mark the end of the sublists + l(0) = 1 + t = n + 1 + do p = 1,n - 1 + if (abs(k(p)) >= abs(k(p+1))) then + l(p) = p + 1 + else + l(t) = - (p+1) + t = p + end if + end do + l(t) = 0 + l(n) = 0 + ! see if the input was already sorted + if (l(n+1) == 0) then + iret = 1 + return + else + l(n+1) = abs(l(n+1)) + end if + + mergepass: do + ! otherwise, begin a pass through the list. + ! throughout all the subroutine we have: + ! p, q: pointing to the sublists being merged + ! s: pointing to the most recently processed record + ! t: pointing to the end of previously completed sublist + s = 0 + t = n + 1 + p = l(s) + q = l(t) + if (q == 0) exit mergepass + + outer: do + + if (abs(k(p)) < abs(k(q))) then + + l(s) = sign(q,l(s)) + s = q + q = l(q) + if (q > 0) then + do + if (abs(k(p)) >= abs(k(q))) cycle outer + s = q + q = l(q) + if (q <= 0) exit + end do + end if + l(s) = p + s = t + do + t = p + p = l(p) + if (p <= 0) exit + end do + + else + + l(s) = sign(p,l(s)) + s = p + p = l(p) + if (p>0) then + do + if (abs(k(p)) < abs(k(q))) cycle outer + s = p + p = l(p) + if (p <= 0) exit + end do + end if + ! otherwise, one sublist ended, and we append to it the rest + ! of the other one. + l(s) = q + s = t + do + t = q + q = l(q) + if (q <= 0) exit + end do + end if + + p = -p + q = -q + if (q == 0) then + l(s) = sign(p,l(s)) + l(t) = 0 + exit outer + end if + end do outer + end do mergepass + +end subroutine psi_i2_amsort_dw + + diff --git a/base/serial/sort/psb_i2_qsort_impl.f90 b/base/serial/sort/psb_i2_qsort_impl.f90 new file mode 100644 index 00000000..cc0ee95b --- /dev/null +++ b/base/serial/sort/psb_i2_qsort_impl.f90 @@ -0,0 +1,1472 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific prior written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! +! The quicksort routines +! References: +! D. Knuth +! The Art of Computer Programming, vol. 3 +! Addison-Wesley +! +! Aho, Hopcroft, Ullman +! Data Structures and Algorithms +! Addison-Wesley +! +function psb_i2bsrch(key,n,v,dir,find) result(ipos) + use psb_sort_mod, psb_protect_name => psb_i2bsrch + implicit none + integer(psb_ipk_) :: ipos, n + integer(psb_i2pk_) :: key + integer(psb_i2pk_) :: v(:) + integer(psb_ipk_), optional :: dir, find + + integer(psb_ipk_) :: lb, ub, m, i, k, dir_, find_ + + if (present(dir)) then + dir_ = dir + else + dir_ = psb_sort_up_ + end if + if (present(find)) then + find_ = find + else + find_ = psb_find_any_ + end if + + ipos = -1 + if (dir_ == psb_sort_up_) then + if (n<=5) then + do m=1,n + if (key == v(m)) then + ipos = m + exit + end if + enddo + + else + + lb = 1 + ub = n + + do while (lb.le.ub) + m = (lb+ub)/2 + if (key.eq.v(m)) then + ipos = m + exit + else if (key < v(m)) then + ub = m-1 + else + lb = m + 1 + end if + enddo + end if + select case(find_) + case (psb_find_any_ ) + ! do nothing + case (psb_find_last_le_ ) + if ((m>n) .or. (m<1)) then + m = n + do while (m>=1) + if (v(m)<=key) exit + m = m - 1 + end do + else + do while (mn) .or. (m<1)) then + m = 1 + do while (m<=n) + if (v(m)>=key) exit + m = m + 1 + end do + else + do while (m>1) + if (v(m-1)>=key) then + m=m-1 + else + exit + end if + end do + end if + ipos = max(m,1) + + case default + write(0,*) 'Wrong FIND' + end select + + + else if (dir_ == psb_sort_down_) then + write(0,*) ' bsrch on sort down not implemented' + else + write(0,*) ' bsrch wrong DIR ',dir_,psb_sort_up_,psb_sort_down_ + end if + return +end function psb_i2bsrch + +function psb_i2ssrch(key,n,v) result(ipos) + use psb_sort_mod, psb_protect_name => psb_i2ssrch + implicit none + integer(psb_ipk_) :: ipos, n + integer(psb_i2pk_) :: key + integer(psb_i2pk_) :: v(:) + + integer(psb_ipk_) :: i + + ipos = -1 + do i=1,n + if (key.eq.v(i)) then + ipos = i + return + end if + enddo + + return +end function psb_i2ssrch + +subroutine psb_i2qsort(x,ix,dir,flag,reord) + use psb_sort_mod, psb_protect_name => psb_i2qsort + use psb_error_mod + implicit none + integer(psb_i2pk_), intent(inout) :: x(:) + integer(psb_ipk_), optional, intent(in) :: dir, flag,reord + integer(psb_ipk_), optional, intent(inout) :: ix(:) + + integer(psb_ipk_) :: dir_, flag_, err_act, i, reord_ + integer(psb_ipk_) :: n + integer(psb_i2pk_), allocatable :: tx(:) + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name + + name='psb_i2qsort' + call psb_erractionsave(err_act) + + + if (present(reord)) then + reord_ = reord + else + reord_= psb_sort_reord_x_ + end if + if (present(flag)) then + flag_ = flag + else + flag_ = psb_sort_ovw_idx_ + end if + select case(flag_) + case( psb_sort_ovw_idx_, psb_sort_keep_idx_) + ! OK keep going + case default + ierr(1) = 4; ierr(2) = flag_; + call psb_errpush(psb_err_input_value_invalid_i_,name,i_err=ierr) + goto 9999 + end select + + if (present(dir)) then + dir_ = dir + else + dir_= psb_sort_up_ + end if + + n = size(x) + + if (present(ix)) then + if (size(ix) < n) then + ierr(1) = 2; ierr(2) = size(ix); + call psb_errpush(psb_err_input_asize_invalid_i_,name,i_err=ierr) + goto 9999 + end if + if (flag_==psb_sort_ovw_idx_) then + do i=1,n + ix(i) = i + end do + end if + + select case(reord_) + case (psb_sort_reord_x_) + select case(dir_) + case (psb_sort_up_) + call psi_i2qsrx_up(n,x,ix) + case (psb_sort_down_) + call psi_i2qsrx_dw(n,x,ix) + case (psb_asort_up_) + call psi_i2aqsrx_up(n,x,ix) + case (psb_asort_down_) + call psi_i2aqsrx_dw(n,x,ix) + case default + ierr(1) = 3; ierr(2) = dir_; + call psb_errpush(psb_err_input_value_invalid_i_,name,i_err=ierr) + goto 9999 + end select + case(psb_sort_noreord_x_) + tx = x + select case(dir_) + case (psb_sort_up_) + call psi_i2qsrx_up(n,tx,ix) + case (psb_sort_down_) + call psi_i2qsrx_dw(n,tx,ix) + case (psb_asort_up_) + call psi_i2aqsrx_up(n,tx,ix) + case (psb_asort_down_) + call psi_i2aqsrx_dw(n,tx,ix) + case default + ierr(1) = 3; ierr(2) = dir_; + call psb_errpush(psb_err_input_value_invalid_i_,name,i_err=ierr) + goto 9999 + end select + end select + else + select case(reord_) + case (psb_sort_reord_x_) + !OK + case default + ierr(1) = 5; ierr(2) = reord_; + call psb_errpush(psb_err_input_value_invalid_i_,name,i_err=ierr) + goto 9999 + end select + + select case(dir_) + case (psb_sort_up_) + call psi_i2qsr_up(n,x) + case (psb_sort_down_) + call psi_i2qsr_dw(n,x) + case (psb_asort_up_) + call psi_i2aqsr_up(n,x) + case (psb_asort_down_) + call psi_i2aqsr_dw(n,x) + case default + ierr(1) = 3; ierr(2) = dir_; + call psb_errpush(psb_err_input_value_invalid_i_,name,i_err=ierr) + goto 9999 + end select + + end if + + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_i2qsort + +subroutine psi_i2qsrx_up(n,x,idx) + use psb_sort_mod, psb_protect_name => psi_i2qsrx_up + use psb_error_mod + implicit none + + integer(psb_i2pk_), intent(inout) :: x(:) + integer(psb_ipk_), intent(inout) :: idx(:) + integer(psb_ipk_), intent(in) :: n + ! .. Local Scalars .. + integer(psb_i2pk_) :: piv, xk, xt + integer(psb_ipk_) :: i, j, ilx, iux, istp, lpiv + integer(psb_ipk_) :: n1, n2 + integer(psb_ipk_) :: ixt + integer(psb_ipk_), parameter :: maxstack=64,nparms=3,ithrs=72 + integer(psb_ipk_) :: istack(nparms,maxstack) + + if (n > ithrs) then + ! + ! Init stack pointer + ! + istp = 1 + istack(1,istp) = 1 + istack(2,istp) = n + + do + if (istp <= 0) exit + ilx = istack(1,istp) + iux = istack(2,istp) + istp = istp - 1 + ! + ! Choose a pivot with median-of-three heuristics, leave it + ! in the LPIV location + ! + i = ilx + j = iux + lpiv = (i+j)/2 + piv = x(lpiv) + if (piv < x(i)) then + xt = x(i) + ixt = idx(i) + x(i) = x(lpiv) + idx(i) = idx(lpiv) + x(lpiv) = xt + idx(lpiv) = ixt + piv = x(lpiv) + endif + if (piv > x(j)) then + xt = x(j) + ixt = idx(j) + x(j) = x(lpiv) + idx(j) = idx(lpiv) + x(lpiv) = xt + idx(lpiv) = ixt + piv = x(lpiv) + endif + if (piv < x(i)) then + xt = x(i) + ixt = idx(i) + x(i) = x(lpiv) + idx(i) = idx(lpiv) + x(lpiv) = xt + idx(lpiv) = ixt + piv = x(lpiv) + endif + ! + ! now piv is correct; place it into first location + xt = x(i) + ixt = idx(i) + x(i) = x(lpiv) + idx(i) = idx(lpiv) + x(lpiv) = xt + idx(lpiv) = ixt + piv = x(lpiv) + + i = ilx - 1 + j = iux + 1 + + outer_up: do + in_up1: do + i = i + 1 + xk = x(i) + if (xk >= piv) exit in_up1 + end do in_up1 + ! + ! Ensure finite termination for next loop + ! + xt = xk + x(i) = piv + in_up2:do + j = j - 1 + xk = x(j) + if (xk <= piv) exit in_up2 + end do in_up2 + x(i) = xt + + if (j > i) then + xt = x(i) + ixt = idx(i) + x(i) = x(j) + idx(i) = idx(j) + x(j) = xt + idx(j) = ixt + else + exit outer_up + end if + end do outer_up + if (i == ilx) then + if (x(i) /= piv) then + call psb_errpush(psb_err_internal_error_,& + & r_name='psi_i2qsrx',a_err='impossible pivot condition') + call psb_error() + endif + i = i + 1 + endif + + n1 = (i-1)-ilx+1 + n2 = iux-(i)+1 + if (n1 > n2) then + if (n1 > ithrs) then + istp = istp + 1 + istack(1,istp) = ilx + istack(2,istp) = i-1 + else + call psi_i2isrx_up(n1,x(ilx:i-1),idx(ilx:i-1)) + endif + if (n2 > ithrs) then + istp = istp + 1 + istack(1,istp) = i + istack(2,istp) = iux + else + call psi_i2isrx_up(n2,x(i:iux),idx(i:iux)) + endif + else + if (n2 > ithrs) then + istp = istp + 1 + istack(1,istp) = i + istack(2,istp) = iux + else + call psi_i2isrx_up(n2,x(i:iux),idx(i:iux)) + endif + if (n1 > ithrs) then + istp = istp + 1 + istack(1,istp) = ilx + istack(2,istp) = i-1 + else + call psi_i2isrx_up(n1,x(ilx:i-1),idx(ilx:i-1)) + endif + endif + enddo + else + call psi_i2isrx_up(n,x,idx) + endif +end subroutine psi_i2qsrx_up + +subroutine psi_i2qsrx_dw(n,x,idx) + use psb_sort_mod, psb_protect_name => psi_i2qsrx_dw + use psb_error_mod + implicit none + + integer(psb_i2pk_), intent(inout) :: x(:) + integer(psb_ipk_), intent(inout) :: idx(:) + integer(psb_ipk_), intent(in) :: n + ! .. Local Scalars .. + integer(psb_i2pk_) :: piv, xk, xt + integer(psb_ipk_) :: i, j, ilx, iux, istp, lpiv + integer(psb_ipk_) :: n1, n2 + integer(psb_ipk_) :: ixt + + integer(psb_ipk_), parameter :: maxstack=64,nparms=3,ithrs=72 + integer(psb_ipk_) :: istack(nparms,maxstack) + + if (n > ithrs) then + ! + ! Init stack pointer + ! + istp = 1 + istack(1,istp) = 1 + istack(2,istp) = n + + do + if (istp <= 0) exit + ilx = istack(1,istp) + iux = istack(2,istp) + istp = istp - 1 + ! + ! Choose a pivot with median-of-three heuristics, leave it + ! in the LPIV location + ! + i = ilx + j = iux + lpiv = (i+j)/2 + piv = x(lpiv) + if (piv > x(i)) then + xt = x(i) + ixt = idx(i) + x(i) = x(lpiv) + idx(i) = idx(lpiv) + x(lpiv) = xt + idx(lpiv) = ixt + piv = x(lpiv) + endif + if (piv < x(j)) then + xt = x(j) + ixt = idx(j) + x(j) = x(lpiv) + idx(j) = idx(lpiv) + x(lpiv) = xt + idx(lpiv) = ixt + piv = x(lpiv) + endif + if (piv > x(i)) then + xt = x(i) + ixt = idx(i) + x(i) = x(lpiv) + idx(i) = idx(lpiv) + x(lpiv) = xt + idx(lpiv) = ixt + piv = x(lpiv) + endif + ! + ! now piv is correct; place it into first location + xt = x(i) + ixt = idx(i) + x(i) = x(lpiv) + idx(i) = idx(lpiv) + x(lpiv) = xt + idx(lpiv) = ixt + piv = x(lpiv) + + i = ilx - 1 + j = iux + 1 + + outer_dw: do + in_dw1: do + i = i + 1 + xk = x(i) + if (xk <= piv) exit in_dw1 + end do in_dw1 + ! + ! Ensure finite termination for next loop + ! + xt = xk + x(i) = piv + in_dw2:do + j = j - 1 + xk = x(j) + if (xk >= piv) exit in_dw2 + end do in_dw2 + x(i) = xt + + if (j > i) then + xt = x(i) + ixt = idx(i) + x(i) = x(j) + idx(i) = idx(j) + x(j) = xt + idx(j) = ixt + else + exit outer_dw + end if + end do outer_dw + if (i == ilx) then + if (x(i) /= piv) then + call psb_errpush(psb_err_internal_error_,& + & r_name='psi_i2qsrx',a_err='impossible pivot condition') + call psb_error() + endif + i = i + 1 + endif + + n1 = (i-1)-ilx+1 + n2 = iux-(i)+1 + if (n1 > n2) then + if (n1 > ithrs) then + istp = istp + 1 + istack(1,istp) = ilx + istack(2,istp) = i-1 + else + call psi_i2isrx_dw(n1,x(ilx:i-1),idx(ilx:i-1)) + endif + if (n2 > ithrs) then + istp = istp + 1 + istack(1,istp) = i + istack(2,istp) = iux + else + call psi_i2isrx_dw(n2,x(i:iux),idx(i:iux)) + endif + else + if (n2 > ithrs) then + istp = istp + 1 + istack(1,istp) = i + istack(2,istp) = iux + else + call psi_i2isrx_dw(n2,x(i:iux),idx(i:iux)) + endif + if (n1 > ithrs) then + istp = istp + 1 + istack(1,istp) = ilx + istack(2,istp) = i-1 + else + call psi_i2isrx_dw(n1,x(ilx:i-1),idx(ilx:i-1)) + endif + endif + enddo + else + call psi_i2isrx_dw(n,x,idx) + endif + +end subroutine psi_i2qsrx_dw + +subroutine psi_i2qsr_up(n,x) + use psb_sort_mod, psb_protect_name => psi_i2qsr_up + use psb_error_mod + implicit none + + integer(psb_i2pk_), intent(inout) :: x(:) + integer(psb_ipk_), intent(in) :: n + ! .. + ! .. Local Scalars .. + integer(psb_i2pk_) :: piv, xt, xk + integer(psb_ipk_) :: i, j, ilx, iux, istp, lpiv + integer(psb_ipk_) :: n1, n2 + + integer(psb_ipk_), parameter :: maxstack=64,nparms=3,ithrs=72 + integer(psb_ipk_) :: istack(nparms,maxstack) + + + if (n > ithrs) then + ! + ! Init stack pointer + ! + istp = 1 + istack(1,istp) = 1 + istack(2,istp) = n + + do + if (istp <= 0) exit + ilx = istack(1,istp) + iux = istack(2,istp) + istp = istp - 1 + ! + ! Choose a pivot with median-of-three heuristics, leave it + ! in the LPIV location + ! + i = ilx + j = iux + lpiv = (i+j)/2 + piv = x(lpiv) + if (piv < x(i)) then + xt = x(i) + x(i) = x(lpiv) + x(lpiv) = xt + piv = x(lpiv) + endif + if (piv > x(j)) then + xt = x(j) + x(j) = x(lpiv) + x(lpiv) = xt + piv = x(lpiv) + endif + if (piv < x(i)) then + xt = x(i) + x(i) = x(lpiv) + x(lpiv) = xt + piv = x(lpiv) + endif + ! + ! now piv is correct; place it into first location + + xt = x(i) + x(i) = x(lpiv) + x(lpiv) = xt + + i = ilx - 1 + j = iux + 1 + + outer_up: do + in_up1: do + i = i + 1 + xk = x(i) + if (xk >= piv) exit in_up1 + end do in_up1 + ! + ! Ensure finite termination for next loop + ! + xt = xk + x(i) = piv + in_up2:do + j = j - 1 + xk = x(j) + if (xk <= piv) exit in_up2 + end do in_up2 + x(i) = xt + + if (j > i) then + xt = x(i) + x(i) = x(j) + x(j) = xt + else + exit outer_up + end if + end do outer_up + if (i == ilx) then + if (x(i) /= piv) then + call psb_errpush(psb_err_internal_error_,& + & r_name='psi_i2qsr',a_err='impossible pivot condition') + call psb_error() + endif + i = i + 1 + endif + + n1 = (i-1)-ilx+1 + n2 = iux-(i)+1 + if (n1 > n2) then + if (n1 > ithrs) then + istp = istp + 1 + istack(1,istp) = ilx + istack(2,istp) = i-1 + else + call psi_i2isr_up(n1,x(ilx:i-1)) + endif + if (n2 > ithrs) then + istp = istp + 1 + istack(1,istp) = i + istack(2,istp) = iux + else + call psi_i2isr_up(n2,x(i:iux)) + endif + else + if (n2 > ithrs) then + istp = istp + 1 + istack(1,istp) = i + istack(2,istp) = iux + else + call psi_i2isr_up(n2,x(i:iux)) + endif + if (n1 > ithrs) then + istp = istp + 1 + istack(1,istp) = ilx + istack(2,istp) = i-1 + else + call psi_i2isr_up(n1,x(ilx:i-1)) + endif + endif + enddo + else + call psi_i2isr_up(n,x) + endif + +end subroutine psi_i2qsr_up + +subroutine psi_i2qsr_dw(n,x) + use psb_sort_mod, psb_protect_name => psi_i2qsr_dw + use psb_error_mod + implicit none + + integer(psb_i2pk_), intent(inout) :: x(:) + integer(psb_ipk_), intent(in) :: n + ! .. + ! .. Local Scalars .. + integer(psb_i2pk_) :: piv, xt, xk + integer(psb_ipk_) :: i, j, ilx, iux, istp, lpiv + integer(psb_ipk_) :: n1, n2 + + integer(psb_ipk_), parameter :: maxstack=64,nparms=3,ithrs=72 + integer(psb_ipk_) :: istack(nparms,maxstack) + + + if (n > ithrs) then + ! + ! Init stack pointer + ! + istp = 1 + istack(1,istp) = 1 + istack(2,istp) = n + + do + if (istp <= 0) exit + ilx = istack(1,istp) + iux = istack(2,istp) + istp = istp - 1 + ! + ! Choose a pivot with median-of-three heuristics, leave it + ! in the LPIV location + ! + i = ilx + j = iux + lpiv = (i+j)/2 + piv = x(lpiv) + if (piv > x(i)) then + xt = x(i) + x(i) = x(lpiv) + x(lpiv) = xt + piv = x(lpiv) + endif + if (piv < x(j)) then + xt = x(j) + x(j) = x(lpiv) + x(lpiv) = xt + piv = x(lpiv) + endif + if (piv > x(i)) then + xt = x(i) + x(i) = x(lpiv) + x(lpiv) = xt + piv = x(lpiv) + endif + ! + ! now piv is correct; place it into first location + + xt = x(i) + x(i) = x(lpiv) + x(lpiv) = xt + + i = ilx - 1 + j = iux + 1 + + outer_dw: do + in_dw1: do + i = i + 1 + xk = x(i) + if (xk <= piv) exit in_dw1 + end do in_dw1 + ! + ! Ensure finite termination for next loop + ! + xt = xk + x(i) = piv + in_dw2:do + j = j - 1 + xk = x(j) + if (xk >= piv) exit in_dw2 + end do in_dw2 + x(i) = xt + + if (j > i) then + xt = x(i) + x(i) = x(j) + x(j) = xt + else + exit outer_dw + end if + end do outer_dw + if (i == ilx) then + if (x(i) /= piv) then + call psb_errpush(psb_err_internal_error_, & + & r_name='psi_i2qsr',a_err='impossible pivot condition') + call psb_error() + endif + i = i + 1 + endif + + n1 = (i-1)-ilx+1 + n2 = iux-(i)+1 + if (n1 > n2) then + if (n1 > ithrs) then + istp = istp + 1 + istack(1,istp) = ilx + istack(2,istp) = i-1 + else + call psi_i2isr_dw(n1,x(ilx:i-1)) + endif + if (n2 > ithrs) then + istp = istp + 1 + istack(1,istp) = i + istack(2,istp) = iux + else + call psi_i2isr_dw(n2,x(i:iux)) + endif + else + if (n2 > ithrs) then + istp = istp + 1 + istack(1,istp) = i + istack(2,istp) = iux + else + call psi_i2isr_dw(n2,x(i:iux)) + endif + if (n1 > ithrs) then + istp = istp + 1 + istack(1,istp) = ilx + istack(2,istp) = i-1 + else + call psi_i2isr_dw(n1,x(ilx:i-1)) + endif + endif + enddo + else + call psi_i2isr_dw(n,x) + endif + +end subroutine psi_i2qsr_dw + +subroutine psi_i2aqsrx_up(n,x,idx) + use psb_sort_mod, psb_protect_name => psi_i2aqsrx_up + use psb_error_mod + implicit none + + integer(psb_i2pk_), intent(inout) :: x(:) + integer(psb_ipk_), intent(inout) :: idx(:) + integer(psb_ipk_), intent(in) :: n + ! .. Local Scalars .. + integer(psb_i2pk_) :: piv, xk + integer(psb_i2pk_) :: xt + integer(psb_ipk_) :: i, j, ilx, iux, istp, lpiv + integer(psb_ipk_) :: n1, n2 + integer(psb_ipk_) :: ixt + + integer(psb_ipk_), parameter :: maxstack=64,nparms=3,ithrs=72 + integer(psb_ipk_) :: istack(nparms,maxstack) + + if (n > ithrs) then + ! + ! Init stack pointer + ! + istp = 1 + istack(1,istp) = 1 + istack(2,istp) = n + + do + if (istp <= 0) exit + ilx = istack(1,istp) + iux = istack(2,istp) + istp = istp - 1 + ! + ! Choose a pivot with median-of-three heuristics, leave it + ! in the LPIV location + ! + i = ilx + j = iux + lpiv = (i+j)/2 + piv = abs(x(lpiv)) + if (piv < abs(x(i))) then + xt = x(i) + ixt = idx(i) + x(i) = x(lpiv) + idx(i) = idx(lpiv) + x(lpiv) = xt + idx(lpiv) = ixt + piv = abs(x(lpiv)) + endif + if (piv > abs(x(j))) then + xt = x(j) + ixt = idx(j) + x(j) = x(lpiv) + idx(j) = idx(lpiv) + x(lpiv) = xt + idx(lpiv) = ixt + piv = abs(x(lpiv)) + endif + if (piv < abs(x(i))) then + xt = x(i) + ixt = idx(i) + x(i) = x(lpiv) + idx(i) = idx(lpiv) + x(lpiv) = xt + idx(lpiv) = ixt + piv = abs(x(lpiv)) + endif + ! + ! now piv is correct; place it into first location + xt = x(i) + ixt = idx(i) + x(i) = x(lpiv) + idx(i) = idx(lpiv) + x(lpiv) = xt + idx(lpiv) = ixt + + i = ilx - 1 + j = iux + 1 + + outer_up: do + in_up1: do + i = i + 1 + xk = abs(x(i)) + if (xk >= piv) exit in_up1 + end do in_up1 + ! + ! Ensure finite termination for next loop + ! + xt = x(i) + x(i) = piv + in_up2:do + j = j - 1 + xk = abs(x(j)) + if (xk <= piv) exit in_up2 + end do in_up2 + x(i) = xt + + if (j > i) then + xt = x(i) + ixt = idx(i) + x(i) = x(j) + idx(i) = idx(j) + x(j) = xt + idx(j) = ixt + else + exit outer_up + end if + end do outer_up + if (i == ilx) then + if (x(i) /= piv) then + call psb_errpush(psb_err_internal_error_, & + & r_name='psi_i2aqsrx',a_err='impossible pivot condition') + call psb_error() + endif + i = i + 1 + endif + + n1 = (i-1)-ilx+1 + n2 = iux-(i)+1 + if (n1 > n2) then + if (n1 > ithrs) then + istp = istp + 1 + istack(1,istp) = ilx + istack(2,istp) = i-1 + else + call psi_i2aisrx_up(n1,x(ilx:i-1),idx(ilx:i-1)) + endif + if (n2 > ithrs) then + istp = istp + 1 + istack(1,istp) = i + istack(2,istp) = iux + else + call psi_i2aisrx_up(n2,x(i:iux),idx(i:iux)) + endif + else + if (n2 > ithrs) then + istp = istp + 1 + istack(1,istp) = i + istack(2,istp) = iux + else + call psi_i2aisrx_up(n2,x(i:iux),idx(i:iux)) + endif + if (n1 > ithrs) then + istp = istp + 1 + istack(1,istp) = ilx + istack(2,istp) = i-1 + else + call psi_i2aisrx_up(n1,x(ilx:i-1),idx(ilx:i-1)) + endif + endif + enddo + else + call psi_i2aisrx_up(n,x,idx) + endif + + +end subroutine psi_i2aqsrx_up + +subroutine psi_i2aqsrx_dw(n,x,idx) + use psb_sort_mod, psb_protect_name => psi_i2aqsrx_dw + use psb_error_mod + implicit none + + integer(psb_i2pk_), intent(inout) :: x(:) + integer(psb_ipk_), intent(inout) :: idx(:) + integer(psb_ipk_), intent(in) :: n + ! .. Local Scalars .. + integer(psb_i2pk_) :: piv, xk + integer(psb_i2pk_) :: xt + integer(psb_ipk_) :: i, j, ilx, iux, istp, lpiv + integer(psb_ipk_) :: n1, n2 + integer(psb_ipk_) :: ixt + + integer(psb_ipk_), parameter :: maxstack=64,nparms=3,ithrs=72 + integer(psb_ipk_) :: istack(nparms,maxstack) + if (n > ithrs) then + ! + ! Init stack pointer + ! + istp = 1 + istack(1,istp) = 1 + istack(2,istp) = n + + do + if (istp <= 0) exit + ilx = istack(1,istp) + iux = istack(2,istp) + istp = istp - 1 + ! + ! Choose a pivot with median-of-three heuristics, leave it + ! in the LPIV location + ! + i = ilx + j = iux + lpiv = (i+j)/2 + piv = abs(x(lpiv)) + if (piv > abs(x(i))) then + xt = x(i) + ixt = idx(i) + x(i) = x(lpiv) + idx(i) = idx(lpiv) + x(lpiv) = xt + idx(lpiv) = ixt + piv = abs(x(lpiv)) + endif + if (piv < abs(x(j))) then + xt = x(j) + ixt = idx(j) + x(j) = x(lpiv) + idx(j) = idx(lpiv) + x(lpiv) = xt + idx(lpiv) = ixt + piv = abs(x(lpiv)) + endif + if (piv > abs(x(i))) then + xt = x(i) + ixt = idx(i) + x(i) = x(lpiv) + idx(i) = idx(lpiv) + x(lpiv) = xt + idx(lpiv) = ixt + piv = abs(x(lpiv)) + endif + ! + ! now piv is correct; place it into first location + xt = x(i) + ixt = idx(i) + x(i) = x(lpiv) + idx(i) = idx(lpiv) + x(lpiv) = xt + idx(lpiv) = ixt + + i = ilx - 1 + j = iux + 1 + + outer_dw: do + in_dw1: do + i = i + 1 + xk = abs(x(i)) + if (xk <= piv) exit in_dw1 + end do in_dw1 + ! + ! Ensure finite termination for next loop + ! + xt = x(i) + x(i) = piv + in_dw2:do + j = j - 1 + xk = abs(x(j)) + if (xk >= piv) exit in_dw2 + end do in_dw2 + x(i) = xt + + if (j > i) then + xt = x(i) + ixt = idx(i) + x(i) = x(j) + idx(i) = idx(j) + x(j) = xt + idx(j) = ixt + else + exit outer_dw + end if + end do outer_dw + if (i == ilx) then + if (x(i) /= piv) then + call psb_errpush(psb_err_internal_error_,& + & r_name='psi_i2aqsrx',a_err='impossible pivot condition') + call psb_error() + endif + i = i + 1 + endif + + n1 = (i-1)-ilx+1 + n2 = iux-(i)+1 + if (n1 > n2) then + if (n1 > ithrs) then + istp = istp + 1 + istack(1,istp) = ilx + istack(2,istp) = i-1 + else + call psi_i2aisrx_dw(n1,x(ilx:i-1),idx(ilx:i-1)) + endif + if (n2 > ithrs) then + istp = istp + 1 + istack(1,istp) = i + istack(2,istp) = iux + else + call psi_i2aisrx_dw(n2,x(i:iux),idx(i:iux)) + endif + else + if (n2 > ithrs) then + istp = istp + 1 + istack(1,istp) = i + istack(2,istp) = iux + else + call psi_i2aisrx_dw(n2,x(i:iux),idx(i:iux)) + endif + if (n1 > ithrs) then + istp = istp + 1 + istack(1,istp) = ilx + istack(2,istp) = i-1 + else + call psi_i2aisrx_dw(n1,x(ilx:i-1),idx(ilx:i-1)) + endif + endif + enddo + else + call psi_i2aisrx_dw(n,x,idx) + endif + +end subroutine psi_i2aqsrx_dw + +subroutine psi_i2aqsr_up(n,x) + use psb_sort_mod, psb_protect_name => psi_i2aqsr_up + use psb_error_mod + implicit none + + integer(psb_i2pk_), intent(inout) :: x(:) + integer(psb_ipk_), intent(in) :: n + ! .. Local Scalars .. + integer(psb_i2pk_) :: piv, xk + integer(psb_i2pk_) :: xt + integer(psb_ipk_) :: i, j, ilx, iux, istp, lpiv + integer(psb_ipk_) :: n1, n2 + integer(psb_ipk_) :: ixt + + integer(psb_ipk_), parameter :: maxstack=64,nparms=3,ithrs=72 + integer(psb_ipk_) :: istack(nparms,maxstack) + + if (n > ithrs) then + ! + ! Init stack pointer + ! + istp = 1 + istack(1,istp) = 1 + istack(2,istp) = n + + do + if (istp <= 0) exit + ilx = istack(1,istp) + iux = istack(2,istp) + istp = istp - 1 + ! + ! Choose a pivot with median-of-three heuristics, leave it + ! in the LPIV location + ! + i = ilx + j = iux + lpiv = (i+j)/2 + piv = abs(x(lpiv)) + if (piv < abs(x(i))) then + xt = x(i) + x(i) = x(lpiv) + x(lpiv) = xt + piv = abs(x(lpiv)) + endif + if (piv > abs(x(j))) then + xt = x(j) + x(j) = x(lpiv) + x(lpiv) = xt + piv = abs(x(lpiv)) + endif + if (piv < abs(x(i))) then + xt = x(i) + x(i) = x(lpiv) + x(lpiv) = xt + piv = abs(x(lpiv)) + endif + ! + ! now piv is correct; place it into first location + + xt = x(i) + x(i) = x(lpiv) + x(lpiv) = xt + + i = ilx - 1 + j = iux + 1 + + outer_up: do + in_up1: do + i = i + 1 + xk = abs(x(i)) + if (xk >= piv) exit in_up1 + end do in_up1 + ! + ! Ensure finite termination for next loop + ! + xt = x(i) + x(i) = piv + in_up2:do + j = j - 1 + xk = abs(x(j)) + if (xk <= piv) exit in_up2 + end do in_up2 + x(i) = xt + + if (j > i) then + xt = x(i) + x(i) = x(j) + x(j) = xt + else + exit outer_up + end if + end do outer_up + if (i == ilx) then + if (x(i) /= piv) then + call psb_errpush(psb_err_internal_error_, & + & r_name='psi_i2qasr',a_err='impossible pivot condition') + call psb_error() + endif + i = i + 1 + endif + + n1 = (i-1)-ilx+1 + n2 = iux-(i)+1 + if (n1 > n2) then + if (n1 > ithrs) then + istp = istp + 1 + istack(1,istp) = ilx + istack(2,istp) = i-1 + else + call psi_i2aisr_up(n1,x(ilx:i-1)) + endif + if (n2 > ithrs) then + istp = istp + 1 + istack(1,istp) = i + istack(2,istp) = iux + else + call psi_i2aisr_up(n2,x(i:iux)) + endif + else + if (n2 > ithrs) then + istp = istp + 1 + istack(1,istp) = i + istack(2,istp) = iux + else + call psi_i2aisr_up(n2,x(i:iux)) + endif + if (n1 > ithrs) then + istp = istp + 1 + istack(1,istp) = ilx + istack(2,istp) = i-1 + else + call psi_i2aisr_up(n1,x(ilx:i-1)) + endif + endif + enddo + else + call psi_i2aisr_up(n,x) + endif + +end subroutine psi_i2aqsr_up + +subroutine psi_i2aqsr_dw(n,x) + use psb_sort_mod, psb_protect_name => psi_i2aqsr_dw + use psb_error_mod + implicit none + + integer(psb_i2pk_), intent(inout) :: x(:) + integer(psb_ipk_), intent(in) :: n + ! .. Local Scalars .. + integer(psb_i2pk_) :: piv, xk + integer(psb_i2pk_) :: xt + integer(psb_ipk_) :: i, j, ilx, iux, istp, lpiv + integer(psb_ipk_) :: n1, n2 + integer(psb_ipk_) :: ixt + + integer(psb_ipk_), parameter :: maxstack=64,nparms=3,ithrs=72 + integer(psb_ipk_) :: istack(nparms,maxstack) + + if (n > ithrs) then + ! + ! Init stack pointer + ! + istp = 1 + istack(1,istp) = 1 + istack(2,istp) = n + + do + if (istp <= 0) exit + ilx = istack(1,istp) + iux = istack(2,istp) + istp = istp - 1 + ! + ! Choose a pivot with median-of-three heuristics, leave it + ! in the LPIV location + ! + i = ilx + j = iux + lpiv = (i+j)/2 + piv = abs(x(lpiv)) + if (piv > abs(x(i))) then + xt = x(i) + x(i) = x(lpiv) + x(lpiv) = xt + piv = abs(x(lpiv)) + endif + if (piv < abs(x(j))) then + xt = x(j) + x(j) = x(lpiv) + x(lpiv) = xt + piv = abs(x(lpiv)) + endif + if (piv > abs(x(i))) then + xt = x(i) + x(i) = x(lpiv) + x(lpiv) = xt + piv = abs(x(lpiv)) + endif + ! + ! now piv is correct; place it into first location + + xt = x(i) + x(i) = x(lpiv) + x(lpiv) = xt + + i = ilx - 1 + j = iux + 1 + + outer_dw: do + in_dw1: do + i = i + 1 + xk = abs(x(i)) + if (xk <= piv) exit in_dw1 + end do in_dw1 + ! + ! Ensure finite termination for next loop + ! + xt = x(i) + x(i) = piv + in_dw2:do + j = j - 1 + xk = abs(x(j)) + if (xk >= piv) exit in_dw2 + end do in_dw2 + x(i) = xt + + if (j > i) then + xt = x(i) + x(i) = x(j) + x(j) = xt + else + exit outer_dw + end if + end do outer_dw + if (i == ilx) then + if (x(i) /= piv) then + call psb_errpush(psb_err_internal_error_,& + & r_name='psi_i2qasr',a_err='impossible pivot condition') + call psb_error() + endif + i = i + 1 + endif + + n1 = (i-1)-ilx+1 + n2 = iux-(i)+1 + if (n1 > n2) then + if (n1 > ithrs) then + istp = istp + 1 + istack(1,istp) = ilx + istack(2,istp) = i-1 + else + call psi_i2aisr_dw(n1,x(ilx:i-1)) + endif + if (n2 > ithrs) then + istp = istp + 1 + istack(1,istp) = i + istack(2,istp) = iux + else + call psi_i2aisr_dw(n2,x(i:iux)) + endif + else + if (n2 > ithrs) then + istp = istp + 1 + istack(1,istp) = i + istack(2,istp) = iux + else + call psi_i2aisr_dw(n2,x(i:iux)) + endif + if (n1 > ithrs) then + istp = istp + 1 + istack(1,istp) = ilx + istack(2,istp) = i-1 + else + call psi_i2aisr_dw(n1,x(ilx:i-1)) + endif + endif + enddo + else + call psi_i2aisr_dw(n,x) + endif + +end subroutine psi_i2aqsr_dw + + diff --git a/base/tools/Makefile b/base/tools/Makefile index 771e85fc..1cd67af5 100644 --- a/base/tools/Makefile +++ b/base/tools/Makefile @@ -14,6 +14,7 @@ FOBJS = psb_cdall.o psb_cdals.o psb_cdalv.o psb_cd_inloc.o psb_cdins.o psb_cdprt psb_dallc.o psb_dasb.o psb_dfree.o psb_dins.o \ psb_callc.o psb_casb.o psb_cfree.o psb_cins.o \ psb_zallc.o psb_zasb.o psb_zfree.o psb_zins.o \ + psb_i2allc_a.o psb_i2asb_a.o psb_i2free_a.o psb_i2ins_a.o \ psb_mallc_a.o psb_masb_a.o psb_mfree_a.o psb_mins_a.o \ psb_eallc_a.o psb_easb_a.o psb_efree_a.o psb_eins_a.o \ psb_sallc_a.o psb_sasb_a.o psb_sfree_a.o psb_sins_a.o \ @@ -33,7 +34,7 @@ MPFOBJS = psb_icdasb.o psb_ssphalo.o psb_dsphalo.o psb_csphalo.o psb_zsphalo.o psb_dcdbldext.o psb_zcdbldext.o psb_scdbldext.o psb_ccdbldext.o \ psb_s_remote_mat.o psb_d_remote_mat.o psb_c_remote_mat.o psb_z_remote_mat.o \ psb_s_remote_vect.o psb_d_remote_vect.o psb_c_remote_vect.o psb_z_remote_vect.o \ - psb_e_remote_vect.o psb_m_remote_vect.o + psb_e_remote_vect.o psb_m_remote_vect.o psb_i2_remote_vect.o LIBDIR=.. INCDIR=..