#include <algorithm>
#include <fstream>
#include <iostream>
#include <iterator>
#include <mutex>
#include <set>
#include <string>
#include <unordered_map>
#include <vector>

using namespace std;

/// Builds the user -> venues dictionary from a check-in file.
///
/// The file is read as a stream of whitespace-separated tokens taken in pairs:
/// the first token of each pair is used as the UserID, the second as the
/// VenueID. Venues are stored in an ordered set, so repeated check-ins of the
/// same user at the same venue collapse into a single entry.
///
/// @param filename Path of the check-in file to read.
/// @return Map from UserID to the set of VenueIDs seen for that user. The map
///         is empty if the file cannot be opened (an error is printed to
///         stderr in that case).
unordered_map<string, set<string>> createDictFromFile(string filename) {
    unordered_map<string, set<string>> dict;

    ifstream file(filename);
    if (!file.is_open()) {
        cerr << "Error opening file " << filename << endl;
        return dict;
    }

    // Test the extraction itself rather than file.good() before it: a failed
    // read (end of file, or a trailing UserID without a VenueID) must not
    // insert stale or empty strings into the dictionary.
    string userId, venueId;
    while (file >> userId >> venueId) {
        dict[userId].insert(venueId);
    }

    cout << "Dict created" << endl;
    return dict;
}

// Legacy multi-threaded draft, kept for reference only (not compiled).
//
// void create_tsv_multi(unordered_map<string, set<string>> dict, mutex& dict_mutex) {
//     // Create an output stream to write the file
//     ofstream out_file("output.tsv");
//     // Create a mutex to protect the output file
//     mutex out_file_mutex;
//     // Loop over all the key-value pairs in the map
//     for (const auto& kv1 : dict) {
//         for (const auto& kv2 : dict) {
//             // Check if the keys are the same
//             if (kv1.first == kv2.first) continue;
//             // Check if the values have elements in common
//             vector<string> common;
//             for (const auto& str1 : kv1.second) {
//                 for (const auto& str2 : kv2.second) {
//                     if (str1 == str2) common.push_back(str1);
//                 }
//             }
//             // Write the keys and the number of common elements to the output file
//             if (!common.empty()) {
//                 // Lock the mutexes before accessing the dict and the output file
//                 lock_guard<mutex> dict_guard(dict_mutex);
//                 lock_guard<mutex> out_file_guard(out_file_mutex);
//                 out_file << kv1.first << "\t" << kv2.first << "\t" << common.size() << endl;
//             }
//         }
//     }
// }

/// Counts the elements two ordered sets have in common with a single merge
/// walk, without materialising the intersection.
static set<string>::size_type countCommonVenues(const set<string>& lhs,
                                                const set<string>& rhs) {
    set<string>::size_type shared = 0;
    auto a = lhs.begin();
    auto b = rhs.begin();
    while (a != lhs.end() && b != rhs.end()) {
        if (*a < *b) {
            ++a;
        } else if (*b < *a) {
            ++b;
        } else {
            ++shared;
            ++a;
            ++b;
        }
    }
    return shared;
}

/// Writes one TSV row per pair of users that share at least one venue.
///
/// Each row is `userA <TAB> userB <TAB> number_of_shared_venues`. Every
/// unordered pair is emitted at most once, with the lexicographically smaller
/// UserID first. A progress percentage is printed to stdout every 128 users
/// of the outer loop.
///
/// @param dict        Map from UserID to the ordered set of its VenueIDs.
/// @param outfilename Path of the TSV file to create or overwrite.
void create_tsv(const unordered_map<string, set<string>>& dict, string outfilename) {
    ofstream out_file(outfilename);
    if (!out_file.is_open()) {
        // Without this check the whole quadratic scan would run and silently
        // produce no output.
        cerr << "Error opening file " << outfilename << endl;
        return;
    }

    unsigned long long i = 0;
    for (const auto& kv1 : dict) {
        // Progress report, throttled to once every 128 outer iterations.
        if (!((++i) & 127)) {
            cout << (static_cast<double>(i) * 100 / dict.size()) << "%\r" << flush;
        }

        for (const auto& kv2 : dict) {
            // Skip the user itself and the mirrored pair, so each unordered
            // pair is considered exactly once.
            if (kv1.first >= kv2.first) continue;

            const auto shared = countCommonVenues(kv1.second, kv2.second);
            if (shared != 0) {
                // '\n' instead of endl: same bytes, but no flush per row.
                out_file << kv1.first << "\t" << kv2.first << "\t" << shared << '\n';
            }
        }
    }
}

/// Prints the command-line usage and the suggested dataset invocations to stdout.
void print_help() {
    cout << "Usage: ./main IN_FILE_PATH OUT_FILE_PATH" << endl;
    cout << "Suggested options: "
            "\n\t./main data/brightkite/brightkite_checkins.txt data/brightkite/brightkite_checkins_graph.tsv "
            "\n\t./main data/gowalla/gowalla_checkins.txt data/gowalla/gowalla_checkins_graph.tsv "
            "\n\t./main data/foursquare/foursquare_checkins.txt data/foursquare/foursquare_checkins_graph.tsv"
         << endl;
}

// Legacy entry point with a hard-coded input, kept for reference only (not compiled).
//
// int main() {
//     unordered_map<string, set<string>> dict = createDictFromFile("data/foursquare/foursquare_checkins_TKY.txt");
//     create_tsv(dict);
// }

/// Entry point: `./main IN_FILE_PATH OUT_FILE_PATH`.
///
/// With exactly two arguments, neither of which is `-h`/`--help`, the input
/// file is loaded and the shared-venue TSV is written to the output path.
/// Any other invocation prints the usage text. The exit code is always 0.
int main(int argc, const char* argv[]) {
    if (argc != 3) {
        print_help();
        return 0;
    }

    const string in_file = argv[1];
    const string out_file = argv[2];

    if (in_file == "-h" || in_file == "--help" || out_file == "-h" || out_file == "--help") {
        print_help();
        return 0;
    }

    const unordered_map<string, set<string>> dict = createDictFromFile(in_file);
    create_tsv(dict, out_file);
    return 0;
}