From 3012e011686e5bfbd60c206308bd1aa0244ac03a Mon Sep 17 00:00:00 2001
From: Isabella Inuso
Date: Fri, 30 Sep 2022 18:46:27 +0200
Subject: [PATCH] Primo commit

---
 .gitignore        |  13 +
 Graph.cpp         | 135 ++++++++++
 Progetto.cpp      |  57 ++++
 README.md         |   9 +
 aux.cpp           |  66 +++++
 populateGraph.cpp |  16 ++
 reader.cpp        | 105 ++++++++
 stop_words.txt    | 667 ++++++++++++++++++++++++++++++++++++++++++++++
 8 files changed, 1068 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 Graph.cpp
 create mode 100644 Progetto.cpp
 create mode 100644 README.md
 create mode 100644 aux.cpp
 create mode 100644 populateGraph.cpp
 create mode 100644 reader.cpp
 create mode 100644 stop_words.txt

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..89abdcc
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,13 @@
+
+.vscode/
+
+# Dati
+*.zip
+wiki*/
+
+# Test
+*Test*
+*.exe
+
+# Eseguibili
+Progetto
diff --git a/Graph.cpp b/Graph.cpp
new file mode 100644
index 0000000..3fdc8a4
--- /dev/null
+++ b/Graph.cpp
@@ -0,0 +1,135 @@
+#pragma once
+
+#include <algorithm>
+#include <cstddef>
+#include <iostream>
+#include <numeric>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+#include "aux.cpp"
+
+using namespace std;
+
+// One direction of an undirected edge, stored in the adjacency list of `from`.
+struct Edge {
+    string from, to;
+    int weight;
+};
+
+// Comparator that orders edges by decreasing weight.
+bool compareEdge(const Edge& e1, const Edge& e2) {
+    return (e1.weight > e2.weight);
+}
+
+// Undirected weighted graph whose nodes are identified by name. Every
+// undirected edge is stored twice, once per endpoint, in AdjList.
+class UndirectedWeightedGraph {
+    private:
+        // Adds `weight` to the from->to edge, creating the edge when missing.
+        void addWeightedEdge(const string& from, const string& to, int weight) {
+            vector<Edge>& edges = AdjList[from];
+            for (Edge& edge : edges) {
+                if (to == edge.to) {
+                    edge.weight += weight;
+                    return;
+                }
+            }
+            edges.push_back(Edge{from, to, weight});
+        }
+
+        // Names of all nodes adjacent to `node` (empty for an unknown node).
+        // Uses find() so that a lookup never inserts a new key into AdjList.
+        unordered_set<string> neighbours(const string& node) const {
+            unordered_set<string> names;
+            auto found = AdjList.find(node);
+            if (found != AdjList.end()) {
+                for (const Edge& e : found->second) {
+                    names.insert(e.to);
+                }
+            }
+            return names;
+        }
+
+    public:
+        unordered_map<string, vector<Edge>> AdjList;
+
+        // Records one co-occurrence of node1 and node2: the weight of the edge
+        // between them grows by 1 in both adjacency lists.
+        void addEdge(const string& node1, const string& node2) {
+            addWeightedEdge(node1, node2, 1);
+            addWeightedEdge(node2, node1, 1);
+        }
+
+        // Returns true when the targets of the edges in `star` have exactly one
+        // neighbour in common. An empty `star` yields false.
+        bool checkComb(const vector<Edge>& star) {
+            if (star.empty())
+                return false;
+
+            auto it = star.begin();
+            unordered_set<string> acc = neighbours(it->to);
+            // Once the intersection is empty it can never grow back, so stop early.
+            for (++it; it != star.end() && !acc.empty(); ++it) {
+                acc = intersectSets(acc, neighbours(it->to));
+            }
+
+            return acc.size() == 1;
+        }
+
+        // Searches for groups of `k` edges leaving the same node whose targets
+        // satisfy checkComb. Nodes are visited in random order; the search stops
+        // after `c` groups have been found (it never stops early when c < 0) and
+        // at most one group is taken per node.
+        vector<vector<Edge>> findSol(int c, int k) {
+            vector<vector<Edge>> Sol;
+            if (k <= 0)
+                return Sol; // there is no k-combination to enumerate
+
+            vector<string> keys = getShuffleKeys(AdjList);
+
+            for (auto& key : keys) {
+                if (c == 0)
+                    break;
+
+                // Work on a copy so that sorting does not reorder AdjList.
+                vector<Edge> node = AdjList[key];
+                if (node.size() < static_cast<size_t>(k))
+                    continue;
+
+                // Heaviest edges first, so the first combinations tried are the
+                // ones with the largest weights.
+                sort(node.begin(), node.end(), compareEdge);
+
+                vector<int> indices(k);
+                iota(indices.begin(), indices.end(), 0);
+                do {
+                    vector<Edge> subV;
+                    for (int i : indices)
+                        subV.push_back(node[i]);
+
+                    if (checkComb(subV)) {
+                        c--;
+                        Sol.push_back(subV);
+                        break;
+                    }
+                } while (!nextComb(indices, node.size()));
+            }
+
+            return Sol;
+        }
+
+        // Prints each group as "<source>: <target> <target> ..." on its own line.
+        // Every group is expected to be non-empty, as produced by findSol.
+        void printSol(const vector<vector<Edge>>& Sol) {
+            for (auto& edges : Sol) {
+                cout << edges[0].from << ":" << " ";
+                for (auto& edge : edges) {
+                    cout << edge.to << " ";
+                }
+                cout << endl;
+            }
+        }
+
+};
\ No newline at end of file
diff --git a/Progetto.cpp b/Progetto.cpp
new file mode 100644
index 0000000..38b5f34
--- /dev/null
+++ b/Progetto.cpp
@@ -0,0 +1,57 @@
+
+#include <cstdio>
+#include <cstdlib>
+#include "populateGraph.cpp"
+
+using namespace std;
+
+#define ERR(msg) {fprintf(stderr, "%s\n", msg); exit(2);}
+
+// Usage: Progetto [-k K] [-c COUNT] FILE...
+// Builds the graph from every FILE and prints the groups returned by findSol(COUNT, K).
+int main(int argc,char const *argv[]) {
+    int k = 5;
+    int count = 1;
+
+    vector<string> Files;
+
+    // option parsing: every argument that is not an option is an input file
+    for (int i = 1; i < argc; i++) {
+        string arg(argv[i]);
+
+        if (arg == "-c" || arg == "-k") {
+            // argv[argc] is a null pointer: refuse an option without its value
+            if (i + 1 >= argc) ERR("valore mancante dopo l'opzione")
+            i++;
+            if (arg == "-c")
+                count = atoi(argv[i]);
+            else
+                k = atoi(argv[i]);
+        }
+        else {
+            Files.push_back(arg);
+        }
+    }
+
+    UndirectedWeightedGraph graph;
+
+    for (const string& File : Files) {
+
+        ifstream s(File);
+        if (!s) ERR("errore di apertura del file")
+
+        stringstream buffer;
+        buffer << s.rdbuf();
+        // split the file contents into phrases
+        vector<phrase> phrases = parseFile(buffer.str());
+
+        // add the phrases to the graph
+        populateGraph(&graph, phrases);
+    }
+
+    vector<vector<Edge>> Sol = graph.findSol(count, k);
+    graph.printSol(Sol);
+
+    return 0;
+
+}
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..72370e1
--- /dev/null
+++ b/README.md
@@ -0,0 +1,9 @@
+# Progetto di ASD di Isabella
+
+## Usage
+
+    $ g++ Progetto.cpp -o Progetto
+    $ ./Progetto -k 5 -c 10 file1.txt [file2.txt ...]
+
+Run the program from the directory that contains `stop_words.txt`:
+the stop-word list is opened with a path relative to the working directory.
\ No newline at end of file
diff --git a/aux.cpp b/aux.cpp
new file mode 100644
index 0000000..363c653
--- /dev/null
+++ b/aux.cpp
@@ -0,0 +1,66 @@
+#pragma once
+
+#include <algorithm>
+#include <random>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+using namespace std;
+
+// Advances `indices`, a strictly increasing selection of k values out of
+// 0..n-1, to the next combination in lexicographic order.
+// Returns true when there is no next combination (`indices` is left
+// untouched) and false when `indices` has been advanced.
+bool nextComb(vector<int> &indices, int n) {
+    int k = indices.size();
+    if (k == 0 || k > n)
+        return true; // nothing to enumerate; also avoids reading indices[-1]
+    int i = n - 1;
+    int j = k - 1;
+    // walk left over the positions that already hold their maximum value
+    while (indices[j] == i) {
+        if (j == 0)
+            return true;
+        i--;
+        j--;
+    }
+    indices[j]++;
+    // reset everything to the right of j to the smallest increasing run
+    for (int l = 1; j + l < k; l++) {
+        indices[j + l] = indices[j] + l;
+    }
+    return false;
+}
+
+// Returns the elements that belong to both s1 and s2. The smaller set is
+// scanned and the larger one is probed.
+template <typename T>
+unordered_set<T> intersectSets(const unordered_set<T>& s1, const unordered_set<T>& s2) {
+    const bool firstIsSmaller = s1.size() <= s2.size();
+    const unordered_set<T>& scanned = firstIsSmaller ? s1 : s2;
+    const unordered_set<T>& probed = firstIsSmaller ? s2 : s1;
+
+    unordered_set<T> s;
+    for (const auto& el : scanned) {
+        if (probed.find(el) != probed.end()) {
+            s.insert(el);
+        }
+    }
+    return s;
+}
+
+// Returns the keys of `map` in random order.
+template <typename K, typename V>
+vector<K> getShuffleKeys(const unordered_map<K, V>& map) {
+    // std::random_shuffle was removed in C++17, so std::shuffle is used with
+    // an engine that is seeded once from random_device.
+    static mt19937 engine(random_device{}());
+    vector<K> output;
+    output.reserve(map.size());
+    for (const auto& pair : map) {
+        output.push_back(pair.first);
+    }
+    shuffle(output.begin(), output.end(), engine);
+    return output;
+}
\ No newline at end of file
diff --git a/populateGraph.cpp b/populateGraph.cpp
new file mode 100644
index 0000000..3dc0c15
--- /dev/null
+++ b/populateGraph.cpp
@@ -0,0 +1,16 @@
+#pragma once
+
+#include "reader.cpp"
+#include "Graph.cpp"
+
+using namespace std;
+
+// For every phrase, links each word to the word that immediately follows it.
+void populateGraph(UndirectedWeightedGraph* graph, const vector<phrase>& phrases) {
+    for (const phrase& words : phrases) {
+        // `i + 1 < size()` also keeps the loop from running on an empty phrase
+        for (size_t i = 0; i + 1 < words.size(); i++) {
+            graph->addEdge(words[i], words[i + 1]);
+        }
+    }
+}
\ No newline at end of file
diff --git a/reader.cpp b/reader.cpp
new file mode 100644
index 0000000..dbab4c0
--- /dev/null
+++ b/reader.cpp
@@ -0,0 +1,105 @@
+#pragma once
+
+#include <cstddef>
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <unordered_set>
+#include <vector>
+
+using namespace std;
+
+// A phrase is the sequence of the words kept from one stretch of text.
+typedef vector<string> phrase;
+
+// True for the characters that can be part of a word: ASCII letters, digits and '@'.
+bool isWordChar(char a) {
+    return (('a' <= a && a <= 'z') || ('A' <= a && a <= 'Z') || ('0' <= a && a <= '9') || a == '@');
+}
+
+// True for the characters that close the current phrase (punctuation,
+// brackets, slashes, underscore, tilde and newline).
+bool isPhraseBreak(char a) {
+    switch (a) {
+        case '!': case ',': case '.': case ':': case ';': case '?':
+        case '(': case ')': case '[': case ']': case '{': case '}':
+        case '/': case '\\': case '_': case '~': case '\n':
+            return true;
+        default:
+            return false;
+    }
+}
+
+// Loads the stop words from "stop_words.txt" in the working directory.
+// Entries are separated by any whitespace, so a line holding several words
+// and files with CRLF line endings are read correctly.
+unordered_set<string> getStopWords() {
+    unordered_set<string> swords;
+
+    ifstream stop_words("stop_words.txt");
+    if (!stop_words) {
+        cerr << "attenzione: impossibile aprire stop_words.txt" << endl;
+        return swords;
+    }
+
+    string word;
+    while (stop_words >> word) {
+        swords.insert(word);
+    }
+
+    return swords;
+}
+
+// Lower-cases an ASCII upper-case letter; every other character is unchanged.
+char asciitolower(char in) {
+    if (in <= 'Z' && in >= 'A')
+        return in - 'A' + 'a';
+
+    return in;
+}
+
+// Splits `text` into phrases of lower-cased words, dropping the stop words.
+// Word characters accumulate into the current word; any other character ends
+// the word, and a phrase-break character also ends the current phrase.
+// Empty phrases are never emitted.
+vector<phrase> parseFile (string text) {
+    // the list is loaded once, on the first call, not once per input file
+    static const unordered_set<string> stop_words = getStopWords();
+
+    vector<phrase> phrases;
+    phrase current;
+    string word;
+
+    // moves the pending word into the current phrase unless it is a stop word
+    auto flushWord = [&]() {
+        if (word.size() > 0 && stop_words.find(word) == stop_words.end()) {
+            current.push_back(word);
+        }
+        word.clear();
+    };
+    // moves the current phrase into the result unless it is empty
+    auto flushPhrase = [&]() {
+        if (current.size() > 0) {
+            phrases.push_back(current);
+            current.clear();
+        }
+    };
+
+    for (char raw : text) {
+        char c = asciitolower(raw);
+        if (isWordChar(c)) {
+            word.append(1, c);
+        }
+        else {
+            flushWord();
+            if (isPhraseBreak(c)) {
+                flushPhrase();
+            }
+        }
+    }
+    flushWord();
+    flushPhrase();
+
+    return phrases;
+}
\ No newline at end of file
diff --git a/stop_words.txt b/stop_words.txt
new file mode 100644
index 0000000..d21c2cc
--- /dev/null
+++ b/stop_words.txt
@@ -0,0 +1,667 @@
+a
+able
+about
+above
+abst
+accordance
+according
+accordingly
+across
+act
+actually
+added
+adj
+affected
+affecting
+affects
+after
+afterwards
+again
+against
+ah
+all
+almost
+alone
+along
+already
+also
+although
+always
+am
+among
+amongst
+an
+and
+announce
+another
+any
+anybody
+anyhow
+anymore
+anyone
+anything
+anyway
+anyways
+anywhere
+apparently
+approximately
+are
+aren
+arent
+arise
+around
+as
+aside
+ask
+asking
+at
+auth
+available
+away
+awfully
+b
+back
+be
+became
+because
+become
+becomes
+becoming
+been
+before
+beforehand
+begin
+beginning
+beginnings
+begins
+behind
+being
+believe
+below
+beside
+besides
+between
+beyond
+biol
+both
+brief
+briefly
+but
+by
+c
+ca
+came
+can
+cannot
+can't
+cause
+causes
+certain
+certainly
+co
+com
+come
+comes
+contain
+containing
+contains
+could
+couldnt
+d
+date
+did
+didn't
+different
+do
+does
+doesn't
+doing
+done
+don't
+down
+downwards
+due
+during
+e
+each
+ed
+edu
+effect
+eg
+eight
+eighty
+either
+else
+elsewhere
+end
+ending
+enough
+especially
+et
+et-al
+etc
+even
+ever
+every
+everybody
+everyone
+everything
+everywhere
+ex
+except
+f
+far
+few
+ff
+fifth
+first
+five
+fix
+followed
+following
+follows
+for
+former
+formerly
+forth
+found
+four
+from
+further
+furthermore
+g
+gave
+get
+gets
+getting
+give
+given
+gives
+giving
+go
+goes
+gone
+got
+gotten
+h
+had
+happens
+hardly
+has
+hasn't
+have
+haven't
+having
+he
+hed
+hence
+her
+here
+hereafter
+hereby
+herein
+heres
+hereupon
+hers
+herself
+hes
+hi
+hid
+him
+himself
+his
+hither
+home
+how
+howbeit
+however
+hundred
+i
+id
+ie
+if
+i'll
+im
+immediate
+immediately
+importance
+important
+in
+inc
+indeed
+index
+information
+instead
+into
+invention
+inward
+is
+isn't
+it
+itd
+it'll
+its
+itself
+i've
+j
+just
+k
+keep
+keeps
+kept
+kg
+km
+know
+known
+knows
+l
+largely
+last
+lately
+later
+latter
+latterly
+least
+less
+lest
+let
+lets
+like
+liked
+likely
+line
+little
+'ll
+look
+looking
+looks
+ltd
+m
+made
+mainly
+make
+makes
+many
+may
+maybe
+me
+mean
+means
+meantime
+meanwhile
+merely
+mg
+might
+million
+miss
+ml
+more
+moreover
+most
+mostly
+mr
+mrs
+much
+mug
+must
+my
+myself
+n
+na
+name
+namely
+nay
+nd
+near
+nearly
+necessarily
+necessary
+need
+needs
+neither
+never
+nevertheless
+new
+next
+nine
+ninety
+no
+nobody
+non
+none
+nonetheless
+noone
+nor
+normally
+nos
+not
+noted
+nothing
+now
+nowhere
+o
+obtain
+obtained
+obviously
+of
+off
+often
+oh
+ok
+okay
+old
+omitted
+on
+once
+one
+ones
+only
+onto
+or
+ord
+other
+others
+otherwise
+ought
+our
+ours
+ourselves
+out
+outside
+over
+overall
+owing
+own
+p
+page
+pages
+part
+particular
+particularly
+past
+per
+perhaps
+placed
+please
+plus
+poorly
+possible
+possibly
+potentially
+pp
+predominantly
+present
+previously
+primarily
+probably
+promptly
+proud
+provides
+put
+q
+que
+quickly
+quite
+qv
+r
+ran
+rather
+rd
+re
+readily
+really
+recent
+recently
+ref
+refs
+regarding
+regardless
+regards
+related
+relatively
+research
+respectively
+resulted
+resulting
+results
+right
+run
+s
+said
+same
+saw
+say
+saying
+says
+sec
+section
+see
+seeing
+seem
+seemed
+seeming
+seems
+seen
+self
+selves
+sent
+seven
+several
+shall
+she
+shed
+she'll
+shes
+should
+shouldn't
+show
+showed
+shown
+showns
+shows
+significant
+significantly
+similar
+similarly
+since
+six
+slightly
+so
+some
+somebody
+somehow
+someone
+somethan
+something
+sometime
+sometimes
+somewhat
+somewhere
+soon
+sorry
+specifically
+specified
+specify
+specifying
+still
+stop
+strongly
+sub
+substantially
+successfully
+such
+sufficiently
+suggest
+sup
+sure
+t
+take
+taken
+taking
+tell
+tends
+th
+than
+thank
+thanks
+thanx
+that
+that'll
+thats
+that've
+the
+their
+theirs
+them
+themselves
+then
+thence
+there
+thereafter
+thereby
+thered
+therefore
+therein
+there'll
+thereof
+therere
+theres
+thereto
+thereupon
+there've
+these
+they
+theyd
+they'll
+theyre
+they've
+think
+this
+those
+thou
+though
+thoughh
+thousand
+throug
+through
+throughout
+thru
+thus
+til
+tip
+to
+together
+too
+took
+toward
+towards
+tried
+tries
+truly
+try
+trying
+ts
+twice
+two
+u
+un
+under
+unfortunately
+unless
+unlike
+unlikely
+until
+unto
+up
+upon
+ups
+us
+use
+used
+useful
+usefully
+usefulness
+uses
+using
+usually
+v
+value
+various
+'ve
+very
+via
+viz
+vol
+vols
+vs
+w
+want
+wants
+was
+wasnt
+way
+we
+wed
+welcome
+we'll
+went
+were
+werent
+we've
+what
+whatever
+what'll
+whats
+when
+whence
+whenever
+where
+whereafter
+whereas
+whereby
+wherein
+wheres
+whereupon
+wherever
+whether
+which
+while
+whim
+whither
+who
+whod
+whoever
+whole
+who'll
+whom
+whomever
+whos
+whose
+why
+widely
+willing
+wish
+with
+within
+without
+wont
+words
+world
+would
+wouldnt
+www
+x
+y
+yes
+yet
+you
+youd
+you'll
+your
+youre
+yours
+yourself
+yourselves
+you've
+z
+zero
\ No newline at end of file