diff --git a/main.py b/main.py index e5238cd..46e8880 100755 --- a/main.py +++ b/main.py @@ -7,28 +7,11 @@ import json OUTPUT_FILE = "conferences.json" -LLM_EXAMPLE = ( - "INPUT:\n" - '

Statistical' - " and Computational Aspects of Dynamics
Organized by Buddhima Kasun Fernando Akurugodage" - " (Centro di ricerca matematica Ennio De Giorgi – SNS), Paolo Giulietti, and Tanja" - " Isabelle Schindler (Universität Wien, Austria). Centro De Giorgi – SNS, Pisa. December 13" - " – 16, 2022.

\n" - "\n" - "OUTPUT (JSON): \n" - "{" - '"title": "Statistical and Computational Aspects of Dynamics",' - '"url": "http://www.crm.sns.it/event/507/", ' - '"description": "Organized by Buddhima Kasun Fernando Akurugodage (Centro di ricerca matematica' - " Ennio De Giorgi – SNS), Paolo Giulietti, and Tanja Isabelle Schindler (Universität Wien," - ' Austria). Centro De Giorgi - SNS, Pisa.", ' - '"startDate": "2022-12-13", ' - '"endDate": "2022-12-16"' - "}\n" - "\n" - "INPUT:\n" -) +HTML_EXAMPLE = r"""

Statistical and Computational Aspects of Dynamics
Organized by Buddhima Kasun Fernando Akurugodage (Centro di ricerca matematica Ennio De Giorgi – SNS), Paolo Giulietti, and Tanja Isabelle Schindler (Universität Wien, Austria). Centro De Giorgi – SNS, Pisa. December 13 – 16, 2022.

""" +OUTPUT_EXAMPLE = json.dumps( + { "title": "Statistical and Computational Aspects of Dynamics", "url": "http://www.crm.sns.it/event/507/", "description": "Organized by Buddhima Kasun Fernando Akurugodage (Centro di ricerca matematica Ennio De Giorgi – SNS), Paolo Giulietti, and Tanja Isabelle Schindler (Universität Wien, Austria). Location: Centro De Giorgi - SNS, Pisa.", "startDate": "2022-12-13", "endDate": "2022-12-16" } +) def translate_to_json(conference_html: str) -> str: llm_answer = llm.create_chat_completion( @@ -36,11 +19,15 @@ def translate_to_json(conference_html: str) -> str: messages=[ { "role": "system", - "content": "You are an assistant aiding a software developer. Be precise in formatting the output correctly as requested", + "content": "You are an assistant. Be precise in formatting the output and only output valid JSON using the specificied fields, without including additional fields or comments.", }, - {"role": "user", "content": LLM_EXAMPLE}, - {"role": "user", "content": conference_html}, - {"role": "user", "content": "OUTPUT (JSON):"}, + {"role": "user", "content": "INPUT:"}, + {"role": "user", "content": HTML_EXAMPLE }, + {"role": "user", "content": "OUTPUT:"}, + {"role": "user", "content": OUTPUT_EXAMPLE}, + {"role": "user", "content": "INPUT:"}, + {"role": "user", "content": conference_html }, + {"role": "user", "content": "OUTPUT:"}, ], ) @@ -83,9 +70,6 @@ page_urls = [baseurl] + [baseurl + str(i) for i in range(2, 7)] conference_html_snippets = [snippet for link in page_urls for snippet in crawl_page(link)] -print("LLM Example Context:") -print(LLM_EXAMPLE) - # Load the model and, set the chat format and use the default model context length llm = Llama(model_path="./mistral-7b-instruct-v0.2.Q4_K_M.gguf", chat_format="llama-2", n_ctx=0) @@ -97,6 +81,7 @@ open(OUTPUT_FILE, "w").close() results_file = open(OUTPUT_FILE, "a") for conference_html in conference_html_snippets: + print("--------------------------------------------------") print("Translating:") print(conference_html)