diff --git a/main.py b/main.py
index e5238cd..46e8880 100755
--- a/main.py
+++ b/main.py
@@ -7,28 +7,11 @@ import json
OUTPUT_FILE = "conferences.json"
-LLM_EXAMPLE = (
- "INPUT:\n"
- '
Statistical'
- " and Computational Aspects of Dynamics
Organized by Buddhima Kasun Fernando Akurugodage"
- " (Centro di ricerca matematica Ennio De Giorgi – SNS), Paolo Giulietti, and Tanja"
- " Isabelle Schindler (Universität Wien, Austria). Centro De Giorgi – SNS, Pisa. December 13"
- " – 16, 2022.
\n"
- "\n"
- "OUTPUT (JSON): \n"
- "{"
- '"title": "Statistical and Computational Aspects of Dynamics",'
- '"url": "http://www.crm.sns.it/event/507/", '
- '"description": "Organized by Buddhima Kasun Fernando Akurugodage (Centro di ricerca matematica'
- " Ennio De Giorgi – SNS), Paolo Giulietti, and Tanja Isabelle Schindler (Universität Wien,"
- ' Austria). Centro De Giorgi - SNS, Pisa.", '
- '"startDate": "2022-12-13", '
- '"endDate": "2022-12-16"'
- "}\n"
- "\n"
- "INPUT:\n"
-)
+HTML_EXAMPLE = r"""Statistical and Computational Aspects of Dynamics
Organized by Buddhima Kasun Fernando Akurugodage (Centro di ricerca matematica Ennio De Giorgi – SNS), Paolo Giulietti, and Tanja Isabelle Schindler (Universität Wien, Austria). Centro De Giorgi – SNS, Pisa. December 13 – 16, 2022.
"""
+OUTPUT_EXAMPLE = json.dumps(
+ { "title": "Statistical and Computational Aspects of Dynamics", "url": "http://www.crm.sns.it/event/507/", "description": "Organized by Buddhima Kasun Fernando Akurugodage (Centro di ricerca matematica Ennio De Giorgi – SNS), Paolo Giulietti, and Tanja Isabelle Schindler (Universität Wien, Austria). Location: Centro De Giorgi - SNS, Pisa.", "startDate": "2022-12-13", "endDate": "2022-12-16" }
+)
def translate_to_json(conference_html: str) -> str:
llm_answer = llm.create_chat_completion(
@@ -36,11 +19,15 @@ def translate_to_json(conference_html: str) -> str:
messages=[
{
"role": "system",
- "content": "You are an assistant aiding a software developer. Be precise in formatting the output correctly as requested",
+ "content": "You are an assistant. Be precise in formatting the output and only output valid JSON using the specified fields, without including additional fields or comments.",
},
- {"role": "user", "content": LLM_EXAMPLE},
- {"role": "user", "content": conference_html},
- {"role": "user", "content": "OUTPUT (JSON):"},
+ {"role": "user", "content": "INPUT:"},
+ {"role": "user", "content": HTML_EXAMPLE },
+ {"role": "user", "content": "OUTPUT:"},
+ {"role": "user", "content": OUTPUT_EXAMPLE},
+ {"role": "user", "content": "INPUT:"},
+ {"role": "user", "content": conference_html },
+ {"role": "user", "content": "OUTPUT:"},
],
)
@@ -83,9 +70,6 @@ page_urls = [baseurl] + [baseurl + str(i) for i in range(2, 7)]
conference_html_snippets = [snippet for link in page_urls for snippet in crawl_page(link)]
-print("LLM Example Context:")
-print(LLM_EXAMPLE)
-
# Load the model and, set the chat format and use the default model context length
llm = Llama(model_path="./mistral-7b-instruct-v0.2.Q4_K_M.gguf", chat_format="llama-2", n_ctx=0)
@@ -97,6 +81,7 @@ open(OUTPUT_FILE, "w").close()
results_file = open(OUTPUT_FILE, "a")
for conference_html in conference_html_snippets:
+ print("--------------------------------------------------")
print("Translating:")
print(conference_html)