diff --git a/main.py b/main.py
index 854c039..2634469 100755
--- a/main.py
+++ b/main.py
@@ -6,7 +6,7 @@ import requests
import json
-OUTPUT_FILE = "conferences.json"
+OUTPUT_FILE = "results.json"
HTML_EXAMPLE = r"""
Statistical and Computational Aspects of Dynamics
Organized by Buddhima Kasun Fernando Akurugodage (Centro di ricerca matematica Ennio De Giorgi – SNS), Paolo Giulietti, and Tanja Isabelle Schindler (Universität Wien, Austria). Centro De Giorgi – SNS, Pisa. December 13 – 16, 2022.
"""
@@ -31,11 +31,11 @@ def translate_to_json(conference_html: str) -> str:
},
{"role": "user", "content": "INPUT:"},
{"role": "user", "content": HTML_EXAMPLE },
- {"role": "user", "content": "OUTPUT:"},
+ {"role": "user", "content": "OUTPUT JSON:"},
{"role": "user", "content": OUTPUT_EXAMPLE},
{"role": "user", "content": "INPUT:"},
{"role": "user", "content": conference_html },
- {"role": "user", "content": "OUTPUT:"},
+ {"role": "user", "content": "OUTPUT JSON:"},
],
)
@@ -78,9 +78,6 @@ page_urls = [baseurl] + [baseurl + str(i) for i in range(2, 8)]
conference_html_snippets = [snippet for link in page_urls for snippet in crawl_page(link)]
-# Log the conference snippets to a file
-json.dump(conference_html_snippets, open("paragraphs-html.json", "w"))
-
# Load the model and, set the chat format and use the default model context length
llm = Llama(
model_path="./mistral-7b-instruct-v0.2.Q4_K_M.gguf",
@@ -92,8 +89,6 @@ llm = Llama(
# the result file is a sequence of json objects, one per line
results_file = open(OUTPUT_FILE, "w")
-failed_json = open("failed.json", "w")
-
for conference_html in conference_html_snippets:
print("--------------------------------------------------")
print("Translating:")
@@ -101,21 +96,30 @@ for conference_html in conference_html_snippets:
     conference_json = translate_to_json(conference_html)
+    result = {
+        "input_html": conference_html,
+        "raw_output": conference_json,
+        # "success" is always set below; "json" is only added when
+        # the raw model output parses as valid JSON.
+    }
+
     print("Result:")
     print(conference_json)
     try:
         # parse the result string into a json object to check correctness
         conference_object = json.loads(conference_json)
-
-        json.dump(conference_object, results_file)
-        results_file.write("\n")
-        results_file.flush()
+
+        result["success"] = True
+        result["json"] = conference_object
-    except:
+    except (TypeError, ValueError):
         print("> json is invalid, skipping")
-        json.dump(conference_json, failed_json)
-        failed_json.write("\n")
-        failed_json.flush()
+        result["success"] = False
+
+    json.dump(result, results_file)
+    results_file.write("\n")
+    results_file.flush()
+
 results_file.close()