Fix example

main
Francesco Minnocci 10 months ago
parent 51100ae0f0
commit 2a29c9e09f
No known key found for this signature in database
GPG Key ID: 76DA3AF9BAED1A32

@ -7,28 +7,11 @@ import json
OUTPUT_FILE = "conferences.json"
# Few-shot prompt text: one worked example made of an "INPUT:" HTML snippet
# for a single conference and the "OUTPUT (JSON):" object expected for it
# (title, url, description, startDate, endDate), followed by a trailing
# "INPUT:" header.
# NOTE(review): in the example description the "&#8211;" dash is dropped in
# "Ennio De Giorgi SNS" but rendered as "-" in "Centro De Giorgi - SNS", and
# the date sentence is left out of the description (the dates appear in
# startDate/endDate instead) - confirm this is the intended target format.
LLM_EXAMPLE = (
"INPUT:\n"
'<p><a href="http://www.crm.sns.it/event/507/" target="_blank" rel="noreferrer noopener">Statistical'
" and Computational Aspects of Dynamics<br></a>Organized by Buddhima Kasun Fernando Akurugodage"
" (Centro di ricerca matematica Ennio De Giorgi &#8211; SNS), Paolo Giulietti, and Tanja"
" Isabelle Schindler (Universität Wien, Austria). Centro De Giorgi &#8211; SNS, Pisa. December 13"
" &#8211; 16, 2022.</p>\n"
"\n"
"OUTPUT (JSON): \n"
"{"
'"title": "Statistical and Computational Aspects of Dynamics",'
'"url": "http://www.crm.sns.it/event/507/", '
'"description": "Organized by Buddhima Kasun Fernando Akurugodage (Centro di ricerca matematica'
" Ennio De Giorgi SNS), Paolo Giulietti, and Tanja Isabelle Schindler (Universität Wien,"
' Austria). Centro De Giorgi - SNS, Pisa.", '
'"startDate": "2022-12-13", '
'"endDate": "2022-12-16"'
"}\n"
"\n"
"INPUT:\n"
)
# Input half of the worked example: the raw HTML paragraph describing one
# conference (title link, organizers, venue and dates), kept on a single line.
HTML_EXAMPLE = r"""<p><a href="http://www.crm.sns.it/event/507/" target="_blank" rel="noreferrer noopener">Statistical and Computational Aspects of Dynamics<br></a>Organized by Buddhima Kasun Fernando Akurugodage (Centro di ricerca matematica Ennio De Giorgi &#8211; SNS), Paolo Giulietti, and Tanja Isabelle Schindler (Universität Wien, Austria). Centro De Giorgi &#8211; SNS, Pisa. December 13 &#8211; 16, 2022.</p>"""

# Output half of the worked example: the JSON text for the record above,
# serialized with json.dumps defaults (so non-ASCII characters are escaped).
OUTPUT_EXAMPLE = json.dumps(
    {
        "title": "Statistical and Computational Aspects of Dynamics",
        "url": "http://www.crm.sns.it/event/507/",
        "description": (
            "Organized by Buddhima Kasun Fernando Akurugodage (Centro di ricerca"
            " matematica Ennio De Giorgi SNS), Paolo Giulietti, and Tanja Isabelle"
            " Schindler (Universität Wien, Austria). Location: Centro De Giorgi -"
            " SNS, Pisa."
        ),
        "startDate": "2022-12-13",
        "endDate": "2022-12-16",
    }
)
def translate_to_json(conference_html: str) -> str:
llm_answer = llm.create_chat_completion(
@ -36,11 +19,15 @@ def translate_to_json(conference_html: str) -> str:
messages=[
{
"role": "system",
"content": "You are an assistant aiding a software developer. Be precise in formatting the output correctly as requested",
"content": "You are an assistant. Be precise in formatting the output and only output valid JSON using the specificied fields, without including additional fields or comments.",
},
{"role": "user", "content": LLM_EXAMPLE},
{"role": "user", "content": "INPUT:"},
{"role": "user", "content": HTML_EXAMPLE },
{"role": "user", "content": "OUTPUT:"},
{"role": "user", "content": OUTPUT_EXAMPLE},
{"role": "user", "content": "INPUT:"},
{"role": "user", "content": conference_html },
{"role": "user", "content": "OUTPUT (JSON):"},
{"role": "user", "content": "OUTPUT:"},
],
)
@ -83,9 +70,6 @@ page_urls = [baseurl] + [baseurl + str(i) for i in range(2, 7)]
conference_html_snippets = [snippet for link in page_urls for snippet in crawl_page(link)]
print("LLM Example Context:")
print(LLM_EXAMPLE)
# Load the model, set the chat format, and use the model's default context length
llm = Llama(model_path="./mistral-7b-instruct-v0.2.Q4_K_M.gguf", chat_format="llama-2", n_ctx=0)
@ -97,6 +81,7 @@ open(OUTPUT_FILE, "w").close()
results_file = open(OUTPUT_FILE, "a")
for conference_html in conference_html_snippets:
print("--------------------------------------------------")
print("Translating:")
print(conference_html)

Loading…
Cancel
Save