diff --git a/main.py b/main.py
index df7723d..2ae3f43 100755
--- a/main.py
+++ b/main.py
@@ -9,69 +9,67 @@ import json
OUTPUT_FILE = "results.json"
-HTML_EXAMPLE_1 = r"""
Statistical and Computational Aspects of Dynamics
Organized by Buddhima Kasun Fernando Akurugodage (Centro di ricerca matematica Ennio De Giorgi – SNS), Paolo Giulietti, and Tanja Isabelle Schindler (Universität Wien, Austria). Centro De Giorgi – SNS, Pisa. December 13 – 16, 2022.
"""
-
-
-OUTPUT_EXAMPLE_1 = json.dumps({
- "title": "Statistical and Computational Aspects of Dynamics",
- "url": "http://www.crm.sns.it/event/507/",
- "description": "Organized by Buddhima Kasun Fernando Akurugodage (Centro di ricerca matematica Ennio De Giorgi – SNS), Paolo Giulietti, and Tanja Isabelle Schindler (Universität Wien, Austria).\n\nLocation: Centro De Giorgi – SNS, Pisa",
- "startDate": "2022-12-13",
- "endDate": "2022-12-16"
-})
-
-
-HTML_EXAMPLE_2 = r"""Workshop on Variational problems, PDEs and applications
Organized by Luigi Berselli, Giuseppe Buttazzo, Matteo Novaga, and Andrea Malchiodi (Scuola Normale Superiore, Pisa). Department of Mathematics, Pisa. January 17 – 18, 2020.
"""
-
-
-OUTPUT_EXAMPLE_2 = json.dumps({
- "title": "Workshop on Variational problems, PDEs and applications",
- "url": "http://pagine.dm.unipi.it/berselli/meeting2020/",
- "description": "Organized by Luigi Berselli, Giuseppe Buttazzo, Matteo Novaga, and Andrea Malchiodi (Scuola Normale Superiore, Pisa).\n\nLocation: Department of Mathematics, Pisa",
- "startDate": "2020-01-17",
- "endDate": "2020-01-18",
-})
-
-
-HTML_EXAMPLE_3 = r"""Geometric Representation Theory. ICM Satellite Conference
Organized by Tomoyuki Arakawa (RIMS, Kyoto, Japan), Joel Kamnitzer (University of Toronto, Japan), Hiraku Nakajima (Kavli IPMU, Japan), Markus Reineke (Ruhr-Universität Bochum), Francesco Sala, and Vera Serganova (University of California Berkeley, USA). Online. June 27 – July 2, 2022.
"""
-
-
-OUTPUT_EXAMPLE_3 = json.dumps({
- "title": "Geometric Representation Theory. ICM Satellite Conference",
- "url": None,
- "description": "Organized by Tomoyuki Arakawa (RIMS, Kyoto, Japan), Joel Kamnitzer (University of Toronto, Japan), Hiraku Nakajima (Kavli IPMU, Japan), Markus Reineke (Ruhr-Universität Bochum), Francesco Sala, and Vera Serganova (University of California Berkeley, USA).\n\nLocation: Online",
- "startDate": "2022-06-27",
- "endDate": "2022-07-02",
-})
-
+EXAMPLES = [  # Few-shot examples: each entry pairs a raw text "input" with its expected "output" JSON string.
+ {
+ "input": r"""Statistical and Computational Aspects of Dynamics
Organized by Buddhima Kasun Fernando Akurugodage (Centro di ricerca matematica Ennio De Giorgi – SNS), Paolo Giulietti, and Tanja Isabelle Schindler (Universität Wien, Austria). Centro De Giorgi – SNS, Pisa. December 13 – 16, 2022.
""",
+ "output": json.dumps({  # stored as an already-serialized JSON string, not as a dict
+ "title": "Statistical and Computational Aspects of Dynamics",
+ "url": "http://www.crm.sns.it/event/507/",
+ "description": "Organized by Buddhima Kasun Fernando Akurugodage (Centro di ricerca matematica Ennio De Giorgi – SNS), Paolo Giulietti, and Tanja Isabelle Schindler (Universität Wien, Austria).\n\nLocation: Centro De Giorgi – SNS, Pisa",
+ "startDate": "2022-12-13",
+ "endDate": "2022-12-16"
+ }),
+ },
+ {
+ "input": r"""Workshop on Variational problems, PDEs and applications
Organized by Luigi Berselli, Giuseppe Buttazzo, Matteo Novaga, and Andrea Malchiodi (Scuola Normale Superiore, Pisa). Department of Mathematics, Pisa. January 17 – 18, 2020.
""",
+ "output": json.dumps({
+ "title": "Workshop on Variational problems, PDEs and applications",
+ "url": "http://pagine.dm.unipi.it/berselli/meeting2020/",
+ "description": "Organized by Luigi Berselli, Giuseppe Buttazzo, Matteo Novaga, and Andrea Malchiodi (Scuola Normale Superiore, Pisa).\n\nLocation: Department of Mathematics, Pisa",
+ "startDate": "2020-01-17",
+ "endDate": "2020-01-18",
+ }),
+ },
+ {  # online event: no URL, and the date range spans two months
+ "input": r"""Geometric Representation Theory. ICM Satellite Conference
Organized by Tomoyuki Arakawa (RIMS, Kyoto, Japan), Joel Kamnitzer (University of Toronto, Japan), Hiraku Nakajima (Kavli IPMU, Japan), Markus Reineke (Ruhr-Universität Bochum), Francesco Sala, and Vera Serganova (University of California Berkeley, USA). Online. June 27 – July 2, 2022.
""",
+ "output": json.dumps({
+ "title": "Geometric Representation Theory. ICM Satellite Conference",
+ "url": None,  # json.dumps serializes None as JSON null
+ "description": "Organized by Tomoyuki Arakawa (RIMS, Kyoto, Japan), Joel Kamnitzer (University of Toronto, Japan), Hiraku Nakajima (Kavli IPMU, Japan), Markus Reineke (Ruhr-Universität Bochum), Francesco Sala, and Vera Serganova (University of California Berkeley, USA).\n\nLocation: Online",
+ "startDate": "2022-06-27",
+ "endDate": "2022-07-02",
+ }),
+ },
+ {  # no "Organized by" sentence in the input: the description carries only the location
+ "input": r"""Incontri di geometria algebrica ed aritmetica Milano – Pisa
Department of Mathematics, Pisa. November 16 – 17, 2022.
""",
+ "output": json.dumps({
+ "title": "Incontri di geometria algebrica ed aritmetica Milano – Pisa",
+ "url": "https://events.dm.unipi.it/event/109/",
+ "description": "Location: Department of Mathematics, Pisa",
+ "startDate": "2022-11-16",
+ "endDate": "2022-11-17"
+ })
+ }
+]
-def translate_to_json(conference_html: str) -> str:
+def translate_to_json(input_html: str) -> str:
llm_answer = llm.create_chat_completion(
max_tokens=None,
messages=[
{
"role": "system",
- "content": "You are an assistant helping a developer converting raw text data to JSON. Be precise in formatting the output and only output valid JSON using the specificied fields, without including any additional fields or comments",
+ "content": "You are an assistant helping a developer converting raw text data to JSON. Output only valid JSON following the given examples, without including any additional notes or comments",
+ },
+        ] + [
+            # Few-shot turns, one per EXAMPLES entry. A list comprehension is required here:
+            # `list + map(...)` raises TypeError, and single-quoted keys keep the f-string valid before Python 3.12.
+            {"role": "user", "content": f"INPUT:\n{example['input']}\n\nOUTPUT JSON:\n{example['output']}"}
+            for example in EXAMPLES
+        ] + [
+ {
+ "role": "user",
+ "content": f"INPUT:\n{input_html}\n\nOUTPUT JSON:\n"
},
- # Example 1
- { "role": "user", "content": "INPUT:" },
- { "role": "user", "content": HTML_EXAMPLE_1 },
- { "role": "user", "content": "OUTPUT JSON:" },
- { "role": "user", "content": OUTPUT_EXAMPLE_1 },
- # Example 2
- { "role": "user", "content": "INPUT:" },
- { "role": "user", "content": HTML_EXAMPLE_2 },
- { "role": "user", "content": "OUTPUT JSON:" },
- { "role": "user", "content": OUTPUT_EXAMPLE_2 },
- # Example 3
- { "role": "user", "content": "INPUT:" },
- { "role": "user", "content": HTML_EXAMPLE_3 },
- { "role": "user", "content": "OUTPUT JSON:" },
- { "role": "user", "content": OUTPUT_EXAMPLE_3 },
- # Actual item to process
- { "role": "user", "content": "INPUT:" },
- { "role": "user", "content": conference_html },
- { "role": "user", "content": "OUTPUT JSON:" },
],
)
@@ -119,6 +117,7 @@ llm = Llama(
model_path="./mistral-7b-instruct-v0.2.Q4_K_M.gguf",
chat_format="llama-2",
verbose=False,
+ echo=True,
n_ctx=0,
)