diff --git a/main.py b/main.py index df7723d..2ae3f43 100755 --- a/main.py +++ b/main.py @@ -9,69 +9,67 @@ import json OUTPUT_FILE = "results.json" -HTML_EXAMPLE_1 = r"""

Statistical and Computational Aspects of Dynamics
Organized by Buddhima Kasun Fernando Akurugodage (Centro di ricerca matematica Ennio De Giorgi – SNS), Paolo Giulietti, and Tanja Isabelle Schindler (Universität Wien, Austria). Centro De Giorgi – SNS, Pisa. December 13 – 16, 2022.

""" - - -OUTPUT_EXAMPLE_1 = json.dumps({ - "title": "Statistical and Computational Aspects of Dynamics", - "url": "http://www.crm.sns.it/event/507/", - "description": "Organized by Buddhima Kasun Fernando Akurugodage (Centro di ricerca matematica Ennio De Giorgi – SNS), Paolo Giulietti, and Tanja Isabelle Schindler (Universität Wien, Austria).\n\nLocation: Centro De Giorgi – SNS, Pisa", - "startDate": "2022-12-13", - "endDate": "2022-12-16" -}) - - -HTML_EXAMPLE_2 = r"""

Workshop on Variational problems, PDEs and applications
Organized by Luigi Berselli, Giuseppe Buttazzo, Matteo Novaga, and Andrea Malchiodi (Scuola Normale Superiore, Pisa). Department of Mathematics, Pisa. January 17 – 18, 2020.

""" - - -OUTPUT_EXAMPLE_2 = json.dumps({ - "title": "Workshop on Variational problems, PDEs and applications", - "url": "http://pagine.dm.unipi.it/berselli/meeting2020/", - "description": "Organized by Luigi Berselli, Giuseppe Buttazzo, Matteo Novaga, and Andrea Malchiodi (Scuola Normale Superiore, Pisa).\n\nLocation: Department of Mathematics, Pisa", - "startDate": "2020-01-17", - "endDate": "2020-01-18", -}) - - -HTML_EXAMPLE_3 = r"""

Geometric Representation Theory. ICM Satellite Conference
Organized by Tomoyuki Arakawa (RIMS, Kyoto, Japan), Joel Kamnitzer (University of Toronto, Japan), Hiraku Nakajima (Kavli IPMU, Japan), Markus Reineke (Ruhr-Universität Bochum), Francesco Sala, and Vera Serganova (University of California Berkeley, USA). Online. June 27 – July 2, 2022.

""" - - -OUTPUT_EXAMPLE_3 = json.dumps({ - "title": "Geometric Representation Theory. ICM Satellite Conference", - "url": None, - "description": "Organized by Tomoyuki Arakawa (RIMS, Kyoto, Japan), Joel Kamnitzer (University of Toronto, Japan), Hiraku Nakajima (Kavli IPMU, Japan), Markus Reineke (Ruhr-Universität Bochum), Francesco Sala, and Vera Serganova (University of California Berkeley, USA).\n\nLocation: Online", - "startDate": "2022-06-27", - "endDate": "2022-07-02", -}) - +EXAMPLES = [ + { + "input": r"""

Statistical and Computational Aspects of Dynamics
Organized by Buddhima Kasun Fernando Akurugodage (Centro di ricerca matematica Ennio De Giorgi – SNS), Paolo Giulietti, and Tanja Isabelle Schindler (Universität Wien, Austria). Centro De Giorgi – SNS, Pisa. December 13 – 16, 2022.

""", + "output": json.dumps({ + "title": "Statistical and Computational Aspects of Dynamics", + "url": "http://www.crm.sns.it/event/507/", + "description": "Organized by Buddhima Kasun Fernando Akurugodage (Centro di ricerca matematica Ennio De Giorgi – SNS), Paolo Giulietti, and Tanja Isabelle Schindler (Universität Wien, Austria).\n\nLocation: Centro De Giorgi – SNS, Pisa", + "startDate": "2022-12-13", + "endDate": "2022-12-16" + }), + }, + { + "input": r"""

Workshop on Variational problems, PDEs and applications
Organized by Luigi Berselli, Giuseppe Buttazzo, Matteo Novaga, and Andrea Malchiodi (Scuola Normale Superiore, Pisa). Department of Mathematics, Pisa. January 17 – 18, 2020.

""", + "output": json.dumps({ + "title": "Workshop on Variational problems, PDEs and applications", + "url": "http://pagine.dm.unipi.it/berselli/meeting2020/", + "description": "Organized by Luigi Berselli, Giuseppe Buttazzo, Matteo Novaga, and Andrea Malchiodi (Scuola Normale Superiore, Pisa).\n\nLocation: Department of Mathematics, Pisa", + "startDate": "2020-01-17", + "endDate": "2020-01-18", + }), + }, + { + "input": r"""

Geometric Representation Theory. ICM Satellite Conference
Organized by Tomoyuki Arakawa (RIMS, Kyoto, Japan), Joel Kamnitzer (University of Toronto, Japan), Hiraku Nakajima (Kavli IPMU, Japan), Markus Reineke (Ruhr-Universität Bochum), Francesco Sala, and Vera Serganova (University of California Berkeley, USA). Online. June 27 – July 2, 2022.

""", + "output": json.dumps({ + "title": "Geometric Representation Theory. ICM Satellite Conference", + "url": None, + "description": "Organized by Tomoyuki Arakawa (RIMS, Kyoto, Japan), Joel Kamnitzer (University of Toronto, Japan), Hiraku Nakajima (Kavli IPMU, Japan), Markus Reineke (Ruhr-Universität Bochum), Francesco Sala, and Vera Serganova (University of California Berkeley, USA).\n\nLocation: Online", + "startDate": "2022-06-27", + "endDate": "2022-07-02", + }), + }, + { + "input": r"""

Incontri di geometria algebrica ed aritmetica Milano – Pisa
Department of Mathematics, Pisa. November 16 – 17, 2022.

""", + "output": json.dumps({ + "title": "Incontri di geometria algebrica ed aritmetica Milano – Pisa", + "url": "https://events.dm.unipi.it/event/109/", + "description": "Location: Department of Mathematics, Pisa", + "startDate": "2022-11-16", + "endDate": "2022-11-17" + }) + } +] -def translate_to_json(conference_html: str) -> str: +def translate_to_json(input_html: str) -> str: llm_answer = llm.create_chat_completion( max_tokens=None, messages=[ { "role": "system", - "content": "You are an assistant helping a developer converting raw text data to JSON. Be precise in formatting the output and only output valid JSON using the specificied fields, without including any additional fields or comments", + "content": "You are an assistant helping a developer converting raw text data to JSON. Output only valid JSON following the given examples, without including any additional notes or comments", + }, + ] + ( + map(lambda example: ({ + "role": "user", + "content": f"INPUT:\n{example["input"]}\n\nOUTPUT JSON:\n{example["output"]}" + }), EXAMPLES) + ) + [ + { + "role": "user", + "content": f"INPUT:\n{input_html}\n\nOUTPUT JSON:\n" }, - # Example 1 - { "role": "user", "content": "INPUT:" }, - { "role": "user", "content": HTML_EXAMPLE_1 }, - { "role": "user", "content": "OUTPUT JSON:" }, - { "role": "user", "content": OUTPUT_EXAMPLE_1 }, - # Example 2 - { "role": "user", "content": "INPUT:" }, - { "role": "user", "content": HTML_EXAMPLE_2 }, - { "role": "user", "content": "OUTPUT JSON:" }, - { "role": "user", "content": OUTPUT_EXAMPLE_2 }, - # Example 3 - { "role": "user", "content": "INPUT:" }, - { "role": "user", "content": HTML_EXAMPLE_3 }, - { "role": "user", "content": "OUTPUT JSON:" }, - { "role": "user", "content": OUTPUT_EXAMPLE_3 }, - # Actual item to process - { "role": "user", "content": "INPUT:" }, - { "role": "user", "content": conference_html }, - { "role": "user", "content": "OUTPUT JSON:" }, ], ) @@ -119,6 +117,7 @@ llm = Llama( model_path="./mistral-7b-instruct-v0.2.Q4_K_M.gguf", chat_format="llama-2", verbose=False, + echo=True, n_ctx=0, )