chore: minor enhancements

main
Antonio De Lucreziis 10 months ago
parent 8c6364277e
commit 75557d0834

3
.gitignore vendored

@ -1,6 +1,9 @@
# Local files
*.local*
# Output files
*.json
# Python
venv/

@ -27,7 +27,7 @@ $ pip install -r requirements.txt
## Launch
The following command will crawl the conferences from `https://www.dm.unipi.it/research/past-conferences/` (pages 1 to 5) and save the results in `conferences.json`:
The following command will crawl all the conferences from `https://www.dm.unipi.it/research/past-conferences/` (all pages) and save the results in `conferences.json` as a sequence of JSON objects, one per line:
```bash
$ python main.py

@ -6,6 +6,8 @@ from bs4 import BeautifulSoup
import textwrap
import json
OUTPUT_FILE = "conferences.json"
LLM_EXAMPLE = (
"INPUT:\n"
'<p><a href="http://www.crm.sns.it/event/507/" target="_blank" rel="noreferrer noopener">Statistical'
@ -99,10 +101,10 @@ llm = Llama(model_path="./mistral-7b-instruct-v0.2.Q4_K_M.gguf", chat_format="ll
# clear the result file
open("conferences.json", "w").close()
open(OUTPUT_FILE, "w").close()
# the result file is a sequence of JSON objects, one per line
results_file = open("conferences.json", "a")
results_file = open(OUTPUT_FILE, "a")
for conference_html in conference_html_snippets:
print("Translating:")

Loading…
Cancel
Save