|
|
@ -78,15 +78,14 @@ page_urls = [baseurl] + [baseurl + str(i) for i in range(2, 7)]
|
|
|
|
|
|
|
|
|
|
|
|
conference_html_snippets = [snippet for link in page_urls for snippet in crawl_page(link)]
|
|
|
|
conference_html_snippets = [snippet for link in page_urls for snippet in crawl_page(link)]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Log the conference snippets to a file
|
|
|
|
|
|
|
|
# Use a context manager so the dump is flushed and the file handle is closed
# deterministically instead of relying on garbage collection.
with open("paragraphs-html.json", "w") as snippets_file:
    json.dump(conference_html_snippets, snippets_file)
|
|
|
|
|
|
|
|
|
|
|
|
# Load the model, set the chat format, and use the default model context length
|
|
|
|
# Load the model, set the chat format, and use the default model context length
|
|
|
|
llm = Llama(model_path="./mistral-7b-instruct-v0.2.Q4_K_M.gguf", chat_format="llama-2", n_ctx=0)
|
|
|
|
llm = Llama(model_path="./mistral-7b-instruct-v0.2.Q4_K_M.gguf", chat_format="llama-2", n_ctx=0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# clear the result file
|
|
|
|
|
|
|
|
open(OUTPUT_FILE, "w").close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# the result file is a sequence of json objects, one per line
|
|
|
|
# the result file is a sequence of json objects, one per line
|
|
|
|
results_file = open(OUTPUT_FILE, "a")
|
|
|
|
results_file = open(OUTPUT_FILE, "w")
|
|
|
|
|
|
|
|
|
|
|
|
for conference_html in conference_html_snippets:
|
|
|
|
for conference_html in conference_html_snippets:
|
|
|
|
print("--------------------------------------------------")
|
|
|
|
print("--------------------------------------------------")
|
|
|
|