From 27b8e7cdf1a9c36a76cece5325a625d228b6e51d Mon Sep 17 00:00:00 2001
From: alberto
Date: Sun, 9 Mar 2025 09:22:18 +0100
Subject: [PATCH] Add parseunimap script.

---
 README.md      | 22 +++++++++++++
 parseunimap.js | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 110 insertions(+)
 create mode 100644 parseunimap.js

diff --git a/README.md b/README.md
index 74590ee..5b696c1 100644
--- a/README.md
+++ b/README.md
@@ -105,3 +105,25 @@ Questa repository contiene i seguenti script:
     ```
 
     Qui un esempio di utilizzo: [Argomenti di G2](https://git.phc.dm.unipi.it/lukefleed/domande-orali/src/branch/master/geoemtria-2)
+
+- `parseunimap` — [@aziis98](https://git.phc.dm.unipi.it/aziis98) + [@alberto](https://git.phc.dm.unipi.it/alberto)
+
+    Script che converte il registro delle lezioni dell'Unipi in markdown
+
+    Installare:
+    ```bash
+    bun init
+    bun install jsdom
+    bun install node-fetch
+    ```
+
+    Per eseguire:
+    ```bash
+    bun run parseunimap.js <url>
+    ```
+    Una volta ottenuto in output il file markdown, è possibile convertirlo in pdf utilizzando ad
+    esempio Pandoc tramite il comando
+
+    ```bash
+    pandoc fileregistro.md -o fileregistro.pdf
+    ```
diff --git a/parseunimap.js b/parseunimap.js
new file mode 100644
index 0000000..f5a5648
--- /dev/null
+++ b/parseunimap.js
@@ -0,0 +1,88 @@
+import {createWriteStream} from 'fs';
+import {JSDOM} from "jsdom";
+import fetch from "node-fetch";
+
+// Markdown file written to the current working directory.
+const OUTPUT_FILE = "registro.md";
+
+/**
+ * Converts the text of each list item into a numbered markdown line.
+ *
+ * Only the part following the first "word:" prefix is kept; items without
+ * such a prefix are skipped and do not consume a number.
+ *
+ * @param {string[]} texts - Text content of the <li> elements, in order.
+ * @returns {string[]} Lines of the form "N. text \n".
+ */
+function toMarkdownLines(texts) {
+  return texts
+      .map(text => text.match(/\w+:(.+)/s))
+      .filter(match => match !== null)
+      .map(match => match[1].trim())
+      .map((entry, index) => `${index + 1}. ${entry} \n`);
+}
+
+/**
+ * Writes the chunks, in order, to a file through a write stream.
+ *
+ * @param {string} path - Destination file.
+ * @param {string[]} chunks - Strings to write.
+ * @returns {Promise<void>} Resolves on the stream's 'finish' event and
+ *     rejects on its 'error' event.
+ */
+function writeChunks(path, chunks) {
+  return new Promise((resolve, reject) => {
+    const writeStream = createWriteStream(path);
+    // Handlers are attached before any write so no event is missed.
+    writeStream.on('error', reject);
+    writeStream.on('finish', () => resolve());
+    chunks.forEach(chunk => writeStream.write(chunk));
+    writeStream.end();
+  });
+}
+
+(async () => {
+  const url = process.argv[2];
+  if (!url) {
+    console.error("Usage: bun run parseunimap.js <url>");
+    process.exit(1);
+  }
+
+  try {
+    const response =
+        await fetch(url, {headers : {"User-Agent" : "Mozilla/5.0"}});
+    if (!response.ok) {
+      throw new Error(`HTTP ${response.status} ${response.statusText}`);
+    }
+
+    // The page bytes are decoded as latin1 (ISO-8859-1), not UTF-8.
+    const buffer = await response.arrayBuffer();
+    const isoHtml = Buffer.from(buffer).toString("latin1");
+
+    const document = new JSDOM(isoHtml).window.document;
+    const ol = document.querySelector("ol");
+    if (!ol) {
+      console.error("No <ol> found on the page");
+      process.exitCode = 1;
+      return;
+    }
+
+    // Drop <a> and <i> elements so only the remaining text is exported.
+    ol.querySelectorAll("a, i").forEach(el => el.remove());
+
+    const texts = [...ol.children ].map(li => li.textContent);
+    const head = "# Registro delle lezioni \n\n";
+    try {
+      await writeChunks(OUTPUT_FILE, [ head, ...toMarkdownLines(texts) ]);
+    } catch (err) {
+      console.error(
+          `There is an error writing the file ${OUTPUT_FILE} => ${err}`);
+      process.exitCode = 1;
+      return;
+    }
+    console.log(`Registro salvato nel file ${OUTPUT_FILE}`);
+  } catch (error) {
+    console.error("Error fetching or processing page:", error);
+    process.exitCode = 1;
+  }
+})();