diff --git a/README.md b/README.md
index 551f1d2..5ca8d01 100644
--- a/README.md
+++ b/README.md
@@ -12,9 +12,33 @@ information
 
 The scraper collects data and arranges in the following scheme
 
-| Object field | Data Type | Unit of measurement | Meaning |
-| ------------ | --------- | ------------------- | ------- |
-|              |           |                     |         |
+```json
+{
+  "today": {
+    "[0..23]": {
+      "temperature": {
+        "type": "number",
+        "unitOfMeasurement": "degrees",
+        "description": "expected temperature at the given hour"
+      },
+      "precipitation": {
+        "type": "number",
+        "unitOfMeasurement": "mm",
+        "description": "expected precipitation at the given hour"
+      }
+    }
+  },
+  "tomorrow": {
+    "[0..23]": {}
+  },
+  "dayAfterTomorrow": {
+    "[0..23]": {}
+  },
+  "week": {
+    "[0..6]": {}
+  }
+}
+```
 
 > **WARNING**
 > The script is configured to run on Nix, and because of this is more
@@ -32,8 +56,8 @@ eventually, together with the current level of implementation
 ⛔️ = Not implemented
 ```
 
-| Source                                               | Status | Comments |
-| ---------------------------------------------------- | ------ | -------- |
-| [iLMeteo](https://www.ilmeteo.it)                    | ⛔️     |          |
-| [3BMeteo](https://www.3bmeteo.com/)                  | ⛔️     |          |
-| [Meteo Aeronautica Militare](http://www.meteoam.it/) | ⛔️     |          |
+| Source                                               | Status | Comments                                                                                                                        |
+| ---------------------------------------------------- | ------ | ------------------------------------------------------------------------------------------------------------------------------- |
+| [iLMeteo](https://www.ilmeteo.it)                    | 🚧     | precipitations are not yet interpolated and are given as the scraped string. Furthermore, there is not yet weather for the week |
+| [3BMeteo](https://www.3bmeteo.com/)                  | ⛔️     |                                                                                                                                 |
+| [Meteo Aeronautica Militare](http://www.meteoam.it/) | ⛔️     |                                                                                                                                 |
diff --git a/index.js b/index.js
index abdb283..be019df 100644
--- a/index.js
+++ b/index.js
@@ -1,25 +1,29 @@
 import 'dotenv/config'
 import puppeteer from 'puppeteer'
 
+import fetchILMeteo from './scrapers/iLMeteo.js'
+
 const NIX_OPS = {
   executablePath: process.env.NIX_CHROMIUM_PATH,
 }
 
 const opts = process.env.ON_NIX ? NIX_OPS : {}
 
-const HOUR = 21
-
-const fetchILMeteo = async () => {
+/**
+ * Launches one browser, runs every scraper against it and returns the
+ * results keyed by source name.
+ */
+const run = async () => {
   const browser = await puppeteer.launch(opts)
-  const page = await browser.newPage()
-  await page.goto('https://www.ilmeteo.it/meteo/Pisa')
 
-  const weatherTable = await page.locator('.weather_table').waitHandle()
-  const row = await weatherTable.$(`.forecast_1h[data-hour="${HOUR}"]`)
-  const temp = await row.$('[data-value="temperatura"]')
-  console.log(await temp?.evaluate(el => el.textContent))
-
-  await browser.close()
+  try {
+    const [iLMeteo] = await Promise.all([fetchILMeteo(browser)])
+
+    return { iLMeteo }
+  } finally {
+    // Always release the browser process, even when a scraper throws.
+    await browser.close()
+  }
 }
 
-fetchILMeteo()
+console.dir(await run(), { depth: null })
diff --git a/scrapers/iLMeteo.js b/scrapers/iLMeteo.js
new file mode 100644
index 0000000..2cf89d8
--- /dev/null
+++ b/scrapers/iLMeteo.js
@@ -0,0 +1,76 @@
+/**
+ * Scrapes the hourly forecast table of a single iLMeteo page.
+ *
+ * @param {import('puppeteer').Browser} browser - shared browser instance
+ * @param {string} url - forecast page to open
+ * @param {boolean} isToday - whether `url` is the page for the current day
+ * @returns {Promise<Object<number, {temperature: number, precipitation: string}>>}
+ *   forecast keyed by hour; hours whose row is missing are omitted
+ */
+const scrapePage = async (browser, url, isToday) => {
+  const result = {}
+
+  const page = await browser.newPage()
+  try {
+    await page.goto(url)
+
+    const weatherTable = await page.locator('.weather_table').waitHandle()
+
+    // NOTE(review): today's page is read starting two hours after the local
+    // clock hour - presumably earlier rows are not rendered; confirm.
+    const startTime = isToday ? new Date().getHours() + 2 : 0
+    const endTime = 24
+    for (let i = startTime; i < endTime; i++) {
+      // Rows are addressed by data-dialogid: the bare hour on today's page,
+      // the hour offset by 1000 on the other pages.
+      const dialogId = isToday ? i : 1000 + i
+      const fields = await weatherTable.$$(
+        `.forecast_1h[data-dialogid="${dialogId}"] td`,
+      )
+
+      // Skip hours the page does not list instead of crashing on undefined.
+      if (fields.length < 4) continue
+
+      const temperature = await fields[2].evaluate(el =>
+        parseFloat(el.textContent),
+      )
+      const precipitation = await fields[3].evaluate(el => el.textContent)
+      // Key names follow the output scheme documented in README.md.
+      result[i] = { temperature, precipitation }
+    }
+  } finally {
+    // Close the tab even when navigation or scraping throws.
+    await page.close()
+  }
+
+  return result
+}
+
+const fetchILMeteoToday = browser =>
+  scrapePage(browser, 'https://www.ilmeteo.it/meteo/Pisa', true)
+
+const fetchILMeteoTomorrow = browser =>
+  scrapePage(browser, 'https://www.ilmeteo.it/meteo/Pisa/domani', false)
+
+const fetchILMeteoDayAfterTomorrow = browser =>
+  scrapePage(browser, 'https://www.ilmeteo.it/meteo/Pisa/dopodomani', false)
+
+/**
+ * Scrapes today, tomorrow and the day after tomorrow concurrently.
+ *
+ * @param {import('puppeteer').Browser} browser - shared browser instance
+ * @returns {Promise<{today: object, tomorrow: object, dayAfterTomorrow: object}>}
+ */
+export default async browser => {
+  const [today, tomorrow, dayAfterTomorrow] = await Promise.all([
+    fetchILMeteoToday(browser),
+    fetchILMeteoTomorrow(browser),
+    fetchILMeteoDayAfterTomorrow(browser),
+  ])
+
+  return {
+    today,
+    tomorrow,
+    dayAfterTomorrow,
+  }
+}