From ea45e8832d22daf5bbc9814fbb17d61c8f1d23e1 Mon Sep 17 00:00:00 2001
From: Fran314
Date: Sun, 27 Oct 2024 10:36:52 +0100
Subject: [PATCH] completed iLMeteo

---
Review notes (not part of the commit; ignored when the patch is applied):
* scrapers/iLMeteo.js: both catch blocks wrote to the undeclared `retult`,
  so a missing row raised a ReferenceError instead of being recorded as
  null. They now write to `result`.
* getDaySummary seeded the maximum with Number.MIN_VALUE, which is the
  smallest positive number, so sub-zero days reported ~5e-324. The seeds are
  now Infinity / -Infinity, and null (unscraped) hours are skipped.
* README.md: "developement" -> "development" in the relocated caution note.
* Indentation and blank lines were lost in transit and have been rebuilt;
  apply with `git apply --ignore-whitespace` if the context does not match.

 README.md           |  79 +++++++++++++++++++++-------
 index.js            |   1 +
 scrapers/iLMeteo.js | 126 +++++++++++++++++++++++++++++++++-----------
 3 files changed, 156 insertions(+), 50 deletions(-)

diff --git a/README.md b/README.md
index 5ca8d01..b34efca 100644
--- a/README.md
+++ b/README.md
@@ -1,14 +1,19 @@
 # Weather Web-Scraper
 
+> **CAUTION**
+> The following utility is in EARLY development. Many promises made in this
+> readme might not actually be true. Hopefully one day we'll be out of beta ~and
+> be releasing on time~
+
 An utility to scrape italian weather websites to collect and compare weather
 information
 
 ## Usage
 
-> **CAUTION**
-> The following utility is in EARLY developement. Many promises made in this
-> readme might not actually be true. Hopefully one day we'll be out of beta ~and
-> be releasing on time~
+> **WARNING**
+> The script is configured to run on Nix, and because of this is more
+> complicated than it needs to be. You can remove unnecessary stuff if you're
+> not running this on nix
 
 The scraper collects data and arranges in the following scheme
 
@@ -16,12 +21,12 @@ The scraper collects data and arranges in the following scheme
 {
     "today": {
         "[0..23]": {
-            "temperature": {
+            "temp": {
                 "type": "number",
                 "unitOfMeasurement": "degrees",
                 "description": "expected temperature at the given hour"
             },
-            "precipitation": {
+            "prec": {
                 "type": "number",
                 "unitOfMeasurement": "mm",
                 "description": "expected precipitation at the given hour"
@@ -29,21 +34,59 @@ The scraper collects data and arranges in the following scheme
         }
     },
     "tomorrow": {
-        "[0..23]": {}
+        "[0..23]": {
+            "temp": {
+                "type": "number",
+                "unitOfMeasurement": "degrees",
+                "description": "expected temperature at the given hour"
+            },
+            "prec": {
+                "type": "number",
+                "unitOfMeasurement": "mm",
+                "description": "expected precipitation at the given hour"
+            }
+        }
     },
     "dayAfterTomorrow": {
-        "[0..23]": {}
+        "[0..23]": {
+            "temp": {
+                "type": "number",
+                "unitOfMeasurement": "degrees",
+                "description": "expected temperature at the given hour"
+            },
+            "prec": {
+                "type": "number",
+                "unitOfMeasurement": "mm",
+                "description": "expected precipitation at the given hour"
+            }
+        }
     },
     "week": {
-        "[0..6]": {}
+        "[0..6]": {
+            "minTemp": {
+                "type": "number",
+                "unitOfMeasurement": "degrees",
+                "description": "minimum expected temperature for the day"
+            },
+            "maxTemp": {
+                "type": "number",
+                "unitOfMeasurement": "degrees",
+                "description": "maximum expected temperature for the day"
+            },
+            "totPrec": {
+                "type": "number",
+                "unitOfMeasurement": "mm",
+                "description": "total expected precipitation for the day"
+            }
+        }
     }
 }
 ```
 
-> **WARNING**
-> The script is configured to run on Nix, and because of this is more
-> complicated than it needs to be. You can remove unnecessary stuff if you're
-> not running this on nix
+The keys for the `week` entry are intended as an offset from today. That is, the
+object at `0` will be the results for today, the object at `1` will be the
+results for tomorrow, and the object at `6` will be the results for 6 days from
+now
 
 ### Sources
 
@@ -56,8 +99,8 @@ eventually, together with the current level of implementation
 ⛔️ = Not implemented
 ```
 
-| Source                                               | Status | Comments                                                                                                                        |
-| ---------------------------------------------------- | ------ | ------------------------------------------------------------------------------------------------------------------------------- |
-| [iLMeteo](https://www.ilmeteo.it)                    | 🚧     | precipitations are not yet interpolated and are given as the scraped string. Furthermore, there is not yet weather for the week |
-| [3BMeteo](https://www.3bmeteo.com/)                  | ⛔️     |                                                                                                                                 |
-| [Meteo Aeronautica Militare](http://www.meteoam.it/) | ⛔️     |                                                                                                                                 |
+| Source                                               | Status | Comments |
+| ---------------------------------------------------- | ------ | -------- |
+| [iLMeteo](https://www.ilmeteo.it)                    | ✅     |          |
+| [3BMeteo](https://www.3bmeteo.com/)                  | ⛔️     |          |
+| [Meteo Aeronautica Militare](http://www.meteoam.it/) | ⛔️     |          |
diff --git a/index.js b/index.js
index be019df..a8dea31 100644
--- a/index.js
+++ b/index.js
@@ -19,3 +19,4 @@ const run = async () => {
 }
 
 console.dir(await run(), { depth: null })
+// await run()
diff --git a/scrapers/iLMeteo.js b/scrapers/iLMeteo.js
index 2cf89d8..b1a7ab4 100644
--- a/scrapers/iLMeteo.js
+++ b/scrapers/iLMeteo.js
@@ -1,3 +1,17 @@
+// Reads a temperature cell: the leading number of its text content.
+const parseTemp = el => {
+    return parseFloat(el.textContent)
+}
+// Reads a precipitation cell as a number; the site's '- assenti -'
+// ("absent") placeholder counts as 0.
+const parsePrec = el => {
+    const text = el.textContent
+    if (text === '- assenti -') return 0
+    return parseFloat(text)
+}
+
+// Scrapes one forecast page into an object keyed by hour; key 24 is the
+// following midnight, and an hour that cannot be read is stored as null.
 const scrapePage = async (browser, url, isToday) => {
     let result = {}
 
@@ -6,51 +20,99 @@ const scrapePage = async (browser, url, isToday) => {
 
     const weatherTable = await page.locator('.weather_table').waitHandle()
 
-    const startTime = isToday ? new Date().getHours() + 2 : 0
+    const startTime = isToday ? new Date().getHours() + 1 : 1
     const endTime = 24
-    for (let i = startTime; i < endTime; i++) {
-        const selector = isToday
-            ? `.forecast_1h[data-dialogid="${i}"] td`
-            : `.forecast_1h[data-dialogid="${1000 + i}"] td`
 
-        const fields = await weatherTable.$$(selector)
+    // Today's current hour is read from the second '.latest_detection' row
+    // rather than from a forecast row.
+    if (isToday) {
+        try {
+            const row = await weatherTable.$$('.latest_detection')
+            const fields = await row[1]?.$$('td')
+            const temp = await fields[2].evaluate(parseTemp)
+            const prec = await fields[3].evaluate(parsePrec)
+            result[startTime - 1] = { temp, prec }
+        } catch (error) {
+            // Row missing or unreadable: mark the hour as unavailable
+            result[startTime - 1] = null
+        }
+    }
+    for (let i = startTime; i <= endTime; i++) {
+        try {
+            const selector = isToday
+                ? `.forecast_1h[data-dialogid="${i}"] td`
+                : `.forecast_1h[data-dialogid="${1000 + i - 1}"] td`
+
+            const fields = await weatherTable.$$(selector)
 
-        const temp = await fields[2].evaluate(el => parseFloat(el.textContent))
-        const prec = await fields[3].evaluate(el => el.textContent)
-        result[i] = { temp, prec }
+            const temp = await fields[2].evaluate(parseTemp)
+            const prec = await fields[3].evaluate(parsePrec)
+            result[i] = { temp, prec }
+        } catch (error) {
+            // Row missing or unreadable: mark the hour as unavailable
+            result[i] = null
+        }
     }
 
     await page.close()
 
     return result
 }
-const fetchILMeteoToday = async browser => {
-    return await scrapePage(browser, 'https://www.ilmeteo.it/meteo/Pisa', true)
-}
-const fetchILMeteoTomorrow = async browser => {
-    return await scrapePage(
-        browser,
-        'https://www.ilmeteo.it/meteo/Pisa/domani',
-        false,
-    )
+
+// Moves each day's hour 24 (the following midnight) to hour 0 of the next
+// day, and drops it from the last day. Expects the 7 per-day results.
+const rearrangeResults = results => {
+    for (let i = 0; i < 6; i++) {
+        const midnight = results[i][24]
+        delete results[i][24]
+        results[i + 1][0] = midnight
+    }
+    delete results[6][24]
+
+    return results
 }
-const fetchILMeteoDayAfterTomorrow = async browser => {
-    return await scrapePage(
-        browser,
-        'https://www.ilmeteo.it/meteo/Pisa/dopodomani',
-        false,
-    )
+
+// Collapses a day's hourly entries into its minimum and maximum temperature
+// and its total precipitation.
+const getDaySummary = day => {
+    let minTemp = Infinity
+    let maxTemp = -Infinity
+    let totPrec = 0
+    for (const hour of Object.values(day)) {
+        // Hours that could not be scraped are stored as null: skip them
+        if (hour == null) continue
+        minTemp = Math.min(minTemp, hour.temp)
+        maxTemp = Math.max(maxTemp, hour.temp)
+        totPrec += hour.prec
+    }
+    return {
+        minTemp,
+        maxTemp,
+        totPrec,
+    }
 }
+
+// Scrapes today plus the following six days for Pisa and returns the hourly
+// data for the first three days together with a per-day summary of the week.
 export default async browser => {
-    const [today, tomorrow, dayAfterTomorrow] = await Promise.all([
-        fetchILMeteoToday(browser),
-        fetchILMeteoTomorrow(browser),
-        fetchILMeteoDayAfterTomorrow(browser),
-    ])
+    const url = text => `https://www.ilmeteo.it/meteo/Pisa/${text}`
+
+    const results = rearrangeResults(
+        await Promise.all([
+            scrapePage(browser, url(''), true),
+            scrapePage(browser, url('domani'), false),
+            scrapePage(browser, url('dopodomani'), false),
+            scrapePage(browser, url('3'), false),
+            scrapePage(browser, url('4'), false),
+            scrapePage(browser, url('5'), false),
+            scrapePage(browser, url('6'), false),
+        ]),
+    )
 
     return {
-        today,
-        tomorrow,
-        dayAfterTomorrow,
+        today: results[0],
+        tomorrow: results[1],
+        dayAfterTomorrow: results[2],
+        week: results.map(getDaySummary),
     }
 }