started working on iLMeteo

main
Fran314 4 weeks ago
parent 1c4e05b296
commit 3978d4307f

@ -12,9 +12,33 @@ information
The scraper collects data and arranges it in the following schema The scraper collects data and arranges it in the following schema
| Object field | Data Type | Unit of measurement | Meaning | ```json
| ------------ | --------- | ------------------- | ------- | {
| | | | | "today": {
"[0..23]": {
"temperature": {
"type": "number",
"unitOfMeasurement": "degrees",
"description": "expected temperature at the given hour"
},
"precipitation": {
"type": "number",
"unitOfMeasurement": "mm",
"description": "expected precipitation at the given hour"
}
}
},
"tomorrow": {
"[0..23]": {}
},
"dayAfterTomorrow": {
"[0..23]": {}
},
"week": {
"[0..6]": {}
}
}
```
> **WARNING** > **WARNING**
> The script is configured to run on Nix, and because of this is more > The script is configured to run on Nix, and because of this is more
@ -33,7 +57,7 @@ eventually, together with the current level of implementation
``` ```
| Source | Status | Comments | | Source | Status | Comments |
| ---------------------------------------------------- | ------ | -------- | | ---------------------------------------------------- | ------ | ------------------------------------------------------------------------------------------------------------------------------- |
| [iLMeteo](https://www.ilmeteo.it) | ⛔️ | | | [iLMeteo](https://www.ilmeteo.it) | 🚧 | Precipitation is not yet interpolated and is given as the scraped string. Furthermore, the weekly forecast is not yet available |
| [3BMeteo](https://www.3bmeteo.com/) | ⛔️ | | | [3BMeteo](https://www.3bmeteo.com/) | ⛔️ | |
| [Meteo Aeronautica Militare](http://www.meteoam.it/) | ⛔️ | | | [Meteo Aeronautica Militare](http://www.meteoam.it/) | ⛔️ | |

@ -1,25 +1,21 @@
import 'dotenv/config' import 'dotenv/config'
import puppeteer from 'puppeteer' import puppeteer from 'puppeteer'
import fetchILMeteo from './scrapers/iLMeteo.js'
const NIX_OPS = { const NIX_OPS = {
executablePath: process.env.NIX_CHROMIUM_PATH, executablePath: process.env.NIX_CHROMIUM_PATH,
} }
const opts = process.env.ON_NIX ? NIX_OPS : {} const opts = process.env.ON_NIX ? NIX_OPS : {}
const HOUR = 21 const run = async () => {
const fetchILMeteo = async () => {
const browser = await puppeteer.launch(opts) const browser = await puppeteer.launch(opts)
const page = await browser.newPage()
await page.goto('https://www.ilmeteo.it/meteo/Pisa') const [iLMeteo] = await Promise.all([fetchILMeteo(browser)])
const weatherTable = await page.locator('.weather_table').waitHandle()
const row = await weatherTable.$(`.forecast_1h[data-hour="${HOUR}"]`)
const temp = await row.$('[data-value="temperatura"]')
console.log(await temp?.evaluate(el => el.textContent))
await browser.close() await browser.close()
return { iLMeteo }
} }
fetchILMeteo() console.dir(await run(), { depth: null })

@ -0,0 +1,56 @@
/**
 * Scrapes the hourly forecast rows of a single iLMeteo page.
 *
 * Opens a new page on the given browser, waits for the `.weather_table`
 * element, reads the third and fourth cell of every hourly row and closes
 * the page again, even when scraping fails part-way through.
 *
 * @param {object} browser - Puppeteer browser used to open the page.
 * @param {string} url - Address of the forecast page to load.
 * @param {boolean} isToday - Selects the row-id scheme and the first hour
 *   to read (see the comments in the body).
 * @returns {Promise<Object<number, {temp: number, prec: string}>>} Map from
 *   hour to the parsed temperature and the raw precipitation text.
 * @throws {Error} If an expected hourly row is missing or has too few cells.
 */
const scrapePage = async (browser, url, isToday) => {
    const result = {}
    const page = await browser.newPage()

    try {
        await page.goto(url)
        const weatherTable = await page.locator('.weather_table').waitHandle()

        // NOTE(review): on today's page reading starts two hours after the
        // current local hour; presumably earlier rows are not rendered as
        // hourly rows by the site. Confirm against the live page.
        const startTime = isToday ? new Date().getHours() + 2 : 0
        const endTime = 24
        // Rows on non-today pages are addressed with ids offset by 1000.
        const idOffset = isToday ? 0 : 1000

        for (let i = startTime; i < endTime; i++) {
            const selector = `.forecast_1h[data-dialogid="${idOffset + i}"] td`
            const fields = await weatherTable.$$(selector)
            if (fields.length < 4) {
                // Fail with context instead of an opaque TypeError on fields[2].
                throw new Error(
                    `iLMeteo: forecast row for hour ${i} not found at ${url}`,
                )
            }
            const temp = await fields[2].evaluate(el => parseFloat(el.textContent))
            const prec = await fields[3].evaluate(el => el.textContent)
            result[i] = { temp, prec }
        }

        return result
    } finally {
        // Always release the tab, otherwise a failed scrape leaks an open page.
        await page.close()
    }
}
/**
 * Scrapes the iLMeteo Pisa page in "today" mode.
 *
 * @param {object} browser - Browser instance handed on to `scrapePage`.
 * @returns {Promise<object>} Whatever `scrapePage` resolves to for this page.
 */
const fetchILMeteoToday = async browser => {
    const todayUrl = 'https://www.ilmeteo.it/meteo/Pisa'
    return scrapePage(browser, todayUrl, true)
}
/**
 * Scrapes the iLMeteo Pisa "domani" (tomorrow) page.
 *
 * @param {object} browser - Browser instance handed on to `scrapePage`.
 * @returns {Promise<object>} Whatever `scrapePage` resolves to for this page.
 */
const fetchILMeteoTomorrow = async browser => {
    const tomorrowUrl = 'https://www.ilmeteo.it/meteo/Pisa/domani'
    return scrapePage(browser, tomorrowUrl, false)
}
/**
 * Scrapes the iLMeteo Pisa "dopodomani" (day after tomorrow) page.
 *
 * @param {object} browser - Browser instance handed on to `scrapePage`.
 * @returns {Promise<object>} Whatever `scrapePage` resolves to for this page.
 */
const fetchILMeteoDayAfterTomorrow = async browser => {
    const dayAfterTomorrowUrl = 'https://www.ilmeteo.it/meteo/Pisa/dopodomani'
    return scrapePage(browser, dayAfterTomorrowUrl, false)
}
/**
 * Fetches the three daily iLMeteo forecasts concurrently.
 *
 * All three scrapes are started before any of them is awaited, and the
 * returned promise rejects as soon as one of them rejects.
 *
 * @param {object} browser - Browser instance shared by the three scrapes.
 * @returns {Promise<{today: object, tomorrow: object, dayAfterTomorrow: object}>}
 *   The per-day results keyed by day.
 */
export default async browser => {
    const pendingDays = [
        fetchILMeteoToday,
        fetchILMeteoTomorrow,
        fetchILMeteoDayAfterTomorrow,
    ].map(fetchDay => fetchDay(browser))

    const [today, tomorrow, dayAfterTomorrow] = await Promise.all(pendingDays)

    return { today, tomorrow, dayAfterTomorrow }
}
Loading…
Cancel
Save