completed iLMeteo

main
Fran314 4 weeks ago
parent 3978d4307f
commit ea45e8832d

@ -1,14 +1,19 @@
# Weather Web-Scraper
> **CAUTION**
> The following utility is in EARLY development. Many promises made in this
> readme might not actually be true. Hopefully one day we'll be out of beta ~and
> be releasing on time~
A utility that scrapes Italian weather websites to collect and compare weather
information
## Usage
> **CAUTION**
> The following utility is in EARLY development. Many promises made in this
> readme might not actually be true. Hopefully one day we'll be out of beta ~and
> be releasing on time~
> **WARNING**
> The script is configured to run on Nix, and because of this it is more
> complicated than it needs to be. You can remove unnecessary stuff if you're
> not running this on Nix
The scraper collects data and arranges it according to the following schema
@ -16,12 +21,12 @@ The scraper collects data and arranges in the following scheme
{
"today": {
"[0..23]": {
"temperature": {
"temp": {
"type": "number",
"unitOfMeasurement": "degrees",
"description": "expected temperature at the given hour"
},
"precipitation": {
"prec": {
"type": "number",
"unitOfMeasurement": "mm",
"description": "expected precipitation at the given hour"
@ -29,21 +34,59 @@ The scraper collects data and arranges in the following scheme
}
},
"tomorrow": {
"[0..23]": {}
"[0..23]": {
"temp": {
"type": "number",
"unitOfMeasurement": "degrees",
"description": "expected temperature at the given hour"
},
"prec": {
"type": "number",
"unitOfMeasurement": "mm",
"description": "expected precipitation at the given hour"
}
}
},
"dayAfterTomorrow": {
"[0..23]": {}
"[0..23]": {
"temp": {
"type": "number",
"unitOfMeasurement": "degrees",
"description": "expected temperature at the given hour"
},
"prec": {
"type": "number",
"unitOfMeasurement": "mm",
"description": "expected precipitation at the given hour"
}
}
},
"week": {
"[0..6]": {}
"[0..6]": {
"minTemp": {
"type": "number",
"unitOfMeasurement": "degrees",
"description": "minimum expected temperature for the day"
},
"maxTemp": {
"type": "number",
"unitOfMeasurement": "degrees",
"description": "maximum expected temperature for the day"
},
"totPrec": {
"type": "number",
"unitOfMeasurement": "mm",
"description": "total expected precipitation for the day"
}
}
}
}
```
> **WARNING**
> The script is configured to run on Nix, and because of this it is more
> complicated than it needs to be. You can remove unnecessary stuff if you're
> not running this on Nix
The keys of the `week` entry are offsets from today. That is, the object at `0`
holds the results for today, the object at `1` holds the results for tomorrow,
and the object at `6` holds the results for 6 days from now.
### Sources
@ -56,8 +99,8 @@ eventually, together with the current level of implementation
⛔️ = Not implemented
```
| Source | Status | Comments |
| ---------------------------------------------------- | ------ | ------------------------------------------------------------------------------------------------------------------------------- |
| [iLMeteo](https://www.ilmeteo.it) | 🚧 | precipitations are not yet interpolated and are given as the scraped string. Furthermore, there is not yet weather for the week |
| [3BMeteo](https://www.3bmeteo.com/) | ⛔️ | |
| [Meteo Aeronautica Militare](http://www.meteoam.it/) | ⛔️ | |
| Source | Status | Comments |
| ---------------------------------------------------- | ------ | -------- |
| [iLMeteo](https://www.ilmeteo.it) | ✅ | |
| [3BMeteo](https://www.3bmeteo.com/) | ⛔️ | |
| [Meteo Aeronautica Militare](http://www.meteoam.it/) | ⛔️ | |

@ -19,3 +19,4 @@ const run = async () => {
}
console.dir(await run(), { depth: null })
// await run()

@ -1,3 +1,12 @@
/**
 * Reads the leading number from an element's text content.
 *
 * @param {{ textContent: string }} el - Object whose `textContent` is parsed.
 * @returns {number} The parsed value, or NaN when the text does not start
 *   with a number.
 */
const parseTemp = el => Number.parseFloat(el.textContent)
/**
 * Parses the text of a precipitation cell into a number.
 *
 * The placeholder text `- assenti -` is mapped to 0. Any other text is parsed
 * with `parseFloat`, so a trailing unit is ignored.
 *
 * @param {{ textContent: string | null }} el - Object whose `textContent` is
 *   parsed.
 * @returns {number} The parsed amount, 0 for the placeholder, or NaN when the
 *   text does not start with a number.
 */
const parsePrec = el => {
  // Trim before comparing so whitespace around the placeholder does not turn
  // "no precipitation" into NaN.
  const text = String(el.textContent ?? '').trim()
  if (text === '- assenti -') return 0
  return parseFloat(text)
}
const scrapePage = async (browser, url, isToday) => {
let result = {}
@ -6,51 +15,89 @@ const scrapePage = async (browser, url, isToday) => {
const weatherTable = await page.locator('.weather_table').waitHandle()
const startTime = isToday ? new Date().getHours() + 2 : 0
const startTime = isToday ? new Date().getHours() + 1 : 1
const endTime = 24
for (let i = startTime; i < endTime; i++) {
const selector = isToday
? `.forecast_1h[data-dialogid="${i}"] td`
: `.forecast_1h[data-dialogid="${1000 + i}"] td`
const fields = await weatherTable.$$(selector)
// When scraping today's page, also read the second `.latest_detection` row
// and store it under the key `startTime - 1`.
if (isToday) {
  try {
    const row = await weatherTable.$$('.latest_detection')
    const fields = await row[1]?.$$('td')
    const temp = await fields[2].evaluate(parseTemp)
    const prec = await fields[3].evaluate(parsePrec)
    result[startTime - 1] = { temp, prec }
  } catch {
    // Best effort: a missing or malformed row is recorded as null rather than
    // aborting the whole scrape.
    result[startTime - 1] = null
  }
}
// Collect one entry per key from `startTime` up to and including `endTime`.
for (let i = startTime; i <= endTime; i++) {
  try {
    // Today's rows are looked up by `data-dialogid` equal to the key itself;
    // the other pages use the key shifted by 999.
    const selector = isToday
      ? `.forecast_1h[data-dialogid="${i}"] td`
      : `.forecast_1h[data-dialogid="${1000 + i - 1}"] td`
    const fields = await weatherTable.$$(selector)
    const temp = await fields[2].evaluate(parseTemp)
    const prec = await fields[3].evaluate(parsePrec)
    result[i] = { temp, prec }
  } catch {
    // Best effort: rows that cannot be read are recorded as null.
    result[i] = null
  }
}
await page.close()
return result
}
/**
 * Runs `scrapePage` against the iLMeteo Pisa page with `isToday` set to true.
 *
 * @param browser - Browser handle forwarded unchanged to `scrapePage`.
 * @returns A promise resolving with whatever `scrapePage` resolves with.
 */
const fetchILMeteoToday = async browser => {
  const todayUrl = 'https://www.ilmeteo.it/meteo/Pisa'
  const forecast = await scrapePage(browser, todayUrl, true)
  return forecast
}
const fetchILMeteoTomorrow = async browser => {
return await scrapePage(
browser,
'https://www.ilmeteo.it/meteo/Pisa/domani',
false,
)
/**
 * Moves each day's entry at key `24` to key `0` of the following day.
 *
 * The entry at key `24` of the last day has no following day and is dropped.
 * The day objects are modified in place and the same array is returned.
 * Works for any number of days, including an empty list.
 *
 * @param {Array<Object>} results - Per-day objects keyed by hour, in
 *   chronological order.
 * @returns {Array<Object>} The same `results` array, after rearranging.
 */
const rearrangeResults = results => {
  const lastIndex = results.length - 1
  results.forEach((day, index) => {
    const midnight = day[24]
    delete day[24]
    // Only days that have a successor can hand their midnight entry over.
    if (index < lastIndex) results[index + 1][0] = midnight
  })
  return results
}
const fetchILMeteoDayAfterTomorrow = async browser => {
return await scrapePage(
browser,
'https://www.ilmeteo.it/meteo/Pisa/dopodomani',
false,
)
/**
 * Summarises one day of hourly entries.
 *
 * Entries that are null/undefined are skipped, as are `temp` or `prec` values
 * that are not finite numbers, so a single unreadable hour does not break or
 * poison the summary.
 *
 * @param {Object} day - Object whose values are `{ temp, prec }` entries (or
 *   null for hours that could not be read).
 * @returns {{ minTemp: number | null, maxTemp: number | null, totPrec: number }}
 *   Lowest and highest temperature (null when no temperature is available)
 *   and the sum of the precipitation values.
 */
const getDaySummary = day => {
  // Start from +/-Infinity: Number.MIN_VALUE is the smallest POSITIVE number,
  // which would be reported as the maximum on a day with only negative
  // temperatures.
  let minTemp = Infinity
  let maxTemp = -Infinity
  let totPrec = 0
  for (const hour of Object.values(day ?? {})) {
    if (hour == null) continue
    if (Number.isFinite(hour.temp)) {
      minTemp = Math.min(minTemp, hour.temp)
      maxTemp = Math.max(maxTemp, hour.temp)
    }
    if (Number.isFinite(hour.prec)) totPrec += hour.prec
  }
  // The bounds only cross when no temperature was seen at all.
  const hasTemp = minTemp <= maxTemp
  return {
    minTemp: hasTemp ? minTemp : null,
    maxTemp: hasTemp ? maxTemp : null,
    totPrec,
  }
}
/**
 * Scrapes seven iLMeteo Pisa pages in parallel and assembles the result.
 *
 * Each page is scraped exactly once. The first page is scraped with
 * `isToday` set to true, the others with false.
 *
 * @param browser - Browser handle forwarded unchanged to `scrapePage`.
 * @returns An object with the rearranged per-hour results of the first three
 *   pages (`today`, `tomorrow`, `dayAfterTomorrow`) and `week`, the
 *   `getDaySummary` of each of the seven pages.
 */
export default async browser => {
  const url = text => `https://www.ilmeteo.it/meteo/Pisa/${text}`
  // Path suffixes in chronological order; the index doubles as day offset.
  const paths = ['', 'domani', 'dopodomani', '3', '4', '5', '6']
  const results = rearrangeResults(
    await Promise.all(
      paths.map((path, offset) => scrapePage(browser, url(path), offset === 0)),
    ),
  )
  return {
    today: results[0],
    tomorrow: results[1],
    dayAfterTomorrow: results[2],
    week: results.map(getDaySummary),
  }
}

Loading…
Cancel
Save