completed iLMeteo

branch main
Fran314 committed 4 weeks ago
parent 3978d4307f
commit ea45e8832d

@@ -1,14 +1,19 @@
 # Weather Web-Scraper
+
+> **CAUTION**
+> The following utility is in EARLY development. Many promises made in this
+> readme might not actually be true. Hopefully one day we'll be out of beta ~and
+> be releasing on time~
+
 A utility to scrape Italian weather websites to collect and compare weather
 information

 ## Usage

-> **CAUTION**
-> The following utility is in EARLY development. Many promises made in this
-> readme might not actually be true. Hopefully one day we'll be out of beta ~and
-> be releasing on time~
+> **WARNING**
+> The script is configured to run on Nix, and because of this it is more
+> complicated than it needs to be. You can remove unnecessary stuff if you're
+> not running this on Nix.

 The scraper collects data and arranges it in the following scheme
@@ -16,12 +21,12 @@ The scraper collects data and arranges it in the following scheme
 {
     "today": {
         "[0..23]": {
-            "temperature": {
+            "temp": {
                 "type": "number",
                 "unitOfMeasurement": "degrees",
                 "description": "expected temperature at the given hour"
             },
-            "precipitation": {
+            "prec": {
                 "type": "number",
                 "unitOfMeasurement": "mm",
                 "description": "expected precipitation at the given hour"
@@ -29,21 +34,59 @@
             }
         },
         "tomorrow": {
-            "[0..23]": {}
+            "[0..23]": {
+                "temp": {
+                    "type": "number",
+                    "unitOfMeasurement": "degrees",
+                    "description": "expected temperature at the given hour"
+                },
+                "prec": {
+                    "type": "number",
+                    "unitOfMeasurement": "mm",
+                    "description": "expected precipitation at the given hour"
+                }
+            }
         },
         "dayAfterTomorrow": {
-            "[0..23]": {}
+            "[0..23]": {
+                "temp": {
+                    "type": "number",
+                    "unitOfMeasurement": "degrees",
+                    "description": "expected temperature at the given hour"
+                },
+                "prec": {
+                    "type": "number",
+                    "unitOfMeasurement": "mm",
+                    "description": "expected precipitation at the given hour"
+                }
+            }
         },
         "week": {
-            "[0..6]": {}
+            "[0..6]": {
+                "minTemp": {
+                    "type": "number",
+                    "unitOfMeasurement": "degrees",
+                    "description": "minimum expected temperature for the day"
+                },
+                "maxTemp": {
+                    "type": "number",
+                    "unitOfMeasurement": "degrees",
+                    "description": "maximum expected temperature for the day"
+                },
+                "totPrec": {
+                    "type": "number",
+                    "unitOfMeasurement": "mm",
+                    "description": "total expected precipitation for the day"
+                }
+            }
         }
     }
 }
 ```

-> **WARNING**
-> The script is configured to run on Nix, and because of this it is more
-> complicated than it needs to be. You can remove unnecessary stuff if you're
-> not running this on Nix.
+The keys for the `week` entry are intended as offsets from today. That is, the
+object at `0` will be the results for today, the object at `1` will be the
+results for tomorrow, and the object at `6` will be the results for 6 days from
+now.

 ### Sources
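
To make the offset convention for `week` concrete, here is an illustrative sketch of what a returned object might look like. The values are invented and most hour keys are omitted; only the shape follows the schema above.

```js
// Illustrative only: shape of the object the scraper is meant to produce,
// with made-up values and most hour keys omitted for brevity.
const example = {
    today: {
        14: { temp: 21.3, prec: 0 },
        15: { temp: 20.8, prec: 0.2 },
        // ...one entry per remaining hour of the day
    },
    tomorrow: { /* keys 0..23, same { temp, prec } shape */ },
    dayAfterTomorrow: { /* keys 0..23, same shape */ },
    week: {
        0: { minTemp: 12.1, maxTemp: 21.3, totPrec: 0.2 }, // today
        1: { minTemp: 11.4, maxTemp: 19.9, totPrec: 3.5 }, // tomorrow
        // ...up to 6, i.e. six days from now
    },
}
```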
@@ -57,7 +100,7 @@ eventually, together with the current level of implementation
 ```
 | Source                                               | Status | Comments |
-| ---------------------------------------------------- | ------ | ------------------------------------------------------------------------------------------------------------------------------- |
-| [iLMeteo](https://www.ilmeteo.it)                    | 🚧     | precipitations are not yet interpolated and are given as the scraped string. Furthermore, there is not yet weather for the week  |
+| ---------------------------------------------------- | ------ | -------- |
+| [iLMeteo](https://www.ilmeteo.it)                    | ✅     |          |
 | [3BMeteo](https://www.3bmeteo.com/)                  | ⛔️     |          |
 | [Meteo Aeronautica Militare](http://www.meteoam.it/) | ⛔️     |          |

@@ -19,3 +19,4 @@ const run = async () => {
 }

 console.dir(await run(), { depth: null })
+// await run()
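
The hunk above only shows the tail of the runner script. For context, here is a minimal sketch of how a scraper module like the one diffed below could be driven, assuming Puppeteer is the browser automation library (consistent with the `page.locator(...).waitHandle()` calls in the source); the import path and the `fetchILMeteo` name are hypothetical.

```js
import puppeteer from 'puppeteer'
import fetchILMeteo from './sources/ilmeteo.js' // hypothetical path/name

const run = async () => {
    // Launch one browser, hand it to the scraper module, and close it
    // whether scraping succeeds or throws.
    const browser = await puppeteer.launch()
    try {
        return await fetchILMeteo(browser)
    } finally {
        await browser.close()
    }
}

// Print the nested result object in full, without depth truncation.
console.dir(await run(), { depth: null })
```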

@@ -1,3 +1,12 @@
+const parseTemp = el => {
+    return parseFloat(el.textContent)
+}
+
+const parsePrec = el => {
+    const text = el.textContent
+    if (text === '- assenti -') return 0
+    return parseFloat(text)
+}
+
 const scrapePage = async (browser, url, isToday) => {
     let result = {}
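
The two helpers added above normalise the scraped table cells into numbers; "assenti" is Italian for "absent", so `'- assenti -'` is mapped to 0 mm of precipitation. Below is a small standalone restatement of that parsing logic applied to plain strings instead of DOM elements; the sample input strings are guesses at the site's cell format, not values taken from it.

```js
// Standalone restatement of parseTemp / parsePrec, working on strings.
const parseTempText = text => parseFloat(text)
const parsePrecText = text => (text === '- assenti -' ? 0 : parseFloat(text))

console.log(parseTempText('21.3°'))       // 21.3
console.log(parsePrecText('- assenti -')) // 0  (no precipitation expected)
console.log(parsePrecText('0.2 mm'))      // 0.2
```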
@@ -6,51 +15,89 @@ const scrapePage = async (browser, url, isToday) => {
     const weatherTable = await page.locator('.weather_table').waitHandle()
-    const startTime = isToday ? new Date().getHours() + 2 : 0
+    const startTime = isToday ? new Date().getHours() + 1 : 1
     const endTime = 24
-    for (let i = startTime; i < endTime; i++) {
+    if (isToday) {
+        try {
+            const row = await weatherTable.$$('.latest_detection')
+            const fields = await row[1]?.$$('td')
+            const temp = await fields[2].evaluate(parseTemp)
+            const prec = await fields[3].evaluate(parsePrec)
+            result[startTime - 1] = { temp, prec }
+        } catch (error) {
+            result[startTime - 1] = null
+            // console.log(error)
+        }
+    }
+    for (let i = startTime; i <= endTime; i++) {
+        try {
             const selector = isToday
                 ? `.forecast_1h[data-dialogid="${i}"] td`
-                : `.forecast_1h[data-dialogid="${1000 + i}"] td`
+                : `.forecast_1h[data-dialogid="${1000 + i - 1}"] td`
             const fields = await weatherTable.$$(selector)
-            const temp = await fields[2].evaluate(el => parseFloat(el.textContent))
-            const prec = await fields[3].evaluate(el => el.textContent)
+            const temp = await fields[2].evaluate(parseTemp)
+            const prec = await fields[3].evaluate(parsePrec)
             result[i] = { temp, prec }
+        } catch (error) {
+            result[i] = null
+            // console.log(error)
+        }
     }
     await page.close()
     return result
 }
-const fetchILMeteoToday = async browser => {
-    return await scrapePage(browser, 'https://www.ilmeteo.it/meteo/Pisa', true)
-}
-const fetchILMeteoTomorrow = async browser => {
-    return await scrapePage(
-        browser,
-        'https://www.ilmeteo.it/meteo/Pisa/domani',
-        false,
-    )
-}
-const fetchILMeteoDayAfterTomorrow = async browser => {
-    return await scrapePage(
-        browser,
-        'https://www.ilmeteo.it/meteo/Pisa/dopodomani',
-        false,
-    )
-}
+const rearrangeResults = results => {
+    for (let i = 0; i < 6; i++) {
+        const midnight = results[i][24]
+        delete results[i][24]
+        results[i + 1][0] = midnight
+    }
+    delete results[6][24]
+
+    return results
+}
+
+const getDaySummary = day => {
+    let minTemp = Number.MAX_VALUE
+    let maxTemp = Number.MIN_VALUE
+    let totPrec = 0
+    for (const h in day) {
+        minTemp = Math.min(minTemp, day[h].temp)
+        maxTemp = Math.max(maxTemp, day[h].temp)
+        totPrec += day[h].prec
+    }
+    return {
+        minTemp,
+        maxTemp,
+        totPrec,
+    }
+}
 export default async browser => {
-    const [today, tomorrow, dayAfterTomorrow] = await Promise.all([
-        fetchILMeteoToday(browser),
-        fetchILMeteoTomorrow(browser),
-        fetchILMeteoDayAfterTomorrow(browser),
-    ])
+    const url = text => `https://www.ilmeteo.it/meteo/Pisa/${text}`
+
+    const results = rearrangeResults(
+        await Promise.all([
+            scrapePage(browser, url(''), true),
+            scrapePage(browser, url('domani'), false),
+            scrapePage(browser, url('dopodomani'), false),
+            scrapePage(browser, url('3'), false),
+            scrapePage(browser, url('4'), false),
+            scrapePage(browser, url('5'), false),
+            scrapePage(browser, url('6'), false),
+        ]),
+    )
     return {
-        today,
-        tomorrow,
-        dayAfterTomorrow,
+        today: results[0],
+        tomorrow: results[1],
+        dayAfterTomorrow: results[2],
+        week: results.map(getDaySummary),
    }
 }
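
In the new flow, seven pages are scraped (today, domani, dopodomani, then /3 through /6), each covering hours from `startTime` up to slot 24. `rearrangeResults` appears to move each day's slot 24 into the next day's hour 0, since that row really belongs to the following midnight, and `getDaySummary` then folds each day into a min/max temperature and total precipitation. Below is a standalone sketch of that summary step on toy data (the values are invented). Note that the sketch seeds the running min/max with +/-Infinity; the committed code seeds them with `Number.MAX_VALUE` / `Number.MIN_VALUE`, and since `Number.MIN_VALUE` is the smallest positive double, a day whose temperatures are all below zero would get an incorrect `maxTemp` there.

```js
// Standalone sketch of the per-day summary, on made-up sub-zero data.
const summariseDay = day => {
    let minTemp = Infinity
    let maxTemp = -Infinity
    let totPrec = 0
    for (const hour of Object.values(day)) {
        minTemp = Math.min(minTemp, hour.temp)
        maxTemp = Math.max(maxTemp, hour.temp)
        totPrec += hour.prec
    }
    return { minTemp, maxTemp, totPrec }
}

console.log(
    summariseDay({
        0: { temp: -3.1, prec: 0 },
        1: { temp: -2.4, prec: 0.5 },
        2: { temp: -4.0, prec: 1.25 },
    }),
) // { minTemp: -4, maxTemp: -2.4, totPrec: 1.75 }
```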
