every source has its own format

main
Fran314 4 weeks ago
parent 1f8fcb3273
commit 3b694826c4

@ -6,7 +6,7 @@
> be releasing on time~ > be releasing on time~
A utility to scrape Italian weather websites to collect and compare weather A utility to scrape Italian weather websites to collect and compare weather
information information for Pisa
## Usage ## Usage
@ -15,109 +15,200 @@ information
> complicated than it needs to be. You can remove unnecessary stuff if you're > complicated than it needs to be. You can remove unnecessary stuff if you're
> not running this on nix > not running this on nix
The scraper collects data and arranges in the following scheme The script scrapes the weather forecast from the implemented sources (see the
table below) and returns an object with the following fields
- **today:** an object with elements from the hour after the current one to
23, of type hourly (see below)
- **tomorrow:** an object with elements from hours 0 to 23, of type hourly
(see below)
- **dayAfterTomorrow:** an object with elements from hours 0 to 23, of type
hourly (see below)
- **week:** an object with days 0 to 6, of type `daily` (see below)
- **format:** an object specifying the meaning of the `hourly` and `daily`
types used by the objects above
The keys for `today`, `tomorrow` and `dayAfterTomorrow` are hours of the day,
where `0` refers to the time from 0:00 to 0:59, and `23` refers to the time from
23:00 to 23:59.
The keys for the `week` entry are offsets from today. That is, the
object at `0` holds the results for today, the object at `1` holds the
results for tomorrow, and the object at `6` holds the results for 6 days from
now.
Each source has its own format; the formats are specified below in the Sources
section (as well as in the `format` field of the object returned by the scraper).
### Sources
These are the sources that are currently implemented or will be implemented
eventually, together with the current level of implementation
```
✅ = implemented
🚧 = partially implemented
⛔️ = Not implemented
```
| Source | Status | Comments |
| ---------------------------------------------------- | ------ | ---------------------------------------- |
| [iLMeteo](https://www.ilmeteo.it) | ✅ | |
| [3Bmeteo](https://www.3bmeteo.com/) | 🚧 | Precipitation might not work as intended |
| [OpenMeteo](https://open-meteo.com/) | ✅ | |
| [Meteo Aeronautica Militare](http://www.meteoam.it/) | ⛔️ | |
<details>
<summary>iLMeteo</summary>
Format:
```json ```json
{ {
"today": { "hourly": {
"[0..23]": {
"temperature": { "temperature": {
"type": "number", "type": "number",
"unitOfMeasurement": "degrees", "unit": "°C"
"description": "expected temperature at the given hour"
}, },
"precipitation": { "precipitation": {
"type": "number", "type": "number",
"unitOfMeasurement": "mm", "unit": "mm"
"description": "expected precipitation at the given hour"
}, },
"weatherCode": { "apparentTemperature": {
"type": "string", "type": "number",
"description": "weather code (sunny / cloudy / ...)" "unit": "°C"
}
} }
}, },
"tomorrow": { "daily": {
"[0..23]": { "minimumTemperature": {
"temperature": {
"type": "number", "type": "number",
"unitOfMeasurement": "degrees", "unit": "°C"
"description": "expected temperature at the given hour"
}, },
"precipitation": { "maximumTemperature": {
"type": "number", "type": "number",
"unitOfMeasurement": "mm", "unit": "°C"
"description": "expected precipitation at the given hour"
}, },
"weatherCode": { "minimumApparentTemperature": {
"type": "string", "type": "number",
"description": "weather code (sunny / cloudy / ...)" "unit": "°C"
},
"maximumApparentTemperature": {
"type": "number",
"unit": "°C"
},
"precipitationSum": {
"type": "number",
"unit": "mm"
} }
} }
}, }
"dayAfterTomorrow": { ```
"[0..23]": {
</details>
<details>
<summary>3Bmeteo</summary>
Format:
```json
{
"hourly": {
"temperature": { "temperature": {
"type": "number", "type": "number",
"unitOfMeasurement": "degrees", "unit": "°C"
"description": "expected temperature at the given hour"
}, },
"precipitation": { "precipitation": {
"type": "number", "type": "number",
"unitOfMeasurement": "mm", "unit": "mm"
"description": "expected precipitation at the given hour" },
"apparentTemperature": {
"type": "number",
"unit": "°C"
}, },
"weatherCode": { "weatherCode": {
"type": "string", "type": "string"
"description": "weather code (sunny / cloudy / ...)"
}
} }
}, },
"week": { "daily": {
"[0..6]": {
"minimumTemperature": { "minimumTemperature": {
"type": "number", "type": "number",
"unitOfMeasurement": "degrees", "unit": "°C"
"description": "minimum expected temperature for the day"
}, },
"maximumTemperature": { "maximumTemperature": {
"type": "number", "type": "number",
"unitOfMeasurement": "degrees", "unit": "°C"
"description": "maximum expected temperature for the day"
}, },
"precipitationSum": { "minimumApparentTemperature": {
"type": "number", "type": "number",
"unitOfMeasurement": "mm", "unit": "°C"
"description": "total expected precipitation for the day"
}, },
"weatherCode": { "maximumApparentTemperature": {
"type": "string", "type": "number",
"description": "weather code (sunny / cloudy / ...)" "unit": "°C"
} },
"precipitationSum": {
"type": "number",
"unit": "mm"
} }
} }
} }
``` ```
The keys for the `week` entry are intended as an offset from today. That is, the </details>
object at `0` will be the results for today, the object at `1` will be the
results for tomorrow, and the object at `6` will be the results for 6 days from
now
### Sources <details>
<summary>OpenMeteo</summary>
These are the sources that are currently implemented or will be implemented Format:
eventually, together with the current level of implementation
``` ```json
✅ = implemented {
🚧 = partially implemented "hourly": {
⛔️ = Not implemented "temperature": {
"type": "number",
"unit": "°C"
},
"precipitation": {
"type": "number",
"unit": "mm"
},
"apparentTemperature": {
"type": "number",
"unit": "°C"
},
"weatherCode": {
"type": "number",
"unit": "WMO code"
}
},
"daily": {
"minimumTemperature": {
"type": "number",
"unit": "°C"
},
"maximumTemperature": {
"type": "number",
"unit": "°C"
},
"minimumApparentTemperature": {
"type": "number",
"unit": "°C"
},
"maximumApparentTemperature": {
"type": "number",
"unit": "°C"
},
"precipitationSum": {
"type": "number",
"unit": "mm"
},
"weatherCode": {
"type": "number",
"unit": "WMO code"
}
}
}
``` ```
| Source | Status | Comments | </details>
| ---------------------------------------------------- | ------ | ---------------------------------------- |
| [iLMeteo](https://www.ilmeteo.it) | 🚧 | Weather Code not working |
| [3Bmeteo](https://www.3bmeteo.com/) | 🚧 | Precipitation might not work as intended |
| [OpenMeteo](https://open-meteo.com/) | 🚧 | Weather Code are given in WMO Code |
| [Meteo Aeronautica Militare](http://www.meteoam.it/) | ⛔️ | |

@ -22,9 +22,9 @@ const run = async () => {
await browser.close() await browser.close()
return { return {
iLMeteo, // iLMeteo,
treBmeteo, treBmeteo,
openMeteo, // openMeteo,
} }
} }

@ -1,23 +1,71 @@
// Describes the value type and unit of every field found in the hourly and
// daily entries. Exposed to callers through the `format` key of the object
// returned by this module's default export.
const FORMAT = (() => {
    // Factories rather than shared constants, so that every field keeps its
    // own independent descriptor object.
    const celsius = () => ({ type: 'number', unit: '°C' })
    const millimetres = () => ({ type: 'number', unit: 'mm' })

    return {
        hourly: {
            temperature: celsius(),
            precipitation: millimetres(),
            apparentTemperature: celsius(),
            // The weather code is a string and carries no unit
            weatherCode: { type: 'string' },
        },
        daily: {
            minimumTemperature: celsius(),
            maximumTemperature: celsius(),
            minimumApparentTemperature: celsius(),
            maximumApparentTemperature: celsius(),
            precipitationSum: millimetres(),
        },
    }
})()
const parseTemp = el => { const parseTemp = el => {
return parseFloat(el.textContent) return parseFloat(el.textContent)
} }
const parsePrec = el => { const parsePrec = el => {
const text = el.textContent const text = el.textContent.trim()
if (text === ' assenti\n ') return 0 if (text === 'assenti') return 0
if (text === 'deboli') return 0.05
try {
//TODO not 100% sure that this would work as it was never tested //TODO not 100% sure that this would work as it was never tested
return parseFloat(text) return parseFloat(text)
} catch (error) {
console.log(error)
return text
}
} }
const getStartTime = isToday => { // const getStartTime = isToday => {
if (!isToday) return 0 // if (!isToday) return 0
//
const d = new Date() // const d = new Date()
const h = d.getHours() // const h = d.getHours()
//
if (d.getMinutes() > 30) return h + 1 // if (d.getMinutes() > 30) return h + 1
return h // return h
} // }
const scrapePage = async (browser, url, isToday) => { const scrapePage = async (browser, url, isToday) => {
let result = {} let result = {}
@ -36,7 +84,6 @@ const scrapePage = async (browser, url, isToday) => {
for (let i = startTime; i <= endTime; i++) { for (let i = startTime; i <= endTime; i++) {
try { try {
const row = rows[rows.length - 1 - (endTime - i)] const row = rows[rows.length - 1 - (endTime - i)]
// console.log(await row.evaluate(el => el.children))
const [rowLeft, rowRight] = await row.$$(':scope > div') const [rowLeft, rowRight] = await row.$$(':scope > div')
const codeDiv = (await (await rowLeft.$('.row-table')).$$('div'))[2] const codeDiv = (await (await rowLeft.$('.row-table')).$$('div'))[2]
@ -51,9 +98,7 @@ const scrapePage = async (browser, url, isToday) => {
const temperature = await ( const temperature = await (
await tempDiv.$('span') await tempDiv.$('span')
).evaluate(parseTemp) ).evaluate(parseTemp)
const precipitation = await ( const precipitation = await precDiv.evaluate(parsePrec)
await precDiv.$('span')
).evaluate(parsePrec)
const apparentTemperature = await ( const apparentTemperature = await (
await appTempDiv.$('span') await appTempDiv.$('span')
).evaluate(parseTemp) ).evaluate(parseTemp)
@ -100,7 +145,6 @@ const getDaySummary = day => {
minimumApparentTemperature, minimumApparentTemperature,
maximumApparentTemperature, maximumApparentTemperature,
precipitationSum, precipitationSum,
weatherCode: null,
} }
} }
@ -118,6 +162,7 @@ export default async browser => {
]) ])
return { return {
format: FORMAT,
today: results[0], today: results[0],
tomorrow: results[1], tomorrow: results[1],
dayAfterTomorrow: results[2], dayAfterTomorrow: results[2],

@ -1,3 +1,50 @@
// Describes the value type and unit of every field found in the hourly and
// daily entries, including the numeric weather code.
const FORMAT = (() => {
    // Factories rather than shared constants, so that every field keeps its
    // own independent descriptor object.
    const celsius = () => ({ type: 'number', unit: '°C' })
    const millimetres = () => ({ type: 'number', unit: 'mm' })
    const wmoCode = () => ({ type: 'number', unit: 'WMO code' })

    return {
        hourly: {
            temperature: celsius(),
            precipitation: millimetres(),
            apparentTemperature: celsius(),
            weatherCode: wmoCode(),
        },
        daily: {
            minimumTemperature: celsius(),
            maximumTemperature: celsius(),
            minimumApparentTemperature: celsius(),
            maximumApparentTemperature: celsius(),
            precipitationSum: millimetres(),
            weatherCode: wmoCode(),
        },
    }
})()
const getDailyData = async () => { const getDailyData = async () => {
const response = await fetch( const response = await fetch(
'https://api.open-meteo.com/v1/forecast?latitude=43.7085&longitude=10.4036&hourly=temperature_2m,relative_humidity_2m,apparent_temperature,precipitation,weather_code&timezone=Europe%2FBerlin&forecast_days=3', 'https://api.open-meteo.com/v1/forecast?latitude=43.7085&longitude=10.4036&hourly=temperature_2m,relative_humidity_2m,apparent_temperature,precipitation,weather_code&timezone=Europe%2FBerlin&forecast_days=3',

@ -1,3 +1,42 @@
// Describes the value type and unit of every field found in the hourly and
// daily entries. Exposed to callers through the `format` key of the object
// returned by this module's default export.
const FORMAT = (() => {
    // Factories rather than shared constants, so that every field keeps its
    // own independent descriptor object.
    const celsius = () => ({ type: 'number', unit: '°C' })
    const millimetres = () => ({ type: 'number', unit: 'mm' })

    return {
        hourly: {
            temperature: celsius(),
            precipitation: millimetres(),
            apparentTemperature: celsius(),
        },
        daily: {
            minimumTemperature: celsius(),
            maximumTemperature: celsius(),
            minimumApparentTemperature: celsius(),
            maximumApparentTemperature: celsius(),
            precipitationSum: millimetres(),
        },
    }
})()
const parseTemp = el => { const parseTemp = el => {
return parseFloat(el.textContent) return parseFloat(el.textContent)
} }
@ -18,23 +57,23 @@ const scrapePage = async (browser, url, isToday) => {
const startTime = isToday ? new Date().getHours() + 1 : 1 const startTime = isToday ? new Date().getHours() + 1 : 1
const endTime = 24 const endTime = 24
if (isToday) { // if (isToday) {
try { // try {
const row = await weatherTable.$$('.latest_detection') // const row = await weatherTable.$$('.latest_detection')
const fields = await row[1]?.$$('td') // const fields = await row[1]?.$$('td')
const temperature = await fields[2].evaluate(parseTemp) // const temperature = await fields[2].evaluate(parseTemp)
const precipitation = await fields[3].evaluate(parsePrec) // const precipitation = await fields[3].evaluate(parsePrec)
result[startTime - 1] = { // result[startTime - 1] = {
temperature, // temperature,
precipitation, // precipitation,
apparentTemperature: null, // apparentTemperature: null,
weatherCode: null, // weatherCode: null,
} // }
} catch (error) { // } catch (error) {
result[startTime - 1] = null // result[startTime - 1] = null
// console.log(error) // // console.log(error)
} // }
} // }
for (let i = startTime; i <= endTime; i++) { for (let i = startTime; i <= endTime; i++) {
try { try {
const selector = isToday const selector = isToday
@ -50,7 +89,6 @@ const scrapePage = async (browser, url, isToday) => {
temperature, temperature,
precipitation, precipitation,
apparentTemperature, apparentTemperature,
weatherCode: null,
} }
} catch (error) { } catch (error) {
result[i] = null result[i] = null
@ -99,7 +137,6 @@ const getDaySummary = day => {
minimumApparentTemperature, minimumApparentTemperature,
maximumApparentTemperature, maximumApparentTemperature,
precipitationSum, precipitationSum,
weatherCode: null,
} }
} }
@ -119,6 +156,7 @@ export default async browser => {
) )
return { return {
format: FORMAT,
today: results[0], today: results[0],
tomorrow: results[1], tomorrow: results[1],
dayAfterTomorrow: results[2], dayAfterTomorrow: results[2],

Loading…
Cancel
Save