every source has its own format

main
Fran314 3 months ago
parent 1f8fcb3273
commit 3b694826c4

@ -6,7 +6,7 @@
> be releasing on time~
A utility to scrape Italian weather websites to collect and compare weather
information
information for Pisa
## Usage
@ -15,95 +15,31 @@ information
> complicated than it needs to be. You can remove unnecessary stuff if you're
> not running this on nix
The scraper collects data and arranges it in the following scheme
The script scrapes the weather forecast from the implemented sources (see the
table below) and returns an object with the following fields
```json
{
"today": {
"[0..23]": {
"temperature": {
"type": "number",
"unitOfMeasurement": "degrees",
"description": "expected temperature at the given hour"
},
"precipitation": {
"type": "number",
"unitOfMeasurement": "mm",
"description": "expected precipitation at the given hour"
},
"weatherCode": {
"type": "string",
"description": "weather code (sunny / cloudy / ...)"
}
}
},
"tomorrow": {
"[0..23]": {
"temperature": {
"type": "number",
"unitOfMeasurement": "degrees",
"description": "expected temperature at the given hour"
},
"precipitation": {
"type": "number",
"unitOfMeasurement": "mm",
"description": "expected precipitation at the given hour"
},
"weatherCode": {
"type": "string",
"description": "weather code (sunny / cloudy / ...)"
}
}
},
"dayAfterTomorrow": {
"[0..23]": {
"temperature": {
"type": "number",
"unitOfMeasurement": "degrees",
"description": "expected temperature at the given hour"
},
"precipitation": {
"type": "number",
"unitOfMeasurement": "mm",
"description": "expected precipitation at the given hour"
},
"weatherCode": {
"type": "string",
"description": "weather code (sunny / cloudy / ...)"
}
}
},
"week": {
"[0..6]": {
"minimumTemperature": {
"type": "number",
"unitOfMeasurement": "degrees",
"description": "minimum expected temperature for the day"
},
"maximumTemperature": {
"type": "number",
"unitOfMeasurement": "degrees",
"description": "maximum expected temperature for the day"
},
"precipitationSum": {
"type": "number",
"unitOfMeasurement": "mm",
"description": "total expected precipitation for the day"
},
"weatherCode": {
"type": "string",
"description": "weather code (sunny / cloudy / ...)"
}
}
}
}
```
- **today:** an object with elements from the hour after the current one to
23, of type hourly (see below)
- **tomorrow:** an object with elements from hours 0 to 23, of type hourly
(see below)
- **dayAfterTomorrow:** an object with elements from hours 0 to 23, of type
hourly (see below)
- **week:** an object with days 0 to 6, of type daily
- **format:** an object specifying the meaning of the `hourly` and `daily` types
used by the objects above
The keys for `today`, `tomorrow` and `dayAfterTomorrow` are intended as hours
where `0` refers to the time from 0:00 to 0:59, and `23` refers to the time from
23:00 to 23:59.
The keys for the `week` entry are intended as an offset from today. That is, the
object at `0` will be the results for today, the object at `1` will be the
results for tomorrow, and the object at `6` will be the results for 6 days from
now.
Each source has its own format; these formats are specified below in the Sources
section (as well as in the object returned by the scraper).
### Sources
These are the sources that are currently implemented or will be implemented
@ -117,7 +53,162 @@ eventually, together with the current level of implementation
| Source | Status | Comments |
| ---------------------------------------------------- | ------ | ---------------------------------------- |
| [iLMeteo](https://www.ilmeteo.it) | 🚧 | Weather Code not working |
| [iLMeteo](https://www.ilmeteo.it) | ✅ | |
| [3Bmeteo](https://www.3bmeteo.com/) | 🚧 | Precipitation might not work as intended |
| [OpenMeteo](https://open-meteo.com/) | 🚧 | Weather Code are given in WMO Code |
| [OpenMeteo](https://open-meteo.com/) | ✅ | |
| [Meteo Aeronautica Militare](http://www.meteoam.it/) | ⛔️ | |
<details>
<summary>iLMeteo</summary>
Format:
```json
{
"hourly": {
"temperature": {
"type": "number",
"unit": "°C"
},
"precipitation": {
"type": "number",
"unit": "mm"
},
"apparentTemperature": {
"type": "number",
"unit": "°C"
}
},
"daily": {
"minimumTemperature": {
"type": "number",
"unit": "°C"
},
"maximumTemperature": {
"type": "number",
"unit": "°C"
},
"minimumApparentTemperature": {
"type": "number",
"unit": "°C"
},
"maximumApparentTemperature": {
"type": "number",
"unit": "°C"
},
"precipitationSum": {
"type": "number",
"unit": "mm"
}
}
}
```
</details>
<details>
<summary>3Bmeteo</summary>
Format:
```json
{
"hourly": {
"temperature": {
"type": "number",
"unit": "°C"
},
"precipitation": {
"type": "number",
"unit": "mm"
},
"apparentTemperature": {
"type": "number",
"unit": "°C"
},
"weatherCode": {
"type": "string"
}
},
"daily": {
"minimumTemperature": {
"type": "number",
"unit": "°C"
},
"maximumTemperature": {
"type": "number",
"unit": "°C"
},
"minimumApparentTemperature": {
"type": "number",
"unit": "°C"
},
"maximumApparentTemperature": {
"type": "number",
"unit": "°C"
},
"precipitationSum": {
"type": "number",
"unit": "mm"
}
}
}
```
</details>
<details>
<summary>OpenMeteo</summary>
Format:
```json
{
"hourly": {
"temperature": {
"type": "number",
"unit": "°C"
},
"precipitation": {
"type": "number",
"unit": "mm"
},
"apparentTemperature": {
"type": "number",
"unit": "°C"
},
"weatherCode": {
"type": "number",
"unit": "WMO code"
}
},
"daily": {
"minimumTemperature": {
"type": "number",
"unit": "°C"
},
"maximumTemperature": {
"type": "number",
"unit": "°C"
},
"minimumApparentTemperature": {
"type": "number",
"unit": "°C"
},
"maximumApparentTemperature": {
"type": "number",
"unit": "°C"
},
"precipitationSum": {
"type": "number",
"unit": "mm"
},
"weatherCode": {
"type": "number",
"unit": "WMO code"
}
}
}
```
</details>

@ -22,9 +22,9 @@ const run = async () => {
await browser.close()
return {
iLMeteo,
// iLMeteo,
treBmeteo,
openMeteo,
// openMeteo,
}
}

@ -1,23 +1,71 @@
// Describes the type (and unit, where one applies) of every field found in
// the `hourly` and `daily` entries produced by this source.
const FORMAT = (() => {
    // Each call builds a fresh descriptor so no two fields share an object.
    const celsius = () => ({ type: 'number', unit: '°C' })
    const millimetres = () => ({ type: 'number', unit: 'mm' })

    const hourly = {
        temperature: celsius(),
        precipitation: millimetres(),
        apparentTemperature: celsius(),
        // The weather code of this source is a string and carries no unit.
        weatherCode: { type: 'string' },
    }

    const daily = {
        minimumTemperature: celsius(),
        maximumTemperature: celsius(),
        minimumApparentTemperature: celsius(),
        maximumApparentTemperature: celsius(),
        precipitationSum: millimetres(),
    }

    return { hourly, daily }
})()
// Parses the leading number of the element's text content; yields NaN when
// the text does not start with a number.
const parseTemp = el => parseFloat(el.textContent)
/**
 * Converts the text of a precipitation cell into a number.
 *
 * The text is trimmed first, so surrounding whitespace and newlines in the
 * markup do not matter. The Italian label 'assenti' ("absent") maps to 0 and
 * 'deboli' ("light") maps to 0.05; any other text is handed to `parseFloat`.
 *
 * NOTE(review): this function is passed to `.evaluate(...)` by the scraper,
 * which presumably serialises it and runs it in the page — keep it
 * self-contained (no references to module-level helpers).
 *
 * @param {{ textContent: string }} el - element whose text is parsed
 * @returns {number} the parsed amount, or NaN when the text does not start
 *   with a number
 */
const parsePrec = el => {
    const text = el.textContent.trim()
    if (text === 'assenti') return 0
    if (text === 'deboli') return 0.05
    // parseFloat never throws (it returns NaN for unparseable text), so no
    // try/catch is needed around it.
    //TODO not 100% sure that this would work as it was never tested
    return parseFloat(text)
}
// Picks the first hour to scrape: 0 for any day other than today; for today,
// the current local hour, bumped to the next one once the minutes exceed 30.
const getStartTime = isToday => {
    if (isToday) {
        const now = new Date()
        const currentHour = now.getHours()
        return now.getMinutes() > 30 ? currentHour + 1 : currentHour
    }
    return 0
}
// const getStartTime = isToday => {
// if (!isToday) return 0
//
// const d = new Date()
// const h = d.getHours()
//
// if (d.getMinutes() > 30) return h + 1
// return h
// }
const scrapePage = async (browser, url, isToday) => {
let result = {}
@ -36,7 +84,6 @@ const scrapePage = async (browser, url, isToday) => {
for (let i = startTime; i <= endTime; i++) {
try {
const row = rows[rows.length - 1 - (endTime - i)]
// console.log(await row.evaluate(el => el.children))
const [rowLeft, rowRight] = await row.$$(':scope > div')
const codeDiv = (await (await rowLeft.$('.row-table')).$$('div'))[2]
@ -51,9 +98,7 @@ const scrapePage = async (browser, url, isToday) => {
const temperature = await (
await tempDiv.$('span')
).evaluate(parseTemp)
const precipitation = await (
await precDiv.$('span')
).evaluate(parsePrec)
const precipitation = await precDiv.evaluate(parsePrec)
const apparentTemperature = await (
await appTempDiv.$('span')
).evaluate(parseTemp)
@ -100,7 +145,6 @@ const getDaySummary = day => {
minimumApparentTemperature,
maximumApparentTemperature,
precipitationSum,
weatherCode: null,
}
}
@ -118,6 +162,7 @@ export default async browser => {
])
return {
format: FORMAT,
today: results[0],
tomorrow: results[1],
dayAfterTomorrow: results[2],

@ -1,3 +1,50 @@
// Describes the type and unit of every field found in the `hourly` and
// `daily` entries produced by this source.
const FORMAT = (() => {
    // Builds a fresh numeric descriptor for the given unit on every call.
    const numberIn = unit => ({ type: 'number', unit })
    const CELSIUS = '°C'
    const MILLIMETRES = 'mm'
    const WMO = 'WMO code'

    return {
        hourly: {
            temperature: numberIn(CELSIUS),
            precipitation: numberIn(MILLIMETRES),
            apparentTemperature: numberIn(CELSIUS),
            weatherCode: numberIn(WMO),
        },
        daily: {
            minimumTemperature: numberIn(CELSIUS),
            maximumTemperature: numberIn(CELSIUS),
            minimumApparentTemperature: numberIn(CELSIUS),
            maximumApparentTemperature: numberIn(CELSIUS),
            precipitationSum: numberIn(MILLIMETRES),
            weatherCode: numberIn(WMO),
        },
    }
})()
const getDailyData = async () => {
const response = await fetch(
'https://api.open-meteo.com/v1/forecast?latitude=43.7085&longitude=10.4036&hourly=temperature_2m,relative_humidity_2m,apparent_temperature,precipitation,weather_code&timezone=Europe%2FBerlin&forecast_days=3',

@ -1,3 +1,42 @@
// Describes the type and unit of every field found in the `hourly` and
// `daily` entries produced by this source (no weather code is listed here).
const FORMAT = (() => {
    // Each call builds a fresh descriptor so no two fields share an object.
    const degrees = () => ({ type: 'number', unit: '°C' })
    const rainfall = () => ({ type: 'number', unit: 'mm' })

    const hourly = {
        temperature: degrees(),
        precipitation: rainfall(),
        apparentTemperature: degrees(),
    }

    const daily = {
        minimumTemperature: degrees(),
        maximumTemperature: degrees(),
        minimumApparentTemperature: degrees(),
        maximumApparentTemperature: degrees(),
        precipitationSum: rainfall(),
    }

    return { hourly, daily }
})()
// Reads the element's text and parses the number it starts with; the result
// is NaN when the text has no leading number.
const parseTemp = el => {
    const rawText = el.textContent
    const value = parseFloat(rawText)
    return value
}
@ -18,23 +57,23 @@ const scrapePage = async (browser, url, isToday) => {
const startTime = isToday ? new Date().getHours() + 1 : 1
const endTime = 24
if (isToday) {
try {
const row = await weatherTable.$$('.latest_detection')
const fields = await row[1]?.$$('td')
const temperature = await fields[2].evaluate(parseTemp)
const precipitation = await fields[3].evaluate(parsePrec)
result[startTime - 1] = {
temperature,
precipitation,
apparentTemperature: null,
weatherCode: null,
}
} catch (error) {
result[startTime - 1] = null
// console.log(error)
}
}
// if (isToday) {
// try {
// const row = await weatherTable.$$('.latest_detection')
// const fields = await row[1]?.$$('td')
// const temperature = await fields[2].evaluate(parseTemp)
// const precipitation = await fields[3].evaluate(parsePrec)
// result[startTime - 1] = {
// temperature,
// precipitation,
// apparentTemperature: null,
// weatherCode: null,
// }
// } catch (error) {
// result[startTime - 1] = null
// // console.log(error)
// }
// }
for (let i = startTime; i <= endTime; i++) {
try {
const selector = isToday
@ -50,7 +89,6 @@ const scrapePage = async (browser, url, isToday) => {
temperature,
precipitation,
apparentTemperature,
weatherCode: null,
}
} catch (error) {
result[i] = null
@ -99,7 +137,6 @@ const getDaySummary = day => {
minimumApparentTemperature,
maximumApparentTemperature,
precipitationSum,
weatherCode: null,
}
}
@ -119,6 +156,7 @@ export default async browser => {
)
return {
format: FORMAT,
today: results[0],
tomorrow: results[1],
dayAfterTomorrow: results[2],

Loading…
Cancel
Save