Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add era5 examples as python scripts #61

Open
wants to merge 14 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,6 @@ docs/temp/*

# Mypy Cache
.mypy_cache/

# Example data
examples/example_data/*
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ include LICENSE
include README.rst

exclude docs/temp/*.rst
prune examples

recursive-include docs *.ipynb
recursive-include examples *.csv
Expand Down
Binary file removed examples/ERA5_example_data.nc
Binary file not shown.
7 changes: 0 additions & 7 deletions examples/berlin_shape.geojson

This file was deleted.

98 changes: 98 additions & 0 deletions examples/era5_download_weather_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
#!/usr/bin/env python
# coding: utf-8

"""
This example shows you how to download ERA5 weather data from the
`Climate Data Store (CDS) <https://cds.climate.copernicus.eu>`_ and store it
locally.

In order to download ERA5 weather data you need an account at the
`CDS <https://cds.climate.copernicus.eu>`_.

Furthermore, you need to install the cdsapi package. See the
`CDS API how-to <https://cds.climate.copernicus.eu/api-how-to>`_ for installation details.

When downloading the data using the API your request gets queued and may take
a while to be completed.

Besides a location you have to specify a time period for which you would like
to download the data as well as the weather variables you need. The feedinlib
provides predefined sets of variables that are needed to use the pvlib and
windpowerlib. These can be applied by setting the `variable` parameter to
"pvlib" or "windpowerlib", as shown below. If you want to download data for
both libraries you can set `variable` to "feedinlib".

Concerning the start and end date, keep in mind that all timestamps in the
`feedinlib` are in UTC. So if you later on want to convert the data to a
different time zone, the data may not cover the whole period you intended to
download. To avoid this set your start date to one day before the start of
your required period if you are East of the zero meridian or your end date
to one day after your required period ends if you are West of the zero
meridian.
"""

import os.path

from feedinlib import era5


def download_era5_examples():
    """Download several ERA5 example data sets from the CDS.

    Requests are queued at the Climate Data Store, so completing all of
    them may take a while. The downloaded files are stored in the
    ``example_data`` directory next to this script:

    * a single coordinate, 2019, all feedinlib variables
    * a single coordinate, 2019, windpowerlib variables only
    * a bounding box around Berlin, 2017, all feedinlib variables
    * the whole world, 2017, all feedinlib variables
    """
    example_data = os.path.join(os.path.dirname(__file__), "example_data")
    # The target directory is git-ignored and therefore missing on a fresh
    # clone; create it so the downloads do not fail. The processing
    # examples do the same before writing files.
    os.makedirs(example_data, exist_ok=True)

    requests = [
        # A single coordinate for the year 2019 with all variables.
        {
            "latitude": 54.16,
            "longitude": 9.08,
            "start_date": "2019-01-01",
            "end_date": "2019-12-31",
            "variable": "feedinlib",
            "target_file": os.path.join(
                example_data,
                "era5_feedinlib_54-16_9-08_2019-01-01_2019-12-31.nc",
            ),
        },
        # A single coordinate for the year 2019 with windpowerlib
        # variables only.
        {
            "latitude": 54.43,
            "longitude": 7.68,
            "start_date": "2019-01-01",
            "end_date": "2019-12-31",
            "variable": "windpowerlib",
            "target_file": os.path.join(
                example_data,
                "era5_windpowerlib_54-43_7-68_2019-01-01_2019-12-31.nc",
            ),
        },
        # When downloading weather data for a region, latitude and
        # longitude are lists forming a bounding box.
        {
            "latitude": [52.3, 52.8],  # [latitude south, latitude north]
            "longitude": [13.1, 13.7],  # [longitude west, longitude east]
            "start_date": "2017-01-01",
            "end_date": "2017-12-31",
            "variable": "feedinlib",
            "target_file": os.path.join(
                example_data, "era5_feedinlib_berlin_2017.nc"
            ),
        },
        # All coordinates of the world for one year (no latitude or
        # longitude given).
        {
            "variable": "feedinlib",
            "start_date": "2017-01-01",
            "end_date": "2017-12-31",
            "target_file": os.path.join(
                example_data, "era5_world_feedinlib_2017.nc"
            ),
        },
    ]

    # Identical calls for every request — keep them data-driven instead of
    # copy-pasting the call four times.
    for request in requests:
        era5.get_era5_data_from_datespan_and_position(**request)


if __name__ == "__main__":
    download_era5_examples()
90 changes: 90 additions & 0 deletions examples/era5_download_weather_data_multiprocessing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
#!/usr/bin/env python
# coding: utf-8

"""
This example shows you how to download ERA5 weather data from the
`Climate Data Store (CDS) <https://cds.climate.copernicus.eu>`_ and store it
locally.

In order to download ERA5 weather data you need an account at the
`CDS <https://cds.climate.copernicus.eu>`_.

Furthermore, you need to install the cdsapi package. See the
`CDS API how-to <https://cds.climate.copernicus.eu/api-how-to>`_ for installation details.

When downloading the data using the API your request gets queued and may take
a while to be completed.

Besides a location you have to specify a time period for which you would like
to download the data as well as the weather variables you need. The feedinlib
provides predefined sets of variables that are needed to use the pvlib and
windpowerlib. These can be applied by setting the `variable` parameter to
"pvlib" or "windpowerlib", as shown below. If you want to download data for
both libraries you can set `variable` to "feedinlib".

Concerning the start and end date, keep in mind that all timestamps in the
`feedinlib` are in UTC. So if you later on want to convert the data to a
different time zone, the data may not cover the whole period you intended to
download. To avoid this set your start date to one day before the start of
your required period if you are East of the zero meridian or your end date
to one day after your required period ends if you are West of the zero
meridian.
"""

import math
import multiprocessing
import os.path

from feedinlib import era5


def download_era5(parameters):
    """Fetch a single ERA5 data set described by *parameters*.

    Adapter used as the worker function for ``multiprocessing.Pool.map``:
    it unpacks the relevant entries of the *parameters* dictionary into
    keyword arguments (missing entries are passed as ``None``).
    """
    keys = (
        "variable",
        "start_date",
        "end_date",
        "latitude",
        "longitude",
        "target_file",
        "chunks",
    )
    kwargs = {key: parameters.get(key) for key in keys}
    era5.get_era5_data_from_datespan_and_position(**kwargs)


def download_era5_multiprocessing_examples():
    """Download two ERA5 example data sets in parallel.

    Each location is fetched by a separate worker process via
    ``download_era5``. Only half of the available CPU cores are used so
    the machine stays responsive during the download.
    """
    # Define the locations:
    example_data = os.path.join(os.path.dirname(__file__), "example_data")
    # The target directory is git-ignored and may not exist on a fresh
    # clone; create it so the downloads do not fail.
    os.makedirs(example_data, exist_ok=True)

    locations = [
        {
            "latitude": 54.16,
            "longitude": 9.08,
            "start_date": "2019-01-01",
            "end_date": "2019-12-31",
            "variable": "feedinlib",
            "target_file": os.path.join(
                example_data,
                "era5_feedinlib_54-16_9-08_2019-01-01_2019-12-31.nc",
            ),
        },
        {
            "latitude": 54.43,
            "longitude": 7.68,
            "start_date": "2019-01-01",
            "end_date": "2019-12-31",
            "variable": "windpowerlib",
            "target_file": os.path.join(
                example_data,
                "era5_windpowerlib_54-43_7-68_2019-01-01_2019-12-31.nc",
            ),
        },
    ]

    # Download the data sets in parallel, using at most half the cores.
    maximal_number_of_cores = math.ceil(multiprocessing.cpu_count() * 0.5)
    # The context manager guarantees the pool is cleaned up even if one of
    # the downloads raises; the original `close()`-only version leaked the
    # worker processes on error and never joined them.
    with multiprocessing.Pool(maximal_number_of_cores) as pool:
        pool.map(download_era5, locations)


if __name__ == "__main__":
    download_era5_multiprocessing_examples()
160 changes: 160 additions & 0 deletions examples/era5_processing_weather_data_multi_locations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
"""
Read the single_location examples first.

"""

import json
import os

import geopandas as gpd
import requests
import xarray as xr
from matplotlib import pyplot as plt
from shapely.geometry import GeometryCollection
from shapely.geometry import Point
from shapely.geometry import Polygon
from shapely.geometry import shape

from feedinlib import Photovoltaic
from feedinlib import WindPowerPlant
from feedinlib.era5 import weather_df_from_era5


def processing_multi_locations():
    """Demonstrate processing ERA5 weather data covering multiple locations.

    Downloads example data files (netCDF weather data for a bounding box
    around Berlin plus two GeoJSON shapes) if they are not present, plots
    the grid points contained in the data, shows how to spatially subset
    the data with the ``area`` parameter (all points, bounding box,
    polygon, multipolygon), and finally computes PV and wind feed-in time
    series for a single location.
    """
    data_path = os.path.join(os.path.dirname(__file__), "example_data")
    os.makedirs(data_path, exist_ok=True)

    # Download example files if they do not exist.
    # Keys are OSF identifiers used to build the download URLs below;
    # values are the local file names.
    files = {
        "zncmb": "era5_feedinlib_berlin_2017.nc",
        "txmze": "berlin_shape.geojson",
        "96qyt": "germany_simple.geojson",
    }

    files = {k: os.path.join(data_path, v) for k, v in files.items()}

    for key, file in files.items():
        if not os.path.isfile(file):
            req = requests.get("https://osf.io/{0}/download".format(key))
            with open(file, "wb") as fout:
                fout.write(req.content)

    # The example netCDF-file is fetched from the era5-server with the
    # following bounding box: latitude = [52.3, 52.7] longitude = [13.1, 13.7]

    # Read the netCDF-file into an xarray
    era5_netcdf_filename = files["zncmb"]
    # era5_netcdf_filename = "example_data/ERA5_example_data.nc"
    ds = xr.open_dataset(era5_netcdf_filename)

    # Extract all points from the netCDF-file:
    points = []
    for x in ds.longitude:
        for y in ds.latitude:
            points.append(Point(x, y))
    points_df = gpd.GeoDataFrame({"geometry": points})

    # Plot all points within the bounding box with the shape of Berlin
    region_shape = gpd.read_file(
        os.path.join(data_path, "berlin_shape.geojson")
    )
    base = region_shape.plot(color="white", edgecolor="black")
    points_df.plot(ax=base, marker="o", color="red", markersize=5)
    plt.show()

    # With the `area` parameter you can specify a spatial subset of the weather
    # data in your netCDF-file.
    # In case `area` is not a single location, the index of the resulting
    # dataframe will be a multiindex with levels (time, latitude, longitude).
    # Be aware that in order to use it for pvlib or windpowerlib calculations
    # you need to select just one location.

    # Create a pandas DataFrame for all locations.
    pvlib_all = weather_df_from_era5(
        era5_netcdf_filename=era5_netcdf_filename, lib="pvlib"
    )

    # Grouping by (latitude, longitude) index levels lists the distinct
    # grid points contained in each subset.
    print("All points:\n", pvlib_all.groupby(level=[1, 2]).mean().index)

    # Create a pandas DataFrame for a bounding box.
    # `area` as two tuples: (lon west, lon east), (lat south, lat north).
    area = [(13.2, 13.4), (52.4, 52.8)]
    pvlib_bb = weather_df_from_era5(
        era5_netcdf_filename=era5_netcdf_filename, lib="pvlib", area=area
    )
    print(
        "Bounding box points:\n", pvlib_bb.groupby(level=[1, 2]).mean().index
    )

    # Create a pandas DataFrame for a polygon.
    lat_point_list = [52.1, 52.1, 52.65]
    lon_point_list = [13.0, 13.4, 13.4]
    area = Polygon(zip(lon_point_list, lat_point_list))
    pvlib_polygon = weather_df_from_era5(
        era5_netcdf_filename=era5_netcdf_filename, lib="pvlib", area=area
    )
    print(
        "Polygon points:\n", pvlib_polygon.groupby(level=[1, 2]).mean().index
    )

    # Create a pandas DataFrame for a multipolygon.
    # buffer(0) repairs invalid geometries in the GeoJSON features.
    with open(os.path.join(data_path, "germany_simple.geojson")) as f:
        features = json.load(f)["features"]
        polygon = GeometryCollection(
            [shape(feature["geometry"]).buffer(0) for feature in features]
        )
    area = polygon
    pvlib_polygon_real = weather_df_from_era5(
        era5_netcdf_filename=era5_netcdf_filename, lib="pvlib", area=area
    )
    print(
        "Multipolygon points:\n",
        pvlib_polygon_real.groupby(level=[1, 2]).mean().index,
    )

    # Create a pandas DataFrame for single location and a time subset (pv)
    start = "2017-07-01"
    end = "2017-07-31"
    single_location = [13.2, 52.4]
    pvlib_single = weather_df_from_era5(
        era5_netcdf_filename=era5_netcdf_filename,
        lib="pvlib",
        area=single_location,
        start=start,
        end=end,
    )

    system_data = {
        "module_name": "Advent_Solar_Ventura_210___2008_",
        "inverter_name": "ABB__MICRO_0_25_I_OUTD_US_208__208V_",
        "azimuth": 180,
        "tilt": 30,
        "albedo": 0.2,
    }
    pv_system = Photovoltaic(**system_data)
    feedin = pv_system.feedin(weather=pvlib_single, location=(52.5, 13.1))
    feedin.plot()

    plt.show()

    # Create a pandas DataFrame for single location and a time subset (wind)
    start = "2017-07-01"
    end = "2017-07-31"
    single_location = [13.2, 52.4]
    windpowerlib_single = weather_df_from_era5(
        era5_netcdf_filename=era5_netcdf_filename,
        lib="windpowerlib",
        area=single_location,
        start=start,
        end=end,
    )

    turbine_data = {"turbine_type": "E-101/3050", "hub_height": 135}
    wind_turbine = WindPowerPlant(**turbine_data)
    feedin = wind_turbine.feedin(weather=windpowerlib_single)
    feedin.plot()

    plt.show()


if __name__ == "__main__":
    processing_multi_locations()
Loading