Problem: the size and variety of datasets have grown exponentially, and new data-science methodologies have emerged.
# Embed the Puertos del Estado THREDDS catalog in the notebook for browsing.
from IPython.display import IFrame
IFrame("http://opendap.puertos.es/thredds/catalog.html", width=1200, height=600)
# Render matplotlib figures inline in the notebook.
%matplotlib inline
import warnings
# Silence library warnings to keep the notebook output readable.
warnings.simplefilter("ignore")
# List the NetCDF files available in the local data directory.
%ls data/*.nc
import xarray as xr
# Lazily open one local NetCDF file; values are read on access, not here.
ds = xr.open_dataset("data/total_precipitation.nc")
ds
# Merge every NetCDF file under data/ into one dataset along shared
# coordinates (parallel=True would parallelize the per-file opens with dask).
ds = xr.open_mfdataset("data/*.nc", combine="by_coords") # , parallel=True
ds
# Open a remote dataset over OPeNDAP — only requested slices are transferred.
# NOTE(review): esrl.noaa.gov/psd has since moved to psl.noaa.gov — confirm
# this URL still resolves/redirects.
ds_noaa = xr.open_dataset(
"http://www.esrl.noaa.gov/psd/thredds/dodsC/Datasets/noaa.ersst.v5/sst.mnmean.nc"
)
ds_noaa
Lazy loading of:
- on-disk datasets, or
- remote datasets (e.g. served over OPeNDAP)
# Dataset variable short names: significant wave height (analysed below) and
# total precipitation (auxiliary; not used in this cell).
main_var = "swh"
aux_var = "tp"
ds[main_var]
# Analysis window and a sample point — presumably near Cape Verde; TODO confirm.
time_ini = "2000-01-01"
time_end = "2002-12-31"
longitude = -22
latitude = 16
# Default xarray plot of the variable over the selected period.
ds[main_var].sel(time=slice(time_ini, time_end)).plot()
The purpose of visualization is insight, not pictures
― Ben Shneiderman
import hvplot.xarray
# Interactive (hvplot/bokeh) view of the time-sliced field.
ds[main_var].sel(time=slice(time_ini, time_end)).hvplot()
# Full time series at the grid cell nearest the chosen point.
ds[main_var].sel(longitude=longitude, latitude=latitude, method="nearest").hvplot()
# Monthly climatology at that cell: group by calendar month, then average.
ds[main_var].sel(longitude=longitude, latitude=latitude, method="nearest").groupby(
"time.month"
).mean().hvplot()
# Point time series restricted to the analysis window.
ds[main_var].sel(time=slice(time_ini, time_end)).sel(
longitude=longitude, latitude=latitude, method="nearest"
).hvplot()
import cartopy.crs as ccrs
import numpy as np
import matplotlib.pyplot as plt
# Shared map configuration for the cartopy plots below.
proj = ccrs.PlateCarree()
figsize = (10, 5)
# Padding factor: zoom * 0.5 degrees added around the data bounds.
zoom = 1
# Bounding box of the dataset grid.
# NOTE(review): .min()/.max() return 0-d DataArrays, not plain floats;
# set_extent appears to tolerate them — confirm, or wrap each in float().
lon_min = ds.coords["longitude"].min()
lon_max = ds.coords["longitude"].max()
lat_min = ds.coords["latitude"].min()
lat_max = ds.coords["latitude"].max()
# [west, east, south, north] extent, padded.
extent = [
lon_min - zoom * 0.5,
lon_max + zoom * 0.5,
lat_min - zoom * 0.5,
lat_max + zoom * 0.5,
]
# Natural Earth coastline resolution used by ax.coastlines().
resolution = "50m"
def default_map(axes=None, global_map=False, background=True):
    """Return a cartopy GeoAxes prepared with coastlines for the plots below.

    Parameters
    ----------
    axes : GeoAxes, optional
        Existing axes to draw into; when None, a new figure/axes pair is
        created with the module-level ``figsize`` and ``proj``.
    global_map : bool
        Show the whole globe instead of the dataset ``extent``.
    background : bool
        Overlay the Natural Earth stock background image.
    """
    target = axes
    if target is None:
        # Figure handle is not needed; only the axes are returned.
        _, target = plt.subplots(figsize=figsize, subplot_kw={"projection": proj})
    if global_map:
        target.set_global()
    else:
        target.set_extent(extent)
    target.coastlines(resolution=resolution)
    if background:
        target.stock_img()
    return target
# Global map with the first grid point of the dataset highlighted.
ax = default_map(global_map=True)
plt.scatter(
ds.coords["longitude"][0],
ds.coords["latitude"][0],
c="navy",
s=100,
marker="o",
edgecolors="white",
alpha=0.9,
)
plt.show()
# Regional map showing every grid node of the dataset as a small dot.
ax = default_map(background=False)
lons, lats = np.meshgrid(ds.coords["longitude"], ds.coords["latitude"])
plt.scatter(lons, lats, c="blue", s=1)
plt.show()
import geoviews.feature as gf
# Interactive map of the first time step, overlaid with coastlines.
ds[main_var].isel(time=0).hvplot(
"longitude", "latitude", crs=proj, cmap="viridis", width=500, height=500
) * gf.coastline.options(scale=resolution, line_width=2)
# Extract the point time series as a pandas Series for CSV export.
df = ds.sel(longitude=longitude, latitude=latitude, method="nearest").to_dataframe()[
main_var
]
df
df.to_csv("data/swh.csv", header=True)
# Compression is inferred from the .zip extension (same as passing it explicitly).
df.to_csv("data/swh.zip", header=True) # , compression="zip"
# Compare on-disk sizes of the plain vs. zipped CSV exports.
%ls -lh data/swh*
%ls -lh data/SIMAR*
import pandas as pd
# Read the zipped CSV back; compression is inferred from the .zip extension.
df_file = pd.read_csv("data/swh.zip", index_col="time", parse_dates=True)
df_file
# Parse a SIMAR hindcast text file: whitespace-delimited columns after an
# 80-line header; the first four columns (YYYY MM DD HH) are combined into a
# single datetime index; -99.9 marks missing values.
# delim_whitespace=True is deprecated since pandas 2.2 (removed in 3.0);
# sep=r"\s+" is the documented equivalent.
# NOTE(review): column-combining parse_dates=[[0, 1, 2, 3]] is also deprecated
# in pandas 2.x — revisit (combine columns manually) when upgrading.
df_simar = pd.read_csv(
    "data/SIMAR_1052046",
    sep=r"\s+",
    skiprows=80,
    parse_dates=[[0, 1, 2, 3]],
    index_col=0,
    header=0,
    na_values=-99.9,
)
# Re-export as a zipped CSV (compression inferred from the extension).
df_simar.to_csv("data/SIMAR_1052046.zip", header=True)
import hvplot.pandas
# Interactive time-series plot of the series read back from the zipped CSV.
df_file.hvplot()
This project aims to provide a way to co-locate oceanographic data by establishing constraints. The tools developed allow users to specify geospatial bounds within a temporal range and receive an aggregated response of all available data.