Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft classes of object/opinionated layer #267

Draft
wants to merge 15 commits into
base: main
Choose a base branch
from
Draft
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,6 @@ build/
dist/
*-output.ipynb
.vscode/
.idea/
*.code-workspace
**/__pycache__
10 changes: 9 additions & 1 deletion erddapy/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,17 @@
"""Easier access to scientific data."""

from erddapy.array_like import ERDDAPConnection, ERDDAPServer, GridDataset, TableDataset
from erddapy.erddapy import ERDDAP
from erddapy.servers.servers import servers

__all__ = ["ERDDAP", "servers"]
__all__ = [
"ERDDAP",
"servers",
"ERDDAPConnection",
"ERDDAPServer",
"TableDataset",
"GridDataset",
]

try:
from ._version import __version__
Expand Down
19 changes: 19 additions & 0 deletions erddapy/array_like/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
"""
This module contains opinionated, higher-level objects for searching servers and accessing datasets.

The design is inspired by object-relational mapping: the concept of having an object-oriented
layer between a database (in this case, ERDDAP) and the programming language.
"""


from .connection import ERDDAPConnection
from .datasets import ERDDAPDataset, GridDataset, TableDataset
from .server import ERDDAPServer

__all__ = [
"ERDDAPDataset",
"ERDDAPConnection",
"ERDDAPServer",
"TableDataset",
"GridDataset",
]
60 changes: 60 additions & 0 deletions erddapy/array_like/connection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
"""Class ERDDAPConnection to represent connection to a particular URL."""

from __future__ import annotations

from pathlib import Path
from typing import Union

StrLike = Union[str, bytes]
FilePath = Union[str, Path]


class ERDDAPConnection:
    """
    Hold the server address that ERDDAPServer instances connect through.

    A bare URL is enough for most ERDDAP servers, but some of them require authentication
    before they hand out data.
    """

    def __init__(self, server: str):
        """Create a connection bound to ``server`` (a string or another connection)."""
        self._server = self.to_string(server)

    @classmethod
    def to_string(cls, value):
        """Return the server string for ``value``; raise TypeError for unsupported types."""
        # Guard clauses: plain strings pass through, connection instances are unwrapped.
        if isinstance(value, str):
            return value
        if isinstance(value, cls):
            return value.server
        raise TypeError(
            f"Server must be either a string or an instance of ERDDAPConnection. '{value}' was "
            f"passed.",
        )

    def get(self, url_part: str) -> StrLike:
        """
        Request data from the server.

        Placeholder: nothing is fetched yet and None is returned. Subclasses can override this
        method to plug in the HTTP client of their choice.
        """
        return None

    def open(self, url_part: str) -> FilePath:
        """
        Provide file-like access for file types that cannot be consumed from a string.

        Placeholder: nothing is opened yet and None is returned.
        """
        return None

    @property
    def server(self) -> str:
        """Server address stored in the private ``_server`` attribute."""
        return self._server

    @server.setter
    def server(self, value: str):
        """Replace the stored server address, applying the same validation as the constructor."""
        self._server = self.to_string(value)
106 changes: 106 additions & 0 deletions erddapy/array_like/datasets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
"""Classes to represent ERDDAP datasets."""

from __future__ import annotations

from pathlib import Path
from typing import Union

from erddapy.array_like.connection import ERDDAPConnection

StrLike = Union[str, bytes]
FilePath = Union[str, Path]


class ERDDAPDataset:
    """Common base for the table- and grid-specific dataset classes."""

    def __init__(
        self,
        dataset_id: str,
        connection: str | ERDDAPConnection,
        variables,
        constraints,
    ):
        """Store the dataset id, a connection to its server, and the variables/constraints."""
        self.dataset_id = dataset_id
        # Always keep a fresh ERDDAPConnection built from the server string of the argument.
        server = ERDDAPConnection.to_string(connection)
        self._connection = ERDDAPConnection(server)
        self._variables = variables
        self._constraints = constraints
        self._meta = None

    @property
    def connection(self) -> ERDDAPConnection:
        """Connection stored in the private ``_connection`` attribute."""
        return self._connection

    @connection.setter
    def connection(self, value: str | ERDDAPConnection):
        """Replace the connection with a new one built from the server string of ``value``."""
        server = ERDDAPConnection.to_string(value)
        self._connection = ERDDAPConnection(server)

    def get(self, file_type: str) -> StrLike:
        """Delegate a data request for ``file_type`` to the underlying connection."""
        conn = self.connection
        return conn.get(file_type)

    def open(self, file_type: str) -> FilePath:
        """Delegate opening ``file_type`` to the underlying connection."""
        conn = self.connection
        return conn.open(file_type)

    def get_meta(self):
        """Request dataset metadata from the server (placeholder: only resets ``_meta``)."""
        self._meta = None

    @property
    def meta(self):
        """Return the cached metadata, calling ``get_meta`` when nothing is cached."""
        if self._meta is None:
            return self.get_meta()
        return self._meta

    @property
    def variables(self):
        """Variables stored in the private ``_variables`` attribute."""
        return self._variables

    @property
    def constraints(self):
        """Constraints stored in the private ``_constraints`` attribute."""
        return self._constraints

    def url_segment(self, file_type: str) -> str:
        """
        Return the URL portion that follows the base URL ('https://server.com/erddap/').

        Placeholder: not implemented yet, returns None.
        """
        return None

    def url(self, file_type: str) -> str:
        """
        Return a URL constructed using the underlying ERDDAPConnection.

        The URL is meant to combine the server info, the dataset ID, the access method
        (tabledap/griddap), the file type, the variables, and the constraints, so that
        subclasses can act as opinionated URL constructors without tying users to a specific
        IO method.

        It is not guaranteed to capture every detail of a request, such as server-specific
        auth or headers.

        Placeholder: not implemented yet, returns None.
        """
        return None

    def to_dataset(self):
        """Open a downloaded NetCDF subset as an xarray dataset (placeholder, returns None)."""
        return None

    def opendap_dataset(self):
        """Open the full dataset in xarray via OpenDAP (placeholder, returns None)."""
        return None


class TableDataset(ERDDAPDataset):
    """ERDDAPDataset specialised for datasets served through TableDAP."""

    def to_dataframe(self):
        """Open the dataset as a Pandas DataFrame (placeholder, returns None)."""
        return None


class GridDataset(ERDDAPDataset):
    """ERDDAPDataset specialised for datasets served through GridDAP."""
44 changes: 44 additions & 0 deletions erddapy/array_like/server.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""Class ERDDAPServer to represent an ERDDAP server connection."""

from __future__ import annotations

from erddapy.array_like.connection import ERDDAPConnection
from erddapy.array_like.datasets import ERDDAPDataset


class ERDDAPServer:
    """Instance of an ERDDAP server, with support to ERDDAP's native functionalities."""

    def __init__(self, url: str, connection: ERDDAPConnection | None = None):
        """
        Initialize instance of ERDDAPServer.

        ``url`` is stored as given. ``connection`` may be an ERDDAPConnection, a server string,
        or None; when it is None (or empty) a connection to ``url`` is created.
        """
        # TODO: resolve short server names (anything without "http") through the dict of known
        # ERDDAP servers. Until then the value is stored unchanged so `url` always exists.
        self.url = url
        # Go through the property setter so both attributes are always initialised the same way.
        self.connection = connection

    @property
    def connection(self) -> ERDDAPConnection:
        """Access private ._connection attribute."""
        return self._connection

    @connection.setter
    def connection(self, value: str | ERDDAPConnection | None):
        """
        Set private ._connection attribute.

        Connection instances are stored as-is (so customised subclasses are preserved), strings
        are wrapped in an ERDDAPConnection, and an empty value falls back to the server URL.
        """
        if not value:
            value = self.url
        if isinstance(value, ERDDAPConnection):
            self._connection = value
        else:
            # ERDDAPConnection validates the type and raises TypeError for unsupported values.
            self._connection = ERDDAPConnection(value)

    def full_text_search(self, query: str) -> dict[str, ERDDAPDataset]:
        """Search the server with native ERDDAP full text search (not implemented yet)."""
        pass

    def search(self, query: str) -> dict[str, ERDDAPDataset]:
        """
        Search the server with native ERDDAP full text search capabilities.

        Also see ERDDAPServer.full_text_search.
        """
        return self.full_text_search(query)

    def advanced_search(self, **kwargs) -> dict[str, ERDDAPDataset]:
        """Search server with ERDDAP advanced search capabilities (not implemented yet)."""
        pass
52 changes: 52 additions & 0 deletions erddapy/core/interfaces.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
"""
Interface between URL responses and third-party libraries.

This module takes an URL or the bytes response of a request and converts it to Pandas,
XArray, Iris, etc. objects.
"""

import iris
import pandas as pd
import xarray as xr
from netCDF4 import Dataset as ncDataset

from erddapy.core.netcdf import _nc_dataset, _tempnc
from erddapy.core.url import urlopen


def to_pandas(url: str, requests_kwargs=None, **kw) -> pd.DataFrame:
    """
    Convert a URL to Pandas DataFrame.

    ``requests_kwargs`` (a dict, or None for no extra options) is forwarded to ``urlopen``;
    every other keyword argument is forwarded to ``pd.read_csv``. Any exception raised while
    parsing is re-raised unchanged after a short notice is printed.
    """
    # None instead of a shared mutable `dict()` default.
    data = urlopen(url, **(requests_kwargs or {}))
    try:
        return pd.read_csv(data, **kw)
    except Exception:
        print("Couldn't process response into Pandas DataFrame.")
        raise


def to_ncCF(url: str, **kw) -> ncDataset:
    """Open ``url`` as a netCDF4 Dataset; an optional ``auth`` keyword is passed on explicitly."""
    credentials = kw.pop("auth", None)
    dataset = _nc_dataset(url, auth=credentials, **kw)
    return dataset


def to_xarray(url: str, response="opendap", *, requests_kwargs=None, **kw) -> xr.Dataset:
    """
    Convert a URL to an xarray dataset.

    With ``response="opendap"`` the URL is handed directly to ``xr.open_dataset``. For any
    other response the data is first loaded through ``_nc_dataset`` using the optional ``auth``
    keyword and ``requests_kwargs`` (a dict, or None). All remaining keyword arguments go to
    ``xr.open_dataset`` only.
    """
    auth = kw.pop("auth", None)
    if response == "opendap":
        return xr.open_dataset(url, **kw)
    # Keep request options and xarray options apart: forwarding the same keywords to both
    # calls makes one of them fail on arguments it does not understand.
    nc = _nc_dataset(url, auth=auth, **(requests_kwargs or {}))
    return xr.open_dataset(xr.backends.NetCDF4DataStore(nc), **kw)


def to_iris(url: str, *, requests_kwargs=None, **kw):
    """
    Convert a URL to an iris CubeList.

    The optional ``auth`` keyword and ``requests_kwargs`` (a dict, or None) are used for the
    download through ``urlopen``; all remaining keyword arguments go to ``iris.load_raw`` only.
    """
    auth = kw.pop("auth", None)
    # Request options and iris options are kept apart so neither call receives keywords
    # meant for the other.
    data = urlopen(url, auth=auth, **(requests_kwargs or {}))
    with _tempnc(data) as tmp:
        cubes = iris.load_raw(tmp, **kw)
        # Load the data while the temporary file is still open.
        try:
            cubes.realise_data()
        except ValueError:
            _ = [cube.data for cube in cubes]
        return cubes
33 changes: 10 additions & 23 deletions erddapy/erddapy.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
_griddap_check_variables,
_griddap_get_constraints,
)
from erddapy.core.netcdf import _nc_dataset, _tempnc
from erddapy.core.interfaces import to_iris, to_ncCF, to_pandas, to_xarray
from erddapy.core.url import (
_check_substrings,
_distinct,
Expand Down Expand Up @@ -344,50 +344,37 @@ def to_pandas(self, **kw):
"""
response = kw.pop("response", "csvp")
url = self.get_download_url(response=response, **kw)
data = urlopen(url, auth=self.auth, **self.requests_kwargs)
return pd.read_csv(data, **kw)
return to_pandas(url, **kw)

def to_ncCF(self, **kw):
    """Load the data request into a Climate and Forecast compliant netCDF4-python object.

    Keyword arguments are used to build the download URL. Raises ValueError when the
    protocol is griddap.
    """
    if self.protocol == "griddap":
        # Raise the error; returning the exception instance would hand callers a ValueError
        # object instead of a dataset.
        raise ValueError("Cannot use ncCF with griddap.")
    url = self.get_download_url(response="ncCF", **kw)
    # The request itself uses the instance credentials and request options; the URL keywords
    # above are not request options and are not forwarded.
    return to_ncCF(url, auth=self.auth, **self.requests_kwargs)

def to_xarray(self, **kw):
    """Load the data request into a xarray.Dataset.

    Accepts any `xr.open_dataset` keyword arguments.
    """
    # Pick the response type: OPeNDAP when requested, otherwise a NetCDF flavour that
    # depends on the protocol.
    if self.response == "opendap":
        response = "opendap"
    elif self.protocol == "griddap":
        response = "nc"
    else:
        response = "ncCF"
    url = self.get_download_url(response=response)
    return to_xarray(url, response=response, auth=self.auth, **kw)

def to_iris(self, **kw):
    """Load the data request into an iris.CubeList.

    Accepts any `iris.load_raw` keyword arguments.
    """
    # griddap data is requested as plain NetCDF, everything else as CF NetCDF.
    response = "nc" if self.protocol == "griddap" else "ncCF"
    url = self.get_download_url(response=response, **kw)
    return to_iris(url, **kw)

@functools.lru_cache(maxsize=None)
def _get_variables(self, dataset_id: OptionalStr = None) -> Dict:
Expand Down
4 changes: 2 additions & 2 deletions tests/test_erddapy.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,13 +103,13 @@ def test_erddap_requests_kwargs():
slowwly_url = f"https://flash-the-slow-api.herokuapp.com/delay/{slowwly_milliseconds}/url/{base_url}"

connection = ERDDAP(slowwly_url)
connection.dataset_id = "M01_sbe37_all"
connection.dataset_id = "raw_asset_inventory"
connection.protocol = "tabledap"

connection.requests_kwargs["timeout"] = timeout_seconds

with pytest.raises(httpx.ReadTimeout):
connection.to_xarray()
connection.to_pandas(requests_kwargs=connection.requests_kwargs)


@pytest.mark.web
Expand Down