Merge branch 'master' into use-dask.array
fneum authored Sep 8, 2024
2 parents (5d6aa4b + fec43a4), commit 90bec1d
Showing 14 changed files with 251 additions and 119 deletions.
92 changes: 92 additions & 0 deletions .github/workflows/codeql.yml
@@ -0,0 +1,92 @@
# For most projects, this workflow file will not need changing; you simply need
# to commit it to your repository.
#
# You may wish to alter this file to override the set of languages analyzed,
# or to provide custom queries or build logic.
#
# ******** NOTE ********
# We have attempted to detect the languages in your repository. Please check
# the `language` matrix defined below to confirm you have the correct set of
# supported CodeQL languages.
#
name: "CodeQL"

on:
  push:
    branches: ["master"]
  pull_request:
    branches: ["master"]
  schedule:
    - cron: '32 11 * * 6'

jobs:
  analyze:
    name: Analyze (${{ matrix.language }})
    # Runner size impacts CodeQL analysis time. To learn more, please see:
    # - https://gh.io/recommended-hardware-resources-for-running-codeql
    # - https://gh.io/supported-runners-and-hardware-resources
    # - https://gh.io/using-larger-runners (GitHub.com only)
    # Consider using larger runners or machines with greater resources for possible analysis time improvements.
    runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }}
    permissions:
      # required for all workflows
      security-events: write

      # required to fetch internal or private CodeQL packs
      packages: read

      # only required for workflows in private repositories
      actions: read
      contents: read

    strategy:
      fail-fast: false
      matrix:
        include:
        - language: python
          build-mode: none
        # CodeQL supports the following values for 'language': 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift'
        # Use 'c-cpp' to analyze code written in C, C++ or both
        # Use 'java-kotlin' to analyze code written in Java, Kotlin or both
        # Use 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both
        # To learn more about changing the languages that are analyzed or customizing the build mode for your analysis,
        # see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning.
        # If you are analyzing a compiled language, you can modify the 'build-mode' for that language to customize how
        # your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages
    steps:
    - name: Checkout repository
      uses: actions/checkout@v4

    # Initializes the CodeQL tools for scanning.
    - name: Initialize CodeQL
      uses: github/codeql-action/init@v3
      with:
        languages: ${{ matrix.language }}
        build-mode: ${{ matrix.build-mode }}
        # If you wish to specify custom queries, you can do so here or in a config file.
        # By default, queries listed here will override any specified in a config file.
        # Prefix the list here with "+" to use these queries and those in the config file.

        # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
        # queries: security-extended,security-and-quality

    # If the analyze step fails for one of the languages you are analyzing with
    # "We were unable to automatically build your code", modify the matrix above
    # to set the build mode to "manual" for that language. Then modify this step
    # to build your code.
    # ℹ️ Command-line programs to run using the OS shell.
    # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
    - if: matrix.build-mode == 'manual'
      shell: bash
      run: |
        echo 'If you are using a "manual" build mode for one or more of the' \
          'languages you are analyzing, replace this with the commands to build' \
          'your code, for example:'
        echo '  make bootstrap'
        echo '  make release'
        exit 1
    - name: Perform CodeQL Analysis
      uses: github/codeql-action/analyze@v3
      with:
        category: "/language:${{matrix.language}}"
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -45,7 +45,7 @@ repos:

# Formatting with "black" coding style
  - repo: https://github.com/psf/black
-    rev: 24.4.2
+    rev: 24.8.0
hooks:
# Format Python files
- id: black
25 changes: 25 additions & 0 deletions RELEASE_NOTES.rst
@@ -13,6 +13,31 @@ Upcoming Release

* Use ``dask.array`` functions in favour of ``numpy`` functions.

Version 0.2.14
==============

* Compatibility with new CDS infrastructure for ERA5 cutouts. Update your API
key at https://cds-beta.climate.copernicus.eu/how-to-api and use the new API
endpoint ``https://cds-beta.climate.copernicus.eu/api`` in your
``~/.cdsapirc`` file. The old CDS infrastructure can still be accessed when
the ``~/.cdsapirc`` uses the old endpoint.

* Adds option to toggle whether ERA5 downloads are requested in monthly or
annual chunks with keyword argument ``cutout.prepare(monthly_requests=True)``.
The default is now annual requests. The monthly requests can also be posted
concurrently using ``cutout.prepare(monthly_requests=True,
concurrent_requests=True)``.

* Improved parallelization of ``atlite.convert.build_line_rating`` by adding
keyword arguments for ``dask.compute`` (``dask_kwargs={}``) and an option to
disable the progressbar (``show_progress=False``).

* Default to ``show_progress=False`` for performance reasons.

* NumPy version temporarily limited to <2.

* Removed long-deprecated functions and cutout arguments/attributes.

Version 0.2.13
==============

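The ERA5 download changes in the release notes above combine as follows. A minimal usage sketch, assuming a valid CDS account; the cutout extent, period, and file names are illustrative, not taken from this commit:

```python
import atlite

# The new CDS endpoint goes into ~/.cdsapirc, per the release notes above:
#   url: https://cds-beta.climate.copernicus.eu/api
#   key: <your-cds-api-key>   # key format per the linked how-to page

# Illustrative cutout definition; coordinates and period are examples only.
cutout = atlite.Cutout(
    path="western-europe-2013.nc",
    module="era5",
    x=slice(-13.7, 1.9),
    y=slice(49.9, 60.9),
    time="2013",
)

# New keywords from version 0.2.14: request ERA5 data in monthly chunks
# and post those requests concurrently (both default to False).
cutout.prepare(monthly_requests=True, concurrent_requests=True)
```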
21 changes: 15 additions & 6 deletions atlite/convert.py
@@ -53,7 +53,7 @@ def convert_and_aggregate(
return_capacity=False,
capacity_factor=False,
capacity_factor_timeseries=False,
-    show_progress=True,
+    show_progress=False,
dask_kwargs={},
**convert_kwds,
):
@@ -93,7 +93,7 @@ def convert_and_aggregate(
capacity_factor_timeseries : boolean
If True, the capacity factor time series of the chosen resource for
each grid cell is computed.
-    show_progress : boolean, default True
+    show_progress : boolean, default False
Whether to show a progress bar.
dask_kwargs : dict, default {}
Dict with keyword arguments passed to `dask.compute`.
@@ -884,7 +884,7 @@ def hydro(
hydrobasins,
flowspeed=1,
weight_with_height=False,
-    show_progress=True,
+    show_progress=False,
**kwargs,
):
"""
@@ -1047,7 +1047,9 @@ def convert_line_rating(
return Imax.min("spatial") if isinstance(Imax, xr.DataArray) else Imax


-def line_rating(cutout, shapes, line_resistance, **params):
+def line_rating(
+    cutout, shapes, line_resistance, show_progress=False, dask_kwargs={}, **params
+):
"""
Create a dynamic line rating time series based on the IEEE-738 standard.
@@ -1072,6 +1074,10 @@ def line_rating(cutout, shapes, line_resistance, **params):
line_resistance : float/series
Resistance of the lines in Ohm/meter. Alternatively in p.u. system in
Ohm/1000km (see example below).
+    show_progress : boolean, default False
+        Whether to show a progress bar.
+    dask_kwargs : dict, default {}
+        Dict with keyword arguments passed to `dask.compute`.
params : keyword arguments as float/series
Arguments to tweak/modify the line rating calculations based on [1].
Defaults are:
@@ -1146,7 +1152,10 @@ def get_azimuth(shape):
res.append(delayed(convert_line_rating)(ds, *df.iloc[i].values))
else:
res.append(dummy)
-    with ProgressBar():
-        res = compute(res)
+    if show_progress:
+        with ProgressBar(minimum=2):
+            res = compute(res, **dask_kwargs)
+    else:
+        res = compute(res, **dask_kwargs)

return xr.concat(*res, dim=df.index).assign_attrs(units="A")
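The new ``line_rating`` keywords can be exercised as follows. A hedged sketch reusing the prepared cutout from the earlier example; the line geometry and resistance value are invented for illustration:

```python
import geopandas as gpd
from shapely.geometry import LineString

from atlite.convert import line_rating

# Hypothetical transmission line; coordinates are illustrative only.
shapes = gpd.GeoSeries([LineString([(-1.0, 50.0), (1.5, 51.5)])])

# line_resistance in Ohm/meter (illustrative value). show_progress toggles
# the dask progress bar (now off by default); dask_kwargs is forwarded
# verbatim to dask.compute.
rating = line_rating(
    cutout,
    shapes,
    line_resistance=1e-5,
    show_progress=False,
    dask_kwargs={"scheduler": "threads"},
)
```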
48 changes: 0 additions & 48 deletions atlite/cutout.py
@@ -139,15 +139,6 @@ def __init__(self, path, **cutoutparams):
Whether to open dataset in parallel mode. Take effect for all
xr.open_mfdataset usages.
"""
-        name = cutoutparams.get("name", None)
-        cutout_dir = cutoutparams.get("cutout_dir", None)
-        if cutout_dir or name or Path(path).is_dir():
-            raise ValueError(
-                "Old style format not supported. You can migrate the old "
-                "cutout directory using the function "
-                "`atlite.utils.migrate_from_cutout_directory()`. The argument "
-                "`cutout_dir` and `name` have been deprecated in favour of `path`."
-            )

path = Path(path).with_suffix(".nc")
chunks = cutoutparams.pop("chunks", {"time": 100})
@@ -156,31 +147,6 @@ def __init__(self, path, **cutoutparams):
else:
storable_chunks = {}

-        # Backward compatibility for xs, ys, months and years
-        if {"xs", "ys"}.intersection(cutoutparams):
-            warn(
-                "The arguments `xs` and `ys` have been deprecated in favour of "
-                "`x` and `y`",
-                DeprecationWarning,
-            )
-            if "xs" in cutoutparams:
-                cutoutparams["x"] = cutoutparams.pop("xs")
-            if "ys" in cutoutparams:
-                cutoutparams["y"] = cutoutparams.pop("ys")
-
-        if {"years", "months"}.intersection(cutoutparams):
-            warn(
-                "The arguments `years` and `months` have been deprecated in "
-                "favour of `time`",
-                DeprecationWarning,
-            )
-            assert "years" in cutoutparams
-            months = cutoutparams.pop("months", slice(1, 12))
-            years = cutoutparams.pop("years")
-            cutoutparams["time"] = slice(
-                f"{years.start}-{months.start}", f"{years.stop}-{months.stop}"
-            )

# Three cases. First, cutout exists -> take the data.
# Second, data is given -> take it. Third, else -> build a new cutout
if path.is_file():
@@ -279,20 +245,6 @@ def coords(self):
"""
return self.data.coords

-    @property
-    def meta(self):
-        """
-        Metadata of the cutout.
-        Deprecated since v0.2.
-        """
-        warn(
-            "The `meta` attribute is deprecated in favour of direct "
-            "access to `data`",
-            DeprecationWarning,
-        )
-        return xr.Dataset(self.coords, attrs=self.data.attrs)

@property
def shape(self):
"""
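With these shims removed, code that still passes the old arguments must be migrated. A hedged before/after sketch; path, extent, and period are illustrative:

```python
import atlite

# No longer handled after this commit (previously translated or rejected
# with a warning): name/cutout_dir, xs/ys, and years/months.
# cutout = atlite.Cutout("my-cutout", cutout_dir="cutouts",
#                        xs=slice(-10, 10), ys=slice(40, 60),
#                        years=slice(2013, 2013))

# Current form: a single netCDF path plus x/y slices and one time argument.
cutout = atlite.Cutout(
    "cutouts/my-cutout.nc",
    module="era5",
    x=slice(-10, 10),
    y=slice(40, 60),
    time="2013",
)
```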
51 changes: 46 additions & 5 deletions atlite/data.py
@@ -25,7 +25,14 @@
from atlite.datasets import modules as datamodules


-def get_features(cutout, module, features, tmpdir=None):
+def get_features(
+    cutout,
+    module,
+    features,
+    tmpdir=None,
+    monthly_requests=False,
+    concurrent_requests=False,
+):
"""
Load the feature data for a given module.
@@ -39,7 +46,13 @@ def get_features(cutout, module, features, tmpdir=None):

for feature in features:
feature_data = delayed(get_data)(
-            cutout, feature, tmpdir=tmpdir, lock=lock, **parameters
+            cutout,
+            feature,
+            tmpdir=tmpdir,
+            lock=lock,
+            monthly_requests=monthly_requests,
+            concurrent_requests=concurrent_requests,
+            **parameters,
)
datasets.append(feature_data)

@@ -115,6 +128,10 @@ def cutout_prepare(
tmpdir=None,
overwrite=False,
compression={"zlib": True, "complevel": 9, "shuffle": True},
+    show_progress=False,
+    dask_kwargs=None,
+    monthly_requests=False,
+    concurrent_requests=False,
):
"""
Prepare all or a selection of features in a cutout.
@@ -147,12 +164,26 @@
To efficiently reduce cutout sizes, specify the number of 'least_significant_digits': n here.
To disable compression, set "complevel" to None.
Default is {'zlib': True, 'complevel': 9, 'shuffle': True}.
+    show_progress : bool, optional
+        If True, a progress bar is shown. The default is False.
+    dask_kwargs : dict, default {}
+        Dict with keyword arguments passed to `dask.compute`.
+    monthly_requests : bool, optional
+        If True, the data is requested on a monthly basis in ERA5. This is
+        useful for large cutouts, where the data is requested in smaller
+        chunks. The default is False.
+    concurrent_requests : bool, optional
+        If True, the monthly data requests are posted concurrently.
+        Only has an effect if `monthly_requests` is True. The default is False.
Returns
-------
cutout : atlite.Cutout
Cutout with prepared data. The variables are stored in `cutout.data`.
"""
+    if dask_kwargs is None:
+        dask_kwargs = {}

if cutout.prepared and not overwrite:
logger.info("Cutout already prepared.")
return cutout
@@ -174,7 +205,14 @@ def cutout_prepare(
continue
logger.info(f"Calculating and writing with module {module}:")
missing_features = missing_vars.index.unique("feature")
-        ds = get_features(cutout, module, missing_features, tmpdir=tmpdir)
+        ds = get_features(
+            cutout,
+            module,
+            missing_features,
+            tmpdir=tmpdir,
+            monthly_requests=monthly_requests,
+            concurrent_requests=concurrent_requests,
+        )
prepared |= set(missing_features)

cutout.data.attrs.update(dict(prepared_features=list(prepared)))
@@ -198,8 +236,11 @@ def cutout_prepare(
# Delayed writing for large cutout
# cf. https://stackoverflow.com/questions/69810367/python-how-to-write-large-netcdf-with-xarray
write_job = ds.to_netcdf(tmp, compute=False)
-        with ProgressBar():
-            write_job.compute()
+        if show_progress:
+            with ProgressBar(minimum=2):
+                write_job.compute(**dask_kwargs)
+        else:
+            write_job.compute(**dask_kwargs)
if cutout.path.exists():
cutout.data.close()
cutout.path.unlink()
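The conditional progress-bar wrapper added here mirrors the one introduced in atlite/convert.py above. A standalone sketch of the idiom; the task list and scheduler choice are illustrative:

```python
from dask import compute, delayed
from dask.diagnostics import ProgressBar

tasks = [delayed(pow)(i, 2) for i in range(10)]

show_progress = True
dask_kwargs = {"scheduler": "threads"}  # forwarded verbatim to dask.compute

if show_progress:
    # minimum=2 keeps the bar silent for computations shorter than two seconds
    with ProgressBar(minimum=2):
        (results,) = compute(tasks, **dask_kwargs)
else:
    (results,) = compute(tasks, **dask_kwargs)

print(results)  # [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
```

Skipping the ProgressBar context manager entirely when no bar is wanted avoids registering its scheduler callback, consistent with the release note that switches the default to show_progress=False for performance reasons.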