Skip to content

Commit

Permalink
Merge pull request #263 from lsst-sqre/tickets/DM-39325
Browse files Browse the repository at this point in the history
DM-39325: Move cachemachine configuration into business options
  • Loading branch information
rra committed May 22, 2023
2 parents f6f7044 + a808713 commit 4a65a32
Show file tree
Hide file tree
Showing 8 changed files with 282 additions and 203 deletions.
3 changes: 3 additions & 0 deletions changelog.d/20230522_120303_rra_DM_39325.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
### Backwards-incompatible changes

- Configuration of whether to use cachemachine and, if so, what image policy to use is now done at the business level instead of globally. This allows the same mobu instance to test both Nublado v2 and Nublado v3.
33 changes: 0 additions & 33 deletions src/mobu/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,17 @@

from __future__ import annotations

from enum import Enum
from pathlib import Path

from pydantic import BaseSettings, Field, HttpUrl
from safir.logging import LogLevel, Profile

__all__ = [
"CachemachinePolicy",
"Configuration",
"config",
]


class CachemachinePolicy(Enum):
"""Policy for what eligible images to retrieve from cachemachine."""

available = "available"
desired = "desired"


class Configuration(BaseSettings):
"""Configuration for mobu."""

Expand Down Expand Up @@ -61,30 +52,6 @@ class Configuration(BaseSettings):
example="https://data.example.org/",
)

use_cachemachine: bool = Field(
True,
field="Whether to use cachemachine to look up an image",
description=(
"Set this to false in environments using the new Nublado lab"
" controller."
),
env="USE_CACHEMACHINE",
example=False,
)

cachemachine_image_policy: CachemachinePolicy = Field(
CachemachinePolicy.available,
field="Class of cachemachine images to use",
description=(
"Whether to use the images available on all nodes, or the images"
" desired by cachemachine. In instances where image streaming is"
" enabled and therefore pulls are fast, ``desired`` is preferred."
" The default is ``available``."
),
env="CACHEMACHINE_IMAGE_POLICY",
example=CachemachinePolicy.desired,
)

gafaelfawr_token: str | None = Field(
None,
field="Gafaelfawr admin token used to create user tokens",
Expand Down
6 changes: 3 additions & 3 deletions src/mobu/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ class MobuSlackException(SlackException):
def __init__(
self,
msg: str,
user: str,
user: str | None = None,
*,
started_at: datetime | None = None,
failed_at: datetime | None = None,
Expand Down Expand Up @@ -279,8 +279,8 @@ class NotebookRepositoryError(MobuSlackException):
class CachemachineError(MobuSlackException):
"""Failed to obtain a valid image list from cachemachine."""

def __init__(self, msg: str, user: str) -> None:
super().__init__(user, f"Cachemachine error: {msg}")
def __init__(self, msg: str) -> None:
super().__init__(f"Cachemachine error: {msg}")


class CodeExecutionError(MobuSlackException):
Expand Down
38 changes: 38 additions & 0 deletions src/mobu/models/business/nublado.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,26 @@
from .base import BusinessData, BusinessOptions

__all__ = [
"CachemachinePolicy",
"NubladoBusinessData",
"NubladoBusinessOptions",
"NubladoImage",
"NubladoImageByClass",
"NubladoImageByReference",
"NubladoImageByTag",
"NubladoImageClass",
"NubladoImageSize",
"RunningImage",
]


class CachemachinePolicy(Enum):
"""Policy for what eligible images to retrieve from cachemachine."""

available = "available"
desired = "desired"


class NubladoImageClass(str, Enum):
"""Possible ways of selecting an image."""

Expand Down Expand Up @@ -132,6 +147,19 @@ def to_spawn_form(self) -> dict[str, str]:
class NubladoBusinessOptions(BusinessOptions):
"""Options for any business that runs code in a Nublado lab."""

cachemachine_image_policy: CachemachinePolicy = Field(
CachemachinePolicy.available,
field="Class of cachemachine images to use",
description=(
"Whether to use the images available on all nodes, or the images"
" desired by cachemachine. In instances where image streaming is"
" enabled and therefore pulls are fast, ``desired`` is preferred."
" The default is ``available``. Only used if ``use_cachemachine``"
" is true."
),
example=CachemachinePolicy.desired,
)

delete_lab: bool = Field(
True,
title="Whether to delete the lab between iterations",
Expand Down Expand Up @@ -212,6 +240,16 @@ class NubladoBusinessOptions(BusinessOptions):

url_prefix: str = Field("/nb/", title="URL prefix for JupyterHub")

use_cachemachine: bool = Field(
True,
field="Whether to use cachemachine to look up an image",
description=(
"Set this to false in environments using the new Nublado lab"
" controller."
),
example=False,
)

working_directory: str | None = Field(
None,
title="Working directory when running code",
Expand Down
26 changes: 22 additions & 4 deletions src/mobu/services/business/nublado.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,15 @@
from safir.slack.blockkit import SlackException
from structlog.stdlib import BoundLogger

from ...config import config
from ...exceptions import JupyterSpawnError, JupyterTimeoutError
from ...models.business.nublado import (
NubladoBusinessData,
NubladoBusinessOptions,
RunningImage,
)
from ...models.user import AuthenticatedUser
from ...storage.cachemachine import CachemachineClient
from ...storage.jupyter import JupyterClient, JupyterLabSession
from .base import Business

Expand Down Expand Up @@ -102,12 +104,27 @@ def __init__(
logger: BoundLogger,
) -> None:
super().__init__(options, user, http_client, logger)

if not config.environment_url:
raise RuntimeError("environment_url not set")
environment_url = str(config.environment_url).rstrip("/")
cachemachine = None
if options.use_cachemachine:
if not config.gafaelfawr_token:
raise RuntimeError("GAFAELFAWR_TOKEN not set")
cachemachine = CachemachineClient(
url=environment_url + "/cachemachine/jupyter",
token=config.gafaelfawr_token,
http_client=http_client,
image_policy=options.cachemachine_image_policy,
)
self._client = JupyterClient(
user=user,
url_prefix=options.url_prefix,
image_config=options.image,
base_url=environment_url + options.url_prefix,
cachemachine=cachemachine,
logger=logger,
)

self._image: RunningImage | None = None
self._node: str | None = None
self._random = SystemRandom()
Expand Down Expand Up @@ -200,7 +217,7 @@ async def hub_login(self) -> None:
async def spawn_lab(self) -> bool:
with self.timings.start("spawn_lab", self.annotations()) as sw:
timeout = self.options.spawn_timeout
await self._client.spawn_lab()
await self._client.spawn_lab(self.options.image)

# Pause before using the progress API, since otherwise it may not
# have attached to the spawner and will not return a full stream
Expand Down Expand Up @@ -248,8 +265,9 @@ async def open_session(
self, notebook: str | None = None
) -> AsyncIterator[JupyterLabSession]:
self.logger.info("Creating lab session")
opts = {"max_websocket_size": self.options.max_websocket_message_size}
stopwatch = self.timings.start("create_session", self.annotations())
async with self._client.open_lab_session(notebook) as session:
async with self._client.open_lab_session(notebook, **opts) as session:
stopwatch.stop()
with self.timings.start("execute_setup", self.annotations()):
await self.setup_session(session)
Expand Down
46 changes: 30 additions & 16 deletions src/mobu/storage/cachemachine.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,13 @@
from httpx import AsyncClient, HTTPError, HTTPStatusError
from pydantic import BaseModel, Field

from ..config import config
from ..exceptions import CachemachineError
from ..models.business.nublado import CachemachinePolicy

__all__ = [
"CachemachineClient",
"JupyterCachemachineImage",
]


class JupyterCachemachineImage(BaseModel):
Expand Down Expand Up @@ -85,21 +90,30 @@ class CachemachineClient:
which image is recommended and what the latest weeklies are. This client
queries it and returns the image that matches some selection criteria.
The resulting string can be passed in to the JupyterHub options form.
Parameters
----------
url
URL for cachemachine.
token
Token to use to authenticate to cachemachine.
http_client
HTTP client to use.
image_policy
Cachemachine image policy to use for resolving images.
"""

def __init__(
self, http_client: AsyncClient, token: str, username: str
self,
url: str,
token: str,
http_client: AsyncClient,
*,
image_policy: CachemachinePolicy = CachemachinePolicy.desired,
) -> None:
self._http_client = http_client
self._token = token
self._username = username
if not config.environment_url:
raise RuntimeError("environment_url not set")
self._url = (
str(config.environment_url).rstrip("/")
+ "/cachemachine/jupyter/"
+ config.cachemachine_image_policy.value
)
self._http_client = http_client
self._url = url.rstrip("/") + "/" + image_policy.value

async def get_latest_weekly(self) -> JupyterCachemachineImage:
"""Image for the latest weekly version.
Expand All @@ -118,7 +132,7 @@ async def get_latest_weekly(self) -> JupyterCachemachineImage:
for image in await self._get_images():
if image.name.startswith("Weekly"):
return image
raise CachemachineError(self._username, "No weekly images found")
raise CachemachineError("No weekly images found")

async def get_recommended(self) -> JupyterCachemachineImage:
"""Image string for the latest recommended version.
Expand All @@ -135,7 +149,7 @@ async def get_recommended(self) -> JupyterCachemachineImage:
"""
images = await self._get_images()
if not images or not images[0]:
raise CachemachineError(self._username, "No images found")
raise CachemachineError("No images found")
return images[0]

async def _get_images(self) -> list[JupyterCachemachineImage]:
Expand All @@ -145,10 +159,10 @@ async def _get_images(self) -> list[JupyterCachemachineImage]:
r.raise_for_status()
except HTTPStatusError as e:
msg = f"Cannot get image status: {e.response.status_code}"
raise CachemachineError(self._username, msg) from e
raise CachemachineError(msg) from e
except HTTPError as e:
msg = f"Cannot get image status: {type(e).__name__}: {e!s}"
raise CachemachineError(self._username, msg) from e
raise CachemachineError(msg) from e

try:
data = r.json()
Expand All @@ -157,4 +171,4 @@ async def _get_images(self) -> list[JupyterCachemachineImage]:
]
except Exception as e:
msg = f"Invalid response: {type(e).__name__}: {e!s}"
raise CachemachineError(self._username, msg) from e
raise CachemachineError(msg) from e
Loading

0 comments on commit 4a65a32

Please sign in to comment.