refactor: Expand usage of STAC custom layout strategy
Added an extra test for adding a dataset version with Item metadata,
and expanded the usage of our custom layout strategy to avoid overwriting
existing links when creating a new dataset.
Mitchell Paff authored and committed on May 31, 2021
Commit e7f6e85 (1 parent: 8e6cee1)
Showing 3 changed files with 191 additions and 12 deletions.
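Background for the change to backend/populate_catalog/task.py below: pystac's Catalog.normalize_hrefs recomputes every self and link HREF under a new root via a layout strategy, and when no strategy is passed it falls back to pystac's best-practices layout, which can rewrite links away from the layout already stored in the bucket. A minimal sketch of that default behaviour, assuming pystac 1.x and a purely illustrative local catalog:

from pystac import Catalog, CatalogType

# Hypothetical catalog and paths, for illustration only.
catalog = Catalog.from_file("catalog.json")

# With no strategy argument, normalize_hrefs applies pystac's
# BestPracticesLayoutStrategy, rewriting child and item links to the
# default layout rather than preserving the existing one.
catalog.normalize_hrefs("./output")

catalog.save(catalog_type=CatalogType.SELF_CONTAINED)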
backend/populate_catalog/task.py: 5 changes (4 additions & 1 deletion)
@@ -128,6 +128,9 @@ def handle_root(dataset_prefix: str) -> None:
     dataset_catalog = Catalog.from_file(f"{dataset_path}/{CATALOG_KEY}")

     root_catalog.add_child(dataset_catalog)
-    root_catalog.normalize_hrefs(f"{S3_URL_PREFIX}{ResourceName.STORAGE_BUCKET_NAME.value}")
+    root_catalog.normalize_hrefs(
+        f"{S3_URL_PREFIX}{ResourceName.STORAGE_BUCKET_NAME.value}",
+        strategy=GeostoreSTACLayoutStrategy(),
+    )

     root_catalog.save(catalog_type=CatalogType.SELF_CONTAINED)
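The call above passes GeostoreSTACLayoutStrategy(), whose definition is not part of this diff. As a rough sketch of the mechanism only: a custom strategy subclasses pystac's HrefLayoutStrategy and returns the HREF each object should be saved at, which is the hook for preserving existing file names instead of imposing the default layout. The class below is hypothetical, not the actual Geostore implementation:

# Sketch only: a layout strategy that keeps each object's existing file
# name when re-rooting, assuming every object already has a self HREF.
from posixpath import basename, join

from pystac import Catalog, Collection, Item
from pystac.layout import HrefLayoutStrategy


class KeepFileNameLayoutStrategy(HrefLayoutStrategy):  # hypothetical class
    def get_catalog_href(self, cat: Catalog, parent_dir: str, is_root: bool) -> str:
        # Re-root the catalog under parent_dir, keeping its current file name.
        return join(parent_dir, basename(cat.get_self_href()))

    def get_collection_href(self, col: Collection, parent_dir: str, is_root: bool) -> str:
        return join(parent_dir, basename(col.get_self_href()))

    def get_item_href(self, item: Item, parent_dir: str) -> str:
        return join(parent_dir, basename(item.get_self_href()))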
backend/stac_format.py: 5 changes (3 additions & 2 deletions)
@@ -16,10 +16,11 @@
 STAC_TYPE_KEY = "type"
 STAC_VERSION_KEY = "stac_version"
 STAC_LINKS_KEY = "links"
-STAC_REL_KEY = "rel"
-STAC_REL_ROOT = "root"
 STAC_REL_CHILD = "child"
 STAC_REL_ITEM = "item"
+STAC_REL_KEY = "rel"
+STAC_REL_PARENT = "parent"
+STAC_REL_ROOT = "root"
 STAC_REL_SELF = "self"

STAC_COLLECTION_TYPE = "Collection"
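For reference, these rel constants are the values used when assembling STAC link objects, as the new test below does. Illustration only (STAC_HREF_KEY and STAC_TYPE_KEY are imported from the same module in the tests; the HREF here is an example):

# A "parent" link as built in the tests below.
parent_link = {
    STAC_REL_KEY: STAC_REL_PARENT,  # {"rel": "parent", ...}
    STAC_HREF_KEY: "../catalog.json",
    STAC_TYPE_KEY: "application/json",
}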
tests/test_populate_catalog.py: 193 changes (184 additions & 9 deletions)
@@ -1,8 +1,6 @@
 from copy import deepcopy
 from json import load
-from typing import Any, Dict

-import pytest
 from _pytest.python_api import raises
 from mypy_boto3_s3 import S3Client
 from pytest import mark
@@ -36,6 +34,7 @@
     STAC_ID_KEY,
     STAC_LINKS_KEY,
     STAC_REL_CHILD,
+    STAC_REL_ITEM,
     STAC_REL_KEY,
     STAC_REL_PARENT,
     STAC_REL_ROOT,
@@ -50,6 +49,7 @@
 from tests.stac_objects import (
     MINIMAL_VALID_STAC_CATALOG_OBJECT,
     MINIMAL_VALID_STAC_COLLECTION_OBJECT,
+    MINIMAL_VALID_STAC_ITEM_OBJECT,
 )


@@ -221,20 +221,15 @@ def should_update_existing_root_catalog(subtests: SubTests) -> None:
             assert dataset_catalog_json[STAC_LINKS_KEY] == expected_dataset_links


-@pytest.mark.parametrize(
-    "stac_object", [MINIMAL_VALID_STAC_COLLECTION_OBJECT, MINIMAL_VALID_STAC_CATALOG_OBJECT]
-)
 @mark.infrastructure
-def should_update_dataset_catalog_with_new_version(
-    stac_object: Dict[str, Any], subtests: SubTests
-) -> None:
+def should_update_dataset_catalog_with_new_version_catalog(subtests: SubTests) -> None:

     dataset_version = any_dataset_version_id()
     filename = f"{any_safe_filename()}.json"
     with Dataset() as dataset, S3Object(
         file_object=json_dict_to_file_object(
             {
-                **deepcopy(stac_object),
+                **deepcopy(MINIMAL_VALID_STAC_CATALOG_OBJECT),
                 STAC_ID_KEY: dataset_version,
                 STAC_TITLE_KEY: dataset.title,
                 STAC_LINKS_KEY: [
@@ -357,6 +352,186 @@ def should_update_dataset_catalog_with_new_version(
             assert version_json[STAC_LINKS_KEY] == expected_dataset_version_links


+@mark.infrastructure
+def should_update_dataset_catalog_with_new_version_collection(subtests: SubTests) -> None:
+    dataset_version = any_dataset_version_id()
+    collection_filename = f"{any_safe_filename()}.json"
+    item_filename = f"{any_safe_filename()}.json"
+
+    with Dataset() as dataset, S3Object(
+        file_object=json_dict_to_file_object(
+            {
+                **deepcopy(MINIMAL_VALID_STAC_ITEM_OBJECT),
+                STAC_ID_KEY: any_dataset_version_id(),
+                STAC_LINKS_KEY: [
+                    {
+                        STAC_REL_KEY: STAC_REL_ROOT,
+                        STAC_HREF_KEY: f"./{collection_filename}",
+                        STAC_TYPE_KEY: "application/json",
+                    },
+                    {
+                        STAC_REL_KEY: STAC_REL_PARENT,
+                        STAC_HREF_KEY: f"./{collection_filename}",
+                        STAC_TYPE_KEY: "application/json",
+                    },
+                ],
+            }
+        ),
+        bucket_name=ResourceName.STORAGE_BUCKET_NAME.value,
+        key=f"{dataset.dataset_prefix}/{dataset_version}/{item_filename}",
+    ) as item_metadata, S3Object(
+        file_object=json_dict_to_file_object(
+            {
+                **deepcopy(MINIMAL_VALID_STAC_COLLECTION_OBJECT),
+                STAC_ID_KEY: dataset_version,
+                STAC_TITLE_KEY: dataset.title,
+                STAC_LINKS_KEY: [
+                    {
+                        STAC_REL_KEY: STAC_REL_ROOT,
+                        STAC_HREF_KEY: f"./{collection_filename}",
+                        STAC_TYPE_KEY: "application/json",
+                    },
+                    {
+                        STAC_REL_KEY: STAC_REL_ITEM,
+                        STAC_HREF_KEY: f"./{item_filename}",
+                        STAC_TYPE_KEY: "application/json",
+                    },
+                ],
+            }
+        ),
+        bucket_name=ResourceName.STORAGE_BUCKET_NAME.value,
+        key=f"{dataset.dataset_prefix}/{dataset_version}/{collection_filename}",
+    ) as dataset_version_metadata, S3Object(
+        file_object=json_dict_to_file_object(
+            {
+                **deepcopy(MINIMAL_VALID_STAC_CATALOG_OBJECT),
+                STAC_ID_KEY: dataset.dataset_prefix,
+                STAC_TITLE_KEY: dataset.title,
+                STAC_LINKS_KEY: [
+                    {
+                        STAC_REL_KEY: STAC_REL_ROOT,
+                        STAC_HREF_KEY: f"../{CATALOG_KEY}",
+                        STAC_TYPE_KEY: "application/json",
+                    },
+                    {
+                        STAC_REL_KEY: STAC_REL_PARENT,
+                        STAC_HREF_KEY: f"../{CATALOG_KEY}",
+                        STAC_TYPE_KEY: "application/json",
+                    },
+                ],
+            }
+        ),
+        bucket_name=ResourceName.STORAGE_BUCKET_NAME.value,
+        key=f"{dataset.dataset_prefix}/{CATALOG_KEY}",
+    ), S3Object(
+        file_object=json_dict_to_file_object(
+            {
+                **deepcopy(MINIMAL_VALID_STAC_CATALOG_OBJECT),
+                STAC_ID_KEY: ROOT_CATALOG_ID,
+                STAC_DESCRIPTION_KEY: ROOT_CATALOG_DESCRIPTION,
+                STAC_TITLE_KEY: ROOT_CATALOG_TITLE,
+                STAC_LINKS_KEY: [
+                    {
+                        STAC_REL_KEY: STAC_REL_ROOT,
+                        STAC_HREF_KEY: f"./{CATALOG_KEY}",
+                        STAC_TYPE_KEY: "application/json",
+                    },
+                    {
+                        STAC_REL_KEY: STAC_REL_CHILD,
+                        STAC_HREF_KEY: f"./{dataset.dataset_prefix}/{CATALOG_KEY}",
+                        STAC_TYPE_KEY: "application/json",
+                    },
+                ],
+            }
+        ),
+        bucket_name=ResourceName.STORAGE_BUCKET_NAME.value,
+        key=CATALOG_KEY,
+    ):
+        expected_dataset_catalog_links: JsonList = [
+            {
+                STAC_REL_KEY: STAC_REL_ROOT,
+                STAC_HREF_KEY: f"../{CATALOG_KEY}",
+                STAC_TYPE_KEY: "application/json",
+            },
+            {
+                STAC_REL_KEY: STAC_REL_PARENT,
+                STAC_HREF_KEY: f"../{CATALOG_KEY}",
+                STAC_TYPE_KEY: "application/json",
+            },
+            {
+                STAC_REL_KEY: STAC_REL_CHILD,
+                STAC_HREF_KEY: f"./{dataset_version}/{collection_filename}",
+                STAC_TYPE_KEY: "application/json",
+            },
+        ]
+        expected_dataset_version_links: JsonList = [
+            {
+                STAC_REL_KEY: STAC_REL_ROOT,
+                STAC_HREF_KEY: f"../../{CATALOG_KEY}",
+                STAC_TYPE_KEY: "application/json",
+            },
+            {
+                STAC_REL_KEY: STAC_REL_ITEM,
+                STAC_HREF_KEY: f"./{item_filename}",
+                STAC_TYPE_KEY: "application/json",
+            },
+            {
+                STAC_REL_KEY: STAC_REL_PARENT,
+                STAC_HREF_KEY: f"../{CATALOG_KEY}",
+                STAC_TYPE_KEY: "application/json",
+            },
+        ]
+        expected_item_links: JsonList = [
+            {
+                STAC_REL_KEY: STAC_REL_ROOT,
+                STAC_HREF_KEY: f"../../{CATALOG_KEY}",
+                STAC_TYPE_KEY: "application/json",
+            },
+            {
+                STAC_REL_KEY: STAC_REL_PARENT,
+                STAC_HREF_KEY: f"./{collection_filename}",
+                STAC_TYPE_KEY: "application/json",
+            },
+        ]
+
+        lambda_handler(
+            {
+                RECORDS_KEY: [
+                    {
+                        BODY_KEY: dataset_version_metadata.key,
+                        MESSAGE_ATTRIBUTES_KEY: {
+                            MESSAGE_ATTRIBUTE_TYPE_KEY: {
+                                STRING_VALUE_KEY_LOWER: MESSAGE_ATTRIBUTE_TYPE_DATASET,
+                                DATA_TYPE_KEY: DATA_TYPE_STRING,
+                            }
+                        },
+                    }
+                ]
+            },
+            any_lambda_context(),
+        )
+
+        with subtests.test(msg="dataset catalog links"), smart_open(
+            f"{S3_URL_PREFIX}{ResourceName.STORAGE_BUCKET_NAME.value}/"
+            f"{dataset.dataset_prefix}/{CATALOG_KEY}"
+        ) as updated_dataset_metadata_file:
+            catalog_json = load(updated_dataset_metadata_file)
+            assert catalog_json[STAC_LINKS_KEY] == expected_dataset_catalog_links
+
+        with subtests.test(msg="dataset version links"), smart_open(
+            f"{S3_URL_PREFIX}{ResourceName.STORAGE_BUCKET_NAME.value}"
+            f"/{dataset_version_metadata.key}"
+        ) as updated_dataset_metadata_file:
+            version_json = load(updated_dataset_metadata_file)
+            assert version_json[STAC_LINKS_KEY] == expected_dataset_version_links
+
+        with subtests.test(msg="item links"), smart_open(
+            f"{S3_URL_PREFIX}{ResourceName.STORAGE_BUCKET_NAME.value}" f"/{item_metadata.key}"
+        ) as updated_item_metadata_file:
+            item_json = load(updated_item_metadata_file)
+            assert item_json[STAC_LINKS_KEY] == expected_item_links


 def should_fail_if_unknown_sqs_message_type() -> None:
     with raises(UnhandledSQSMessageException):
         lambda_handler(
