Skip to content

Commit

Permalink
Fix: Export data caterer not using server details (#430)
Browse files Browse the repository at this point in the history
* Add in export to data-caterer YAML format

* Fix: Use the server details from the data contract when exporting to the data caterer format, and use the server's location as the file path if available

* Allow for data caterer export to use specified server information, remove entry from changelog
  • Loading branch information
pflooky committed Sep 19, 2024
1 parent e44d42c commit 95fe7a2
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 17 deletions.
23 changes: 20 additions & 3 deletions datacontract/export/data_caterer_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,27 @@ def export(self, data_contract, model, server, sql_server_type, export_args) ->
return to_data_caterer_generate_yaml(data_contract, server)


def to_data_caterer_generate_yaml(data_contract_spec: DataContractSpecification, server: Server):
def to_data_caterer_generate_yaml(data_contract_spec: DataContractSpecification, server):
generation_task = {"name": data_contract_spec.info.title, "steps": []}
server_info = _get_server_info(data_contract_spec, server)

for model_key, model_value in data_contract_spec.models.items():
odcs_table = _to_data_caterer_generate_step(model_key, model_value, server)
odcs_table = _to_data_caterer_generate_step(model_key, model_value, server_info)
generation_task["steps"].append(odcs_table)
return yaml.dump(generation_task, indent=2, sort_keys=False, allow_unicode=True)


def _get_server_info(data_contract_spec: DataContractSpecification, server):
    """Pick the server entry of the data contract to use for the export.

    Args:
        data_contract_spec: The data contract; its ``servers`` attribute is
            read as a mapping of server name to server definition.
        server: Name of the server to use, or ``None`` to fall back to the
            first server declared in the contract.

    Returns:
        The server definition registered under ``server``; when ``server`` is
        ``None``, the first declared server, or ``None`` if the contract
        declares no servers.

    Raises:
        Exception: If ``server`` is given but is not a key of the contract's
            servers.
    """
    # Treat an absent/None servers section like an empty one so the lookups
    # below cannot fail with TypeError/AttributeError.
    servers = data_contract_spec.servers or {}

    if server is not None:
        if server not in servers:
            raise Exception(f"Server name not found in servers list in data contract, server-name={server}")
        return servers[server]

    # No explicit server requested: use the first declared one, if any.
    return next(iter(servers.values()), None)


def _to_data_caterer_generate_step(model_key, model_value: Model, server: Server) -> dict:
step = {
"name": model_key,
Expand Down Expand Up @@ -52,7 +64,12 @@ def _to_data_source_options(model_key, server: Server):
options = {}
if server is not None and server.type is not None:
if server.type in ["s3", "gcs", "azure", "local"]:
options["path"] = server.path
if server.path is not None:
options["path"] = server.path
elif server.location is not None:
options["path"] = server.location
else:
options["path"] = "/tmp/data_caterer_data"
elif server.type == "postgres":
options["schema"] = server.schema_
options["table"] = model_key
Expand Down
15 changes: 10 additions & 5 deletions tests/fixtures/data-caterer/export/datacontract_nested.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,16 @@ terms:
billing: free
noticePeriod: P3M
servers:
production:
type: snowflake
account: my-account
database: my-database
schema: my-schema
s3-json:
type: s3
location: s3://covid19-lake/enigma-jhu/json/*.json
format: json
delimiter: new_line
s3-json-prod:
type: s3
location: s3://covid19-lake-prod/enigma-jhu/json/*.json
format: json
delimiter: new_line
models:
orders:
description: The orders model
Expand Down
35 changes: 26 additions & 9 deletions tests/test_export_data_caterer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from datacontract.cli import app
from datacontract.export.data_caterer_converter import to_data_caterer_generate_yaml
from datacontract.model.data_contract_specification import DataContractSpecification, Server
from datacontract.model.data_contract_specification import DataContractSpecification


def test_cli():
Expand All @@ -19,12 +19,34 @@ def test_to_data_caterer():
data_contract = DataContractSpecification.from_string(
read_file("fixtures/data-caterer/export/datacontract_nested.yaml")
)
expected_data_caterer_model = """
expected_data_caterer_model = _get_expected_data_caterer_yaml("s3://covid19-lake/enigma-jhu/json/*.json")

data_caterer_yaml = to_data_caterer_generate_yaml(data_contract, None)
result = yaml.safe_load(data_caterer_yaml)

assert result == yaml.safe_load(expected_data_caterer_model)


def test_to_data_caterer_with_server():
    """Export with the explicitly named "s3-json-prod" server and compare against the expected YAML."""
    contract_yaml = read_file("fixtures/data-caterer/export/datacontract_nested.yaml")
    data_contract = DataContractSpecification.from_string(contract_yaml)
    expected = yaml.safe_load(
        _get_expected_data_caterer_yaml("s3://covid19-lake-prod/enigma-jhu/json/*.json")
    )

    actual = yaml.safe_load(to_data_caterer_generate_yaml(data_contract, "s3-json-prod"))

    assert actual == expected


def _get_expected_data_caterer_yaml(path: str):
return f"""
name: Orders Unit Test
steps:
- name: orders
type: csv
options: {}
type: json
options:
path: {path}
schema:
- name: order_id
type: string
Expand Down Expand Up @@ -58,11 +80,6 @@ def test_to_data_caterer():
type: string
"""

data_caterer_yaml = to_data_caterer_generate_yaml(data_contract, Server())
result = yaml.safe_load(data_caterer_yaml)

assert result == yaml.safe_load(expected_data_caterer_model)


def read_file(file):
if not os.path.exists(file):
Expand Down

0 comments on commit 95fe7a2

Please sign in to comment.