Skip to content

Commit

Permalink
Include the database when deciding if two tables are the same in catalog generation
Browse files Browse the repository at this point in the history

Convert catalog intermediate structure into something more useful
Make comparing manifests to catalogs faster by generating an explicit identifier to id mapping
Make the identifier to unique ID mapping include databases
Convert catalog to use dataclasses/hologram types
Fix unit tests to test what we actually care about
No changes to integration tests means no need to change dbt docs, hooray
  • Loading branch information
Jacob Beck committed Sep 20, 2019
1 parent e31a9af commit 6c81562
Show file tree
Hide file tree
Showing 6 changed files with 488 additions and 302 deletions.
4 changes: 0 additions & 4 deletions core/dbt/contracts/graph/compiled.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
from dbt.contracts.graph.parsed import (
ParsedNode,
ParsedAnalysisNode,
ParsedDocumentation,
ParsedMacro,
ParsedModelNode,
ParsedHookNode,
ParsedRPCNode,
Expand Down Expand Up @@ -216,8 +214,6 @@ def parsed_instance_for(compiled: CompiledNode) -> ParsedNode:
CompiledSnapshotNode,
CompiledTestNode,
ParsedAnalysisNode,
ParsedDocumentation,
ParsedMacro,
ParsedModelNode,
ParsedHookNode,
ParsedRPCNode,
Expand Down
32 changes: 0 additions & 32 deletions core/dbt/contracts/graph/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,38 +367,6 @@ def get_resource_fqns(self):

return resource_fqns

def _filter_subgraph(self, subgraph, predicate):
"""
Given a subgraph of the manifest, and a predicate, filter
the subgraph using that predicate. Generates a list of nodes.
"""
to_return = []

for unique_id, item in subgraph.items():
if predicate(item):
to_return.append(item)

return to_return

def _model_matches_schema_and_table(self, schema, table, model):
    """Check whether a node refers to the given schema and table.

    Both comparisons are case-insensitive. Source nodes are matched on
    their ``identifier``; every other node is matched on its ``alias``.
    """
    is_source = model.resource_type == NodeType.Source
    if model.schema.lower() != schema.lower():
        return False
    relation_name = model.identifier if is_source else model.alias
    return relation_name.lower() == table.lower()

def get_unique_ids_for_schema_and_table(self, schema, table):
    """Return the unique_ids of every node matching a schema and table.

    More than one id can come back: a single relation may match both a
    source and a seed, for instance.
    """
    def is_match(node):
        return self._model_matches_schema_and_table(schema, table, node)

    return [
        node.unique_id
        for node in self._filter_subgraph(self.nodes, is_match)
    ]

def add_nodes(self, new_nodes):
"""Add the given dict of new nodes to the manifest."""
for unique_id, node in new_nodes.items():
Expand Down
4 changes: 4 additions & 0 deletions core/dbt/contracts/graph/parsed.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,10 @@ class ParsedNodeMandatory(
):
alias: str

@property
def identifier(self) -> str:
    """Expose the node's ``alias`` under the name ``identifier``."""
    return self.alias


@dataclass
class ParsedNodeDefaults(ParsedNodeMandatory):
Expand Down
51 changes: 46 additions & 5 deletions core/dbt/helper_types.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
# never name this package "types", or mypy will crash in ugly ways
from datetime import timedelta
from typing import NewType

from hologram import (
FieldEncoder, JsonSchemaMixin, JsonDict, ValidationError
)

from datetime import timedelta
from typing import NewType
from dbt.utils import DECIMALS


Port = NewType('Port', int)
Bool = NewType('Bool', bool)


class PortEncoder(FieldEncoder):
Expand Down Expand Up @@ -37,7 +40,45 @@ def json_schema(self) -> JsonDict:
return {'type': 'number'}


JsonSchemaMixin.register_field_encoders({
class DecimalEncoder(FieldEncoder):
    """Field encoder for the decimal types listed in ``DECIMALS``.

    Values are written to the wire as floats and described in JSON schema
    as ``number``.
    """

    def to_wire(self, value) -> float:
        """Convert a decimal value to a float for serialization."""
        return float(value)

    def to_python(self, value):
        """Convert an incoming value to the first type in ``DECIMALS``.

        A value that is already an instance of one of the ``DECIMALS``
        types is returned unchanged.

        :raises ValidationError: if the value cannot be converted.
        """
        if isinstance(value, DECIMALS):
            return value
        try:
            return DECIMALS[0](value)
        except (TypeError, ValueError, ArithmeticError):
            # Assuming DECIMALS[0] behaves like decimal.Decimal (confirm
            # against dbt.utils): malformed strings raise InvalidOperation,
            # an ArithmeticError subclass, and malformed tuples raise
            # ValueError. Catching only TypeError let those escape as raw
            # exceptions instead of a ValidationError.
            raise ValidationError(
                'cannot encode {} into decimal'.format(value)
            ) from None

    @property
    def json_schema(self) -> JsonDict:
        """JSON schema fragment describing the wire representation."""
        return {'type': 'number'}


class BoolEncoder(FieldEncoder):
    """Pass-through field encoder whose JSON schema type is ``boolean``."""

    def to_wire(self, value) -> bool:
        """Return the value unchanged for serialization."""
        return value

    def to_python(self, value) -> bool:
        """Return the incoming value unchanged."""
        return value

    @property
    def json_schema(self) -> JsonDict:
        """JSON schema fragment describing the wire representation."""
        return {'type': 'boolean'}


_ENCODERS = {
Port: PortEncoder(),
timedelta: TimeDeltaFieldEncoder()
})
timedelta: TimeDeltaFieldEncoder(),
Bool: BoolEncoder(),
}

for decimal_type in DECIMALS:
_ENCODERS[decimal_type] = DecimalEncoder()


JsonSchemaMixin.register_field_encoders(_ENCODERS)
Loading

0 comments on commit 6c81562

Please sign in to comment.