Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Detect breaking changes to constraints in state:modifed #7476

Merged
merged 12 commits into from
May 10, 2023
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20230503-101100.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Features
body: Detect breaking changes to enforced constraints
time: 2023-05-03T10:11:00.617936-05:00
custom:
Author: emmyoop
Issue: "7065"
7 changes: 7 additions & 0 deletions core/dbt/adapters/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,9 @@ def get_include_paths(self, name: Optional[str]) -> List[Path]:
def get_adapter_type_names(self, name: Optional[str]) -> List[str]:
return [p.adapter.type() for p in self.get_adapter_plugins(name)]

def get_adapter_constraint_support(self, name: Optional[str]) -> List[str]:
return self.lookup_adapter(name).CONSTRAINT_SUPPORT # type: ignore


FACTORY: AdapterContainer = AdapterContainer()

Expand Down Expand Up @@ -214,6 +217,10 @@ def get_adapter_type_names(name: Optional[str]) -> List[str]:
return FACTORY.get_adapter_type_names(name)


def get_adapter_constraint_support(name: Optional[str]) -> List[str]:
return FACTORY.get_adapter_constraint_support(name)


@contextmanager
def adapter_management():
reset_adapters()
Expand Down
171 changes: 124 additions & 47 deletions core/dbt/contracts/graph/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,7 @@ def same_config(self, old) -> bool:
def build_contract_checksum(self):
pass

def same_contract(self, old) -> bool:
def same_contract(self, old, adapter_type=None) -> bool:
# This would only apply to seeds
return True

Expand Down Expand Up @@ -489,13 +489,13 @@ def patch(self, patch: "ParsedNodePatch"):
)
)

def same_contents(self, old) -> bool:
def same_contents(self, old, adapter_type) -> bool:
if old is None:
return False

# Need to ensure that same_contract is called because it
# could throw an error
same_contract = self.same_contract(old)
same_contract = self.same_contract(old, adapter_type)
return (
self.same_body(old)
and self.same_config(old)
Expand Down Expand Up @@ -572,6 +572,48 @@ def depends_on_nodes(self):
def depends_on_macros(self):
return self.depends_on.macros


# ====================================
# CompiledNode subclasses
# ====================================


@dataclass
class AnalysisNode(CompiledNode):
resource_type: NodeType = field(metadata={"restrict": [NodeType.Analysis]})


@dataclass
class HookNode(CompiledNode):
resource_type: NodeType = field(metadata={"restrict": [NodeType.Operation]})
index: Optional[int] = None


@dataclass
class ModelNode(CompiledNode):
resource_type: NodeType = field(metadata={"restrict": [NodeType.Model]})
access: AccessType = AccessType.Protected
constraints: List[ModelLevelConstraint] = field(default_factory=list)
version: Optional[NodeVersion] = None
latest_version: Optional[NodeVersion] = None

@property
def is_latest_version(self) -> bool:
return self.version is not None and self.version == self.latest_version

@property
def search_name(self):
if self.version is None:
return self.name
else:
return f"{self.name}.v{self.version}"

@property
def materialization_enforces_constraints(self):
emmyoop marked this conversation as resolved.
Show resolved Hide resolved
if self.config.materialized in ["table", "incremental"]:
return True
return False

def build_contract_checksum(self):
# We don't need to construct the checksum if the model does not
# have contract enforced, because it won't be used.
Expand All @@ -584,10 +626,14 @@ def build_contract_checksum(self):
for column in sorted_columns:
contract_state += f"|{column.name}"
contract_state += str(column.data_type)
contract_state += str(column.constraints)
if self.materialization_enforces_constraints:
contract_state += self.config.materialized
contract_state += str(self.constraints)
data = contract_state.encode("utf-8")
self.contract.checksum = hashlib.new("sha256", data).hexdigest()

def same_contract(self, old) -> bool:
def same_contract(self, old, adapter_type=None) -> bool:
# If the contract wasn't previously enforced:
if old.contract.enforced is False and self.contract.enforced is False:
# No change -- same_contract: True
Expand All @@ -608,32 +654,99 @@ def same_contract(self, old) -> bool:
contract_enforced_disabled: bool = False
columns_removed: List[str] = []
column_type_changes: List[Tuple[str, str, str]] = []
enforced_column_constraint_removed: List[List[str]] = [] # column, constraint_type
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

any reason not to use a list of Tuple[str, str] here and for enforced_model_constraint_removed? Feels more consistent with the implementation so far (of columns_removed) given the nested lists are of fixed length.

enforced_model_constraint_removed: List[
Tuple[str, List[str]]
] = [] # constraint_type, columns
materialization_changed: List[str] = []

if old.contract.enforced is True and self.contract.enforced is False:
# Breaking change: the contract was previously enforced, and it no longer is
contract_enforced_disabled = True

# TODO: this avoid the circular imports but isn't ideal
from dbt.adapters.factory import get_adapter_constraint_support
from dbt.adapters.base import ConstraintSupport

constraint_support = get_adapter_constraint_support(adapter_type)
column_constraints_exist = False

# Next, compare each column from the previous contract (old.columns)
for key, value in sorted(old.columns.items()):
for old_key, old_value in sorted(old.columns.items()):
# Has this column been removed?
if key not in self.columns.keys():
columns_removed.append(value.name)
if old_key not in self.columns.keys():
columns_removed.append(old_value.name)
# Has this column's data type changed?
elif value.data_type != self.columns[key].data_type:
elif old_value.data_type != self.columns[old_key].data_type:
column_type_changes.append(
(str(value.name), str(value.data_type), str(self.columns[key].data_type))
(
str(old_value.name),
str(old_value.data_type),
str(self.columns[old_key].data_type),
)
)

# track if there are any column level constraints for the materialization check late
if old_value.constraints:
column_constraints_exist = True

# Have enforced columns level constraints changed?
# Constraints are only enforced for table and incremental materializations.
# We only really care if the old node was one of those materializations for breaking changes
if (
old_key in self.columns.keys()
and old_value.constraints != self.columns[old_key].constraints
and old.materialization_enforces_constraints
):

for old_constraint in old_value.constraints:
if (
old_constraint not in self.columns[old_key].constraints
and constraint_support[old_constraint.type] == ConstraintSupport.ENFORCED
):
enforced_column_constraint_removed.append(
[old_key, str(old_constraint.type)]
)

# Now compare the model level constraints
if old.constraints != self.constraints and old.materialization_enforces_constraints:
for old_constraint in old.constraints:
if (
old_constraint not in self.constraints
and constraint_support[old_constraint.type] == ConstraintSupport.ENFORCED
):
enforced_model_constraint_removed.append(
(str(old_constraint.type), old_constraint.columns)
)

# Check for relevant materialization changes.
if (
old.materialization_enforces_constraints
and not self.materialization_enforces_constraints
and (old.constraints or column_constraints_exist)
):
materialization_changed = [old.config.materialized, self.config.materialized]

# If a column has been added, it will be missing in the old.columns, and present in self.columns
# That's a change (caught by the different checksums), but not a breaking change

# Did we find any changes that we consider breaking? If so, that's an error
if contract_enforced_disabled or columns_removed or column_type_changes:
if (
contract_enforced_disabled
or columns_removed
or column_type_changes
or enforced_model_constraint_removed
or enforced_column_constraint_removed
or materialization_changed
):
raise (
ContractBreakingChangeError(
contract_enforced_disabled=contract_enforced_disabled,
columns_removed=columns_removed,
column_type_changes=column_type_changes,
enforced_column_constraint_removed=enforced_column_constraint_removed,
enforced_model_constraint_removed=enforced_model_constraint_removed,
materialization_changed=materialization_changed,
node=self,
)
)
Expand All @@ -643,42 +756,6 @@ def same_contract(self, old) -> bool:
return False


# ====================================
# CompiledNode subclasses
# ====================================


@dataclass
class AnalysisNode(CompiledNode):
resource_type: NodeType = field(metadata={"restrict": [NodeType.Analysis]})


@dataclass
class HookNode(CompiledNode):
resource_type: NodeType = field(metadata={"restrict": [NodeType.Operation]})
index: Optional[int] = None


@dataclass
class ModelNode(CompiledNode):
resource_type: NodeType = field(metadata={"restrict": [NodeType.Model]})
access: AccessType = AccessType.Protected
constraints: List[ModelLevelConstraint] = field(default_factory=list)
version: Optional[NodeVersion] = None
latest_version: Optional[NodeVersion] = None

@property
def is_latest_version(self) -> bool:
return self.version is not None and self.version == self.latest_version

@property
def search_name(self):
if self.version is None:
return self.name
else:
return f"{self.name}.v{self.version}"


# TODO: rm?
@dataclass
class RPCNode(CompiledNode):
Expand Down Expand Up @@ -864,7 +941,7 @@ class GenericTestNode(TestShouldStoreFailures, CompiledNode, HasTestMetadata):
config: TestConfig = field(default_factory=TestConfig) # type: ignore
attached_node: Optional[str] = None

def same_contents(self, other) -> bool:
def same_contents(self, other, adapter_type: Optional[str]) -> bool:
if other is None:
return False

Expand Down
35 changes: 33 additions & 2 deletions core/dbt/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import re
import io
import agate
from typing import Any, Dict, List, Mapping, Optional, Union
from typing import Any, Dict, List, Mapping, Optional, Tuple, Union

from dbt.dataclass_schema import ValidationError
from dbt.events.helpers import env_secrets, scrub_secrets
Expand Down Expand Up @@ -212,11 +212,21 @@ class ContractBreakingChangeError(DbtRuntimeError):
MESSAGE = "Breaking Change to Contract"

def __init__(
self, contract_enforced_disabled, columns_removed, column_type_changes, node=None
self,
contract_enforced_disabled: bool,
columns_removed: List[str],
column_type_changes: List[Tuple[str, str, str]],
enforced_column_constraint_removed: List[List[str]],
enforced_model_constraint_removed: List[Tuple[str, List[str]]],
materialization_changed: List[str],
node=None,
):
self.contract_enforced_disabled = contract_enforced_disabled
self.columns_removed = columns_removed
self.column_type_changes = column_type_changes
self.enforced_column_constraint_removed = enforced_column_constraint_removed
self.enforced_model_constraint_removed = enforced_model_constraint_removed
self.materialization_changed = materialization_changed
super().__init__(self.message(), node)

@property
Expand All @@ -237,6 +247,27 @@ def message(self):
breaking_changes.append(
f"Columns with data_type changes: \n - {column_type_changes_str}"
)
if self.enforced_column_constraint_removed:
column_constraint_changes_str = "\n - ".join(
[f"{c[0]} ({c[1]})" for c in self.enforced_column_constraint_removed]
)
breaking_changes.append(
f"Enforced column level constraints were removed: \n - {column_constraint_changes_str}"
)
if self.enforced_model_constraint_removed:
model_constraint_changes_str = "\n - ".join(
[f"{c[0]} -> {c[1]}" for c in self.enforced_model_constraint_removed]
)
breaking_changes.append(
f"Enforced model level constraints were removed: \n - {model_constraint_changes_str}"
)
if self.materialization_changed:
materialization_changes_str = "\n - ".join(
f"{self.materialization_changed[0]} -> {self.materialization_changed[1]}"
)
breaking_changes.append(
f"Materialization changed with enforced constraints: \n - {materialization_changes_str}"
)

reasons = "\n\n".join(breaking_changes)

Expand Down
2 changes: 1 addition & 1 deletion core/dbt/graph/selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def can_select_indirectly(node):


class NodeSelector(MethodManager):
"""The node selector is aware of the graph and manifest,"""
"""The node selector is aware of the graph and manifest"""

def __init__(
self,
Expand Down
Loading