Skip to content

Commit

Permalink
Make seeds ref'able, provide for seed configuration (#668)
Browse files Browse the repository at this point in the history
* Make seeds ref'able, provide for seed configuration

Fixes:
 - #106
 - #561

* pep8, s/data/seeds/g

* add tests, truncate log output for seed sql
  • Loading branch information
drewbanin committed Feb 28, 2018
1 parent 262ab34 commit a5d17a3
Show file tree
Hide file tree
Showing 12 changed files with 158 additions and 23 deletions.
2 changes: 1 addition & 1 deletion dbt/adapters/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,7 +403,7 @@ def warning_on_hooks(cls, hook_type):

@classmethod
def add_query(cls, profile, sql, model_name=None, auto_begin=True,
bindings=None):
bindings=None, abridge_sql_log=False):
if model_name in ['on-run-start', 'on-run-end']:
cls.warning_on_hooks(model_name)
else:
Expand Down
7 changes: 5 additions & 2 deletions dbt/adapters/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -524,7 +524,7 @@ def close(cls, connection):

@classmethod
def add_query(cls, profile, sql, model_name=None, auto_begin=True,
bindings=None):
bindings=None, abridge_sql_log=False):
connection = cls.get_connection(profile, model_name)
connection_name = connection.get('name')

Expand All @@ -535,7 +535,10 @@ def add_query(cls, profile, sql, model_name=None, auto_begin=True,
.format(cls.type(), connection_name))

with cls.exception_handler(profile, sql, model_name, connection_name):
logger.debug('On %s: %s', connection_name, sql)
if abridge_sql_log:
logger.debug('On %s: %s....', connection_name, sql[0:512])
else:
logger.debug('On %s: %s', connection_name, sql)
pre = time.time()

cursor = connection.get('handle').cursor()
Expand Down
2 changes: 1 addition & 1 deletion dbt/adapters/postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,4 +235,4 @@ def load_csv_rows(cls, profile, schema, table_name, agate_table):
cols_sql,
",\n".join(placeholders)))

cls.add_query(profile, sql, bindings=bindings)
cls.add_query(profile, sql, bindings=bindings, abridge_sql_log=True)
4 changes: 2 additions & 2 deletions dbt/adapters/snowflake.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ def check_schema_exists(cls, profile, schema, model_name=None):

@classmethod
def add_query(cls, profile, sql, model_name=None, auto_begin=True,
select_schema=True, bindings=None):
select_schema=True, bindings=None, abridge_sql_log=False):
# snowflake only allows one query per api call.
queries = sql.strip().split(";")
cursor = None
Expand Down Expand Up @@ -211,7 +211,7 @@ def add_query(cls, profile, sql, model_name=None, auto_begin=True,

connection, cursor = super(PostgresAdapter, cls).add_query(
profile, individual_query, model_name, auto_begin,
bindings=bindings)
bindings=bindings, abridge_sql_log=abridge_sql_log)

return connection, cursor

Expand Down
3 changes: 2 additions & 1 deletion dbt/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,8 @@ def load_project(cls, root_project, all_projects, project, project_name,
all_projects=all_projects,
root_dir=project.get('project-root'),
relative_dirs=project.get('data-paths', []),
resource_type=NodeType.Seed)
resource_type=NodeType.Seed,
macros=macros)


# node loaders
Expand Down
9 changes: 7 additions & 2 deletions dbt/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from dbt.compat import basestring

from dbt.utils import split_path, deep_merge, DBTConfigKeys
from dbt.node_types import NodeType


class SourceConfig(object):
Expand All @@ -24,11 +25,12 @@ class SourceConfig(object):
'bind'
]

def __init__(self, active_project, own_project, fqn):
def __init__(self, active_project, own_project, fqn, node_type):
self._config = None
self.active_project = active_project
self.own_project = own_project
self.fqn = fqn
self.node_type = node_type

# the config options defined within the model
self.in_model_config = {}
Expand Down Expand Up @@ -133,7 +135,10 @@ def get_project_config(self, project):
for k in SourceConfig.ExtendDictFields:
config[k] = {}

model_configs = project.get('models')
if self.node_type == NodeType.Seed:
model_configs = project.get('seeds')
else:
model_configs = project.get('models')

if model_configs is None:
return config
Expand Down
8 changes: 4 additions & 4 deletions dbt/node_runners.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,16 +79,16 @@ def raise_on_first_error(self):
return False

@classmethod
def is_model(cls, node):
return node.get('resource_type') == NodeType.Model
def is_refable(cls, node):
return node.get('resource_type') in [NodeType.Model, NodeType.Seed]

@classmethod
def is_ephemeral(cls, node):
return dbt.utils.get_materialization(node) == 'ephemeral'

@classmethod
def is_ephemeral_model(cls, node):
return cls.is_model(node) and cls.is_ephemeral(node)
return cls.is_refable(node) and cls.is_ephemeral(node)

def safe_run(self, flat_graph, existing):
catchable_errors = (dbt.exceptions.CompilationException,
Expand Down Expand Up @@ -175,7 +175,7 @@ def do_skip(self):
def get_model_schemas(cls, flat_graph):
schemas = set()
for node in flat_graph['nodes'].values():
if cls.is_model(node) and not cls.is_ephemeral(node):
if cls.is_refable(node) and not cls.is_ephemeral(node):
schemas.add(node['schema'])

return schemas
Expand Down
13 changes: 8 additions & 5 deletions dbt/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,15 @@ def resolve_ref(flat_graph, target_model_name, target_model_package,
current_project, node_package):

if target_model_package is not None:
return dbt.utils.find_model_by_name(
return dbt.utils.find_refable_by_name(
flat_graph,
target_model_name,
target_model_package)

target_model = None

# first pass: look for models in the current_project
target_model = dbt.utils.find_model_by_name(
target_model = dbt.utils.find_refable_by_name(
flat_graph,
target_model_name,
current_project)
Expand All @@ -56,7 +56,7 @@ def resolve_ref(flat_graph, target_model_name, target_model_package,
return target_model

# second pass: look for models in the node's package
target_model = dbt.utils.find_model_by_name(
target_model = dbt.utils.find_refable_by_name(
flat_graph,
target_model_name,
node_package)
Expand All @@ -67,7 +67,7 @@ def resolve_ref(flat_graph, target_model_name, target_model_package,
# final pass: look for models in any package
# todo: exclude the packages we have already searched. overriding
# a package model in another package doesn't necessarily work atm
return dbt.utils.find_model_by_name(
return dbt.utils.find_refable_by_name(
flat_graph,
target_model_name,
None)
Expand Down Expand Up @@ -201,7 +201,10 @@ def parse_node(node, node_path, root_project_config, package_project_config,
fqn = get_fqn(node.get('path'), package_project_config, fqn_extra)

config = dbt.model.SourceConfig(
root_project_config, package_project_config, fqn)
root_project_config,
package_project_config,
fqn,
node['resource_type'])

node['unique_id'] = node_path
node['empty'] = ('raw_sql' in node and len(node['raw_sql'].strip()) == 0)
Expand Down
8 changes: 4 additions & 4 deletions dbt/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,14 +125,14 @@ def model_immediate_name(model, non_destructive):
return "{}__dbt_tmp".format(model_name)


def find_model_by_name(flat_graph, target_name, target_package):
def find_refable_by_name(flat_graph, target_name, target_package):
return find_by_name(flat_graph, target_name, target_package,
'nodes', NodeType.Model)
'nodes', [NodeType.Model, NodeType.Seed])


def find_macro_by_name(flat_graph, target_name, target_package):
return find_by_name(flat_graph, target_name, target_package,
'macros', NodeType.Macro)
'macros', [NodeType.Macro])


def find_by_name(flat_graph, target_name, target_package, subgraph,
Expand All @@ -146,7 +146,7 @@ def find_by_name(flat_graph, target_name, target_package, subgraph,

resource_type, package_name, node_name = node_parts

if (resource_type == nodetype and
if (resource_type in nodetype and
((target_name == node_name) and
(target_package is None or
target_package == package_name))):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
id,first_name,email,ip_address,birthday
1,Larry,lking0@miitbeian.gov.cn,69.135.206.194,2008-09-12 19:08:31
2,Larry,lperkins1@toplist.cz,64.210.133.162,1978-05-09 04:15:14
3,Anna,amontgomery2@miitbeian.gov.cn,168.104.64.114,2011-10-16 04:07:57
4,Sandra,sgeorge3@livejournal.com,229.235.252.98,1973-07-19 10:52:43
5,Fred,fwoods4@google.cn,78.229.170.124,2012-09-30 16:38:29
6,Stephen,shanson5@livejournal.com,182.227.157.105,1995-11-07 21:40:50
7,William,wmartinez6@upenn.edu,135.139.249.50,1982-09-05 03:11:59
8,Jessica,jlong7@hao123.com,203.62.178.210,1991-10-16 11:03:15
9,Douglas,dwhite8@tamu.edu,178.187.247.1,1979-10-01 09:49:48
10,Lisa,lcoleman9@nydailynews.com,168.234.128.249,2011-05-26 07:45:49
11,Ralph,rfieldsa@home.pl,55.152.163.149,1972-11-18 19:06:11
12,Louise,lnicholsb@samsung.com,141.116.153.154,2014-11-25 20:56:14
13,Clarence,cduncanc@sfgate.com,81.171.31.133,2011-11-17 07:02:36
14,Daniel,dfranklind@omniture.com,8.204.211.37,1980-09-13 00:09:04
15,Katherine,klanee@auda.org.au,176.96.134.59,1997-08-22 19:36:56
16,Billy,bwardf@wikia.com,214.108.78.85,2003-10-19 02:14:47
17,Annie,agarzag@ocn.ne.jp,190.108.42.70,1988-10-28 15:12:35
18,Shirley,scolemanh@fastcompany.com,109.251.164.84,1988-08-24 10:50:57
19,Roger,rfrazieri@scribd.com,38.145.218.108,1985-12-31 15:17:15
20,Lillian,lstanleyj@goodreads.com,47.57.236.17,1970-06-08 02:09:05
21 changes: 21 additions & 0 deletions test/integration/005_simple_seed_test/data-config/seed_enabled.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
id,first_name,email,ip_address,birthday
1,Larry,lking0@miitbeian.gov.cn,69.135.206.194,2008-09-12 19:08:31
2,Larry,lperkins1@toplist.cz,64.210.133.162,1978-05-09 04:15:14
3,Anna,amontgomery2@miitbeian.gov.cn,168.104.64.114,2011-10-16 04:07:57
4,Sandra,sgeorge3@livejournal.com,229.235.252.98,1973-07-19 10:52:43
5,Fred,fwoods4@google.cn,78.229.170.124,2012-09-30 16:38:29
6,Stephen,shanson5@livejournal.com,182.227.157.105,1995-11-07 21:40:50
7,William,wmartinez6@upenn.edu,135.139.249.50,1982-09-05 03:11:59
8,Jessica,jlong7@hao123.com,203.62.178.210,1991-10-16 11:03:15
9,Douglas,dwhite8@tamu.edu,178.187.247.1,1979-10-01 09:49:48
10,Lisa,lcoleman9@nydailynews.com,168.234.128.249,2011-05-26 07:45:49
11,Ralph,rfieldsa@home.pl,55.152.163.149,1972-11-18 19:06:11
12,Louise,lnicholsb@samsung.com,141.116.153.154,2014-11-25 20:56:14
13,Clarence,cduncanc@sfgate.com,81.171.31.133,2011-11-17 07:02:36
14,Daniel,dfranklind@omniture.com,8.204.211.37,1980-09-13 00:09:04
15,Katherine,klanee@auda.org.au,176.96.134.59,1997-08-22 19:36:56
16,Billy,bwardf@wikia.com,214.108.78.85,2003-10-19 02:14:47
17,Annie,agarzag@ocn.ne.jp,190.108.42.70,1988-10-28 15:12:35
18,Shirley,scolemanh@fastcompany.com,109.251.164.84,1988-08-24 10:50:57
19,Roger,rfrazieri@scribd.com,38.145.218.108,1985-12-31 15:17:15
20,Lillian,lstanleyj@goodreads.com,47.57.236.17,1970-06-08 02:09:05
83 changes: 82 additions & 1 deletion test/integration/005_simple_seed_test/test_simple_seed.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def test_simple_seed(self):
self.run_dbt(["seed"])
self.assertTablesEqual("seed_actual","seed_expected")

# this should truncate the seed_actual table, the re-insert
# this should truncate the seed_actual table, then re-insert
self.run_dbt(["seed"])
self.assertTablesEqual("seed_actual","seed_expected")

Expand All @@ -40,3 +40,84 @@ def test_simple_seed_with_drop(self):
# this should drop the seed table, then re-create
self.run_dbt(["seed", "--drop-existing"])
self.assertTablesEqual("seed_actual","seed_expected")


class TestSimpleSeedCustomSchema(DBTIntegrationTest):

def setUp(self):
DBTIntegrationTest.setUp(self)
self.run_sql_file("test/integration/005_simple_seed_test/seed.sql")

@property
def schema(self):
return "simple_seed_005"

@property
def models(self):
return "test/integration/005_simple_seed_test/models"

@property
def project_config(self):
return {
"data-paths": ['test/integration/005_simple_seed_test/data'],
"seeds": {
"schema": "custom_schema"
}
}

@attr(type='postgres')
def test_simple_seed_with_schema(self):
schema_name = "{}_{}".format(self.unique_schema(), 'custom_schema')

self.run_dbt(["seed"])
self.assertTablesEqual("seed_actual","seed_expected", table_a_schema=schema_name)

# this should truncate the seed_actual table, then re-insert
self.run_dbt(["seed"])
self.assertTablesEqual("seed_actual","seed_expected", table_a_schema=schema_name)


@attr(type='postgres')
def test_simple_seed_with_drop_and_schema(self):
schema_name = "{}_{}".format(self.unique_schema(), 'custom_schema')

self.run_dbt(["seed"])
self.assertTablesEqual("seed_actual","seed_expected", table_a_schema=schema_name)

# this should drop the seed table, then re-create
self.run_dbt(["seed", "--full-refresh"])
self.assertTablesEqual("seed_actual","seed_expected", table_a_schema=schema_name)


class TestSimpleSeedDisabled(DBTIntegrationTest):

@property
def schema(self):
return "simple_seed_005"

@property
def models(self):
return "test/integration/005_simple_seed_test/models"

@property
def project_config(self):
return {
"data-paths": ['test/integration/005_simple_seed_test/data-config'],
"seeds": {
"test": {
"seed_enabled": {
"enabled": True
},
"seed_disabled": {
"enabled": False
}
}
}
}

@attr(type='postgres')
def test_simple_seed_with_disabled(self):
self.run_dbt(["seed"])
self.assertTableDoesExist('seed_enabled')
self.assertTableDoesNotExist('seed_disabled')

0 comments on commit a5d17a3

Please sign in to comment.