Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update of macro for postgres/redshift use of unique_key as a list #4858

Merged
merged 9 commits into from
Mar 22, 2022
7 changes: 7 additions & 0 deletions .changes/unreleased/Features-20220314-112341.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
kind: Features
body: Allow unique_key to be given as a list in the postgres/redshift incremental implementation
time: 2022-03-14T11:23:41.293726-05:00
custom:
Author: McKnight-42
Issue: "4738"
PR: "4858"
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,26 @@

{%- set dest_cols_csv = get_quoted_csv(dest_columns | map(attribute="name")) -%}

{% if unique_key is not none %}
delete from {{ target }}
where ({{ unique_key }}) in (
select ({{ unique_key }})
from {{ source }}
);
{% endif %}
{% if unique_key %}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was worried that this may be a breaking change for any users who have defined a unique key named "False". Putting aside how confusing that would be — we tested it out, and this logic should still work.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

also an empty string

{% if unique_key is sequence and unique_key is not string %}
delete from {{target }}
McKnight-42 marked this conversation as resolved.
Show resolved Hide resolved
using {{ source }}
where (
{% for key in unique_key %}
{{ source }}.{{ key }} = {{ target }}.{{ key }}
{{ "and " if not loop.last }}
{% endfor %}
);
{% else %}
delete from {{ target }}
where (
{{ unique_key }}) in (
select ({{ unique_key }})
from {{ source }}
);

{% endif %}
{% endif %}
McKnight-42 marked this conversation as resolved.
Show resolved Hide resolved

insert into {{ target }} ({{ dest_cols_csv }})
(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
-- incremental model whose unique_key list names the same column ('state') twice
-- NOTE(review): presumably expected to behave like unique_key=['state'] -- confirm against the test suite

{{ config(materialized='incremental', unique_key=['state', 'state']) }}

select
    cast(state as varchar(2)) as state,
    cast(county as varchar(12)) as county,
    cast(city as varchar(12)) as city,
    cast(last_visit_date as date) as last_visit_date
from {{ ref('seed') }}

{% if is_incremental() %}
-- on incremental runs, only pick up rows newer than what the model already holds
where last_visit_date > (
    select max(last_visit_date)
    from {{ this }}
)
{% endif %}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
-- unique_key is the empty string: the model should still build normally

{{ config(materialized='incremental', unique_key='') }}

select *
from {{ ref('seed') }}

{% if is_incremental() %}
-- on incremental runs, only pick up rows newer than what the model already holds
where last_visit_date > (
    select max(last_visit_date)
    from {{ this }}
)
{% endif %}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
-- unique_key is an empty list: the model should still build normally

{{ config(materialized='incremental', unique_key=[]) }}

select *
from {{ ref('seed') }}

{% if is_incremental() %}
-- on incremental runs, only pick up rows newer than what the model already holds
where last_visit_date > (
    select max(last_visit_date)
    from {{ this }}
)
{% endif %}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
-- Materializes six literal rows as a table; column names and types are taken
-- from the first select of the union.
-- NOTE(review): the rows match the seed CSV except CT/Hartford, which carries
-- last_visit_date 2022-02-14 -- presumably the expected model state after the
-- duplicate-key insert overwrites that row; confirm against the test.
{{
config(
materialized='table'
)
}}

select
'CT'::varchar(2) as state,
'Hartford'::varchar(12) as county,
'Hartford'::varchar(12) as city,
'2022-02-14'::date as last_visit_date
union all
select 'MA'::varchar(2),'Suffolk'::varchar(12),'Boston'::varchar(12),'2020-02-12'::date
McKnight-42 marked this conversation as resolved.
Show resolved Hide resolved
union all
select 'NJ'::varchar(2),'Mercer'::varchar(12),'Trenton'::varchar(12),'2022-01-01'::date
union all
select 'NY'::varchar(2),'Kings'::varchar(12),'Brooklyn'::varchar(12),'2021-04-02'::date
union all
select 'NY'::varchar(2),'New York'::varchar(12),'Manhattan'::varchar(12),'2021-04-01'::date
union all
select 'PA'::varchar(2),'Philadelphia'::varchar(12),'Philadelphia'::varchar(12),'2021-05-21'::date
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
-- Materializes six literal rows as a table; column names and types are taken
-- from the first select of the union.
-- NOTE(review): the CT/Hartford row carries last_visit_date 2022-02-14 --
-- presumably the expected model state after an overwrite; confirm against the test.

{{ config(materialized='table') }}

select
    'CT'::varchar(2) as state,
    'Hartford'::varchar(12) as county,
    'Hartford'::varchar(12) as city,
    '2022-02-14'::date as last_visit_date

union all

select
    'MA'::varchar(2),
    'Suffolk'::varchar(12),
    'Boston'::varchar(12),
    '2020-02-12'::date

union all

select
    'NJ'::varchar(2),
    'Mercer'::varchar(12),
    'Trenton'::varchar(12),
    '2022-01-01'::date

union all

select
    'NY'::varchar(2),
    'Kings'::varchar(12),
    'Brooklyn'::varchar(12),
    '2021-04-02'::date

union all

select
    'NY'::varchar(2),
    'New York'::varchar(12),
    'Manhattan'::varchar(12),
    '2021-04-01'::date

union all

select
    'PA'::varchar(2),
    'Philadelphia'::varchar(12),
    'Philadelphia'::varchar(12),
    '2021-05-21'::date
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
-- with no unique_key configured, the build should show no special behavior

{{ config(materialized='incremental') }}

select *
from {{ ref('seed') }}

{% if is_incremental() %}
-- on incremental runs, only pick up rows newer than what the model already holds
where last_visit_date > (
    select max(last_visit_date)
    from {{ this }}
)
{% endif %}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
-- With a multi-column unique_key list, rows in the model are overwritten only
-- where every unique key field matches.
-- N.B. this variant is needed for direct comparison with the seed.

{{ config(materialized='incremental', unique_key=['state', 'county', 'city']) }}

select
    state,
    county,
    city,
    last_visit_date
from {{ ref('seed') }}

{% if is_incremental() %}
-- on incremental runs, only pick up rows newer than what the model already holds
where last_visit_date > (
    select max(last_visit_date)
    from {{ this }}
)
{% endif %}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
-- a unique_key that names a column missing from the table should make the model error out

{{ config(materialized='incremental', unique_key='thisisnotacolumn') }}

select *
from {{ ref('seed') }}

{% if is_incremental() %}
-- on incremental runs, only pick up rows newer than what the model already holds
where last_visit_date > (
    select max(last_visit_date)
    from {{ this }}
)
{% endif %}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
-- if any element of the unique_key list is not a column of the model, the build should error out

{{ config(materialized='incremental', unique_key=['state', 'thisisnotacolumn']) }}

select *
from {{ ref('seed') }}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
-- A single string unique_key should trigger overwrite behavior when the
-- source has conflicting entries (i.e. more than one row per unique key
-- combination).

{{ config(materialized='incremental', unique_key='state') }}

select
    cast(state as varchar(2)) as state,
    cast(county as varchar(12)) as county,
    cast(city as varchar(12)) as city,
    cast(last_visit_date as date) as last_visit_date
from {{ ref('seed') }}

{% if is_incremental() %}
-- on incremental runs, only pick up rows newer than what the model already holds
where last_visit_date > (
    select max(last_visit_date)
    from {{ this }}
)
{% endif %}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
-- With a multi-column unique_key list, rows in the model are overwritten only
-- where every unique key field matches.

{{ config(materialized='incremental', unique_key=['state', 'county', 'city']) }}

select
    cast(state as varchar(2)) as state,
    cast(county as varchar(12)) as county,
    cast(city as varchar(12)) as city,
    cast(last_visit_date as date) as last_visit_date
from {{ ref('seed') }}

{% if is_incremental() %}
-- on incremental runs, only pick up rows newer than what the model already holds
where last_visit_date > (
    select max(last_visit_date)
    from {{ this }}
)
{% endif %}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
-- A one-element unique_key list should give overwriting semantics on that
-- single matching field.

{{ config(materialized='incremental', unique_key=['state']) }}

select
    cast(state as varchar(2)) as state,
    cast(county as varchar(12)) as county,
    cast(city as varchar(12)) as city,
    cast(last_visit_date as date) as last_visit_date
from {{ ref('seed') }}

{% if is_incremental() %}
-- on incremental runs, only pick up rows newer than what the model already holds
where last_visit_date > (
    select max(last_visit_date)
    from {{ this }}
)
{% endif %}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
-- Inserts which, applied to seed.csv, make the incremental model grow in
-- size while not (necessarily) diverging from the seed itself.

-- Two new rows; both should end up in the incremental model whatever the
-- unique columns are.
insert into {schema}.seed (state, county, city, last_visit_date)
values ('WA', 'King', 'Seattle', '2022-02-01');

insert into {schema}.seed (state, county, city, last_visit_date)
values ('CA', 'Los Angeles', 'Los Angeles', '2022-02-01');
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- Insert which, applied to seed.csv, triggers the in-place overwrite
-- strategy of incremental models; the seed and the incremental model diverge.

-- One new row; it should not be in the incremental model when the primary
-- column or the first three columns are unique.
insert into {schema}.seed (state, county, city, last_visit_date)
values ('CT', 'Hartford', 'Hartford', '2022-02-14');
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
state,county,city,last_visit_date
CT,Hartford,Hartford,2020-09-23
MA,Suffolk,Boston,2020-02-12
NJ,Mercer,Trenton,2022-01-01
NY,Kings,Brooklyn,2021-04-02
NY,New York,Manhattan,2021-04-01
PA,Philadelphia,Philadelphia,2021-05-21
Loading