8
8
from modules .data_sources .ChangeTrackingInfo import ChangeTrackingInfo
9
9
from sqlalchemy .sql import text
10
10
11
+
11
12
class MsSqlDataSource (object ):
12
13
13
14
def __init__ (self , connection_string , logger = None ):
@@ -31,32 +32,40 @@ def prefix_column(column_name, full_refresh, primary_key_column_name):
31
32
else :
32
33
return "t.{0}" .format (column_name )
33
34
34
- def build_select_statement (self , table_configuration , columns , batch_configuration , previous_batch_key , full_refresh , change_tracking_info ):
35
- column_array = list (map (lambda cfg : self .prefix_column (cfg ['source_name' ], full_refresh , table_configuration ['primary_key' ]), columns ))
35
+ def build_select_statement (self , table_configuration , columns , batch_configuration , batch_key_tracker , full_refresh ,
36
+ change_tracking_info ):
37
+ column_array = list (
38
+ map (lambda cfg : self .prefix_column (cfg ['source_name' ], full_refresh , table_configuration ['primary_keys' ]),
39
+ columns ))
36
40
column_names = ", " .join (column_array )
41
+
37
42
if full_refresh :
38
- return "SELECT TOP ({0}) {1} FROM {2}.{3} t WHERE t.{4} > {5} ORDER BY t.{4}" .format (batch_configuration ['size' ],
39
- column_names ,
40
- table_configuration [
41
- 'schema' ],
42
- table_configuration [
43
- 'name' ],
44
- table_configuration [
45
- 'primary_key' ],
46
- previous_batch_key )
43
+ order_by = ", t." .join (table_configuration ['primary_keys' ])
44
+
45
+ return "SELECT TOP ({0}) {1} FROM {2}.{3} t WHERE {4} ORDER BY {5}" .format (batch_configuration ['size' ],
46
+ column_names ,
47
+ table_configuration [
48
+ 'schema' ],
49
+ table_configuration [
50
+ 'name' ],
51
+ self .build_where_clause (batch_key_tracker , "t" ),
52
+ order_by )
47
53
else :
54
+ order_by = ", chg." .join (table_configuration ['primary_keys' ])
55
+
48
56
sql_builder = io .StringIO ()
49
57
sql_builder .write ("SELECT TOP ({0}) {1}, " .format (batch_configuration ['size' ], column_names ))
50
- sql_builder .write ("chg.SYS_CHANGE_VERSION as data_pipeline_change_version, CASE chg.SYS_CHANGE_OPERATION WHEN 'D' THEN 1 ELSE 0 END as data_pipeline_is_deleted \n " )
58
+ sql_builder .write (
59
+ "chg.SYS_CHANGE_VERSION as data_pipeline_change_version, CASE chg.SYS_CHANGE_OPERATION WHEN 'D' THEN 1 ELSE 0 END as data_pipeline_is_deleted \n " )
51
60
sql_builder .write ("FROM CHANGETABLE(CHANGES {0}.{1}, {2}) chg " .format (table_configuration ['schema' ],
52
61
table_configuration ['name' ],
53
62
change_tracking_info .this_sync_version ))
54
- sql_builder .write (" LEFT JOIN {0}.{1} t on chg. {2} = t.{2} " .format ( table_configuration ['schema' ],
55
- table_configuration ['name' ],
56
- table_configuration [ 'primary_key' ], ))
63
+ sql_builder .write (" LEFT JOIN {0}.{1} t on {2} " .format (table_configuration ['schema' ],
64
+ table_configuration ['name' ],
65
+ self . build_change_table_on_clause ( batch_key_tracker ) ))
57
66
58
- sql_builder .write ("WHERE chg. {0} > {1} ORDER BY chg.{0} " .format (table_configuration [ 'primary_key' ],
59
- previous_batch_key ))
67
+ sql_builder .write ("WHERE {0}" .format (self . build_where_clause ( batch_key_tracker , "t" )))
68
+ sql_builder . write ( "ORDER BY {0}" . format ( order_by ))
60
69
61
70
return sql_builder .getvalue ()
62
71
@@ -65,7 +74,8 @@ def assert_data_source_is_valid(self, table_configuration, configured_columns):
65
74
columns_in_database = self .get_table_columns (table_configuration )
66
75
67
76
for column in configured_columns :
68
- self .assert_column_exists (column ['source_name' ], columns_in_database , "{0}.{1}" .format (table_configuration ['schema' ], table_configuration ['name' ]))
77
+ self .assert_column_exists (column ['source_name' ], columns_in_database ,
78
+ "{0}.{1}" .format (table_configuration ['schema' ], table_configuration ['name' ]))
69
79
70
80
def assert_column_exists (self , column_name , columns_in_database , table_name ):
71
81
if column_name in columns_in_database :
@@ -82,9 +92,10 @@ def get_table_columns(self, table_configuration):
82
92
autoload_with = self .database_engine )
83
93
return list (map (lambda column : column .name , table .columns ))
84
94
85
-
86
- def get_next_data_frame (self , table_configuration , columns , batch_configuration , batch_tracker , previous_batch_key , full_refresh , change_tracking_info ):
87
- sql = self .build_select_statement (table_configuration , columns , batch_configuration , previous_batch_key , full_refresh , change_tracking_info ,)
95
+ def get_next_data_frame (self , table_configuration , columns , batch_configuration , batch_tracker , batch_key_tracker ,
96
+ full_refresh , change_tracking_info ):
97
+ sql = self .build_select_statement (table_configuration , columns , batch_configuration , batch_key_tracker ,
98
+ full_refresh , change_tracking_info , )
88
99
self .logger .debug ("Starting read of SQL Statement: {0}" .format (sql ))
89
100
data_frame = pandas .read_sql_query (sql , self .database_engine )
90
101
@@ -111,9 +122,11 @@ def init_change_tracking(self, table_configuration, last_sync_version):
111
122
sql_builder .write ("DECLARE @last_sync_version bigint = {0}; \n " .format (last_sync_version ))
112
123
sql_builder .write ("DECLARE @this_sync_version bigint = 0; \n " )
113
124
sql_builder .write ("DECLARE @next_sync_version bigint = CHANGE_TRACKING_CURRENT_VERSION(); \n " )
114
- sql_builder .write ("IF @last_sync_version >= CHANGE_TRACKING_MIN_VALID_VERSION(OBJECT_ID('{0}.{1}'))\n " .format (table_configuration ['schema' ],table_configuration ['name' ]))
125
+ sql_builder .write ("IF @last_sync_version >= CHANGE_TRACKING_MIN_VALID_VERSION(OBJECT_ID('{0}.{1}'))\n " .format (
126
+ table_configuration ['schema' ], table_configuration ['name' ]))
115
127
sql_builder .write (" SET @this_sync_version = @last_sync_version; \n " )
116
- sql_builder .write (" SELECT @next_sync_version as next_sync_version, @this_sync_version as this_sync_version; \n " )
128
+ sql_builder .write (
129
+ " SELECT @next_sync_version as next_sync_version, @this_sync_version as this_sync_version; \n " )
117
130
118
131
self .logger .debug ("Getting ChangeTrackingInformation for {0}.{1}. {2}" .format (table_configuration ['schema' ],
119
132
table_configuration ['name' ],
@@ -125,3 +138,38 @@ def init_change_tracking(self, table_configuration, last_sync_version):
125
138
126
139
force_full_load = bool (row ["this_sync_version" ] == 0 or row ["next_sync_version" ] == 0 )
127
140
return ChangeTrackingInfo (row ["this_sync_version" ], row ["next_sync_version" ], force_full_load )
141
+
142
+ @staticmethod
143
+ def build_where_clause (batch_key_tracker , table_alias ):
144
+ has_value = False
145
+
146
+ try :
147
+ sql_builder = io .StringIO ()
148
+ for primary_key in batch_key_tracker .bookmarks :
149
+ if has_value :
150
+ sql_builder .write (" AND " )
151
+
152
+ sql_builder .write (
153
+ " {0}.{1} > {2}" .format (table_alias , primary_key , batch_key_tracker .bookmarks [primary_key ]))
154
+ has_value = True
155
+
156
+ return sql_builder .getvalue ()
157
+ finally :
158
+ sql_builder .close ()
159
+
160
+ @staticmethod
161
+ def build_change_table_on_clause (batch_key_tracker ):
162
+ has_value = False
163
+
164
+ try :
165
+ sql_builder = io .StringIO ()
166
+ for primary_key in batch_key_tracker .bookmarks :
167
+ if has_value :
168
+ sql_builder .write (" AND " )
169
+
170
+ sql_builder .write (" chg.{0} = t.{0}" .format (primary_key ))
171
+ has_value = True
172
+
173
+ return sql_builder .getvalue ()
174
+ finally :
175
+ sql_builder .close ()
0 commit comments