@@ -29,6 +29,7 @@ def __init__(self, connection_string, logger=None):
29
29
.split (AWSLambdaDataSource .CONNECTION_STRING_GROUP_SEPARATOR )
30
30
)
31
31
self .aws_lambda_client = boto3 .client ("lambda" )
32
+ self .aws_s3_client = boto3 .client ("s3" )
32
33
33
34
@staticmethod
34
35
def can_handle_connection_string (connection_string ):
@@ -43,8 +44,9 @@ def get_connection_string_prefix():
43
44
return AWSLambdaDataSource .CONNECTION_STRING_PREFIX
44
45
45
46
def get_table_info (self , table_config , last_known_sync_version ):
46
- column_names , last_sync_version , sync_version , full_refresh_required , data_changed_since_last_sync \
47
- = self .__get_table_info (table_config , last_known_sync_version )
47
+ column_names , last_sync_version , sync_version , full_refresh_required , data_changed_since_last_sync = self .__get_table_info (
48
+ table_config , last_known_sync_version
49
+ )
48
50
columns_in_database = column_names
49
51
change_tracking_info = ChangeTrackingInfo (
50
52
last_sync_version = last_sync_version ,
@@ -91,11 +93,13 @@ def __get_table_info(self, table_config, last_known_sync_version):
91
93
92
94
result = self .__invoke_lambda (pay_load )
93
95
94
- return result ["ColumnNames" ], \
95
- result ["LastSyncVersion" ], \
96
- result ["CurrentSyncVersion" ], \
97
- result ["FullRefreshRequired" ], \
98
- result ["DataChangedSinceLastSync" ]
96
+ return (
97
+ result ["ColumnNames" ],
98
+ result ["LastSyncVersion" ],
99
+ result ["CurrentSyncVersion" ],
100
+ result ["FullRefreshRequired" ],
101
+ result ["DataChangedSinceLastSync" ],
102
+ )
99
103
100
104
def __get_table_data (
101
105
self ,
@@ -116,23 +120,31 @@ def __get_table_data(
116
120
"BatchSize" : batch_config ["size" ],
117
121
"LastSyncVersion" : change_tracking_info .last_sync_version ,
118
122
"FullRefresh" : full_refresh ,
119
- "ColumnNames" : list (map (lambda cfg : cfg ['source_name' ], columns_config )),
123
+ "ColumnNames" : list (
124
+ map (lambda cfg : cfg ["source_name" ], columns_config )
125
+ ),
120
126
"PrimaryKeyColumnNames" : table_config ["primary_keys" ],
121
127
"LastBatchPrimaryKeys" : [
122
- {"Key" : k , "Value" : v } for k , v in batch_key_tracker .bookmarks .items ()
128
+ {"Key" : k , "Value" : v }
129
+ for k , v in batch_key_tracker .bookmarks .items ()
123
130
],
124
131
},
125
132
}
126
133
127
134
result = self .__invoke_lambda (pay_load )
135
+ command_result = self .aws_s3_client .get_object (
136
+ Bucket = result ["DataBucketName" ], Key = result ["DataKey" ]
137
+ )
128
138
129
- return result ["ColumnNames" ], result ["Data" ]
139
+ data = json .loads (command_result ["Body" ].read ())
140
+
141
+ return result ["ColumnNames" ], data
130
142
131
143
def __get_data_frame (self , data : [[]], column_names : []):
132
144
return pandas .DataFrame (data = data , columns = column_names )
133
145
134
146
def __invoke_lambda (self , pay_load ):
135
- self .logger .debug (' \n Request being sent to Lambda:' )
147
+ self .logger .debug (" \n Request being sent to Lambda:" )
136
148
self .logger .debug (pay_load )
137
149
138
150
lambda_response = self .aws_lambda_client .invoke (
@@ -142,24 +154,28 @@ def __invoke_lambda(self, pay_load):
142
154
Payload = json .dumps (pay_load ).encode (),
143
155
)
144
156
145
- response_status_code = int (lambda_response [' StatusCode' ])
157
+ response_status_code = int (lambda_response [" StatusCode" ])
146
158
response_function_error = lambda_response .get ("FunctionError" )
147
- self .logger .debug (' \n Response received from Lambda:' )
159
+ self .logger .debug (" \n Response received from Lambda:" )
148
160
self .logger .debug (f'Response - StatusCode = "{ response_status_code } "' )
149
161
self .logger .debug (f'Response - FunctionError = "{ response_function_error } "' )
150
162
151
- response_payload = json .loads (lambda_response [' Payload' ].read ())
163
+ response_payload = json .loads (lambda_response [" Payload" ].read ())
152
164
153
165
if response_status_code != 200 or response_function_error :
154
- self .logger .error (F'Error in response from aws lambda { self .connection_data ["function" ]} ' )
155
- self .logger .error (f'Response - Status Code = { response_status_code } ' )
156
- self .logger .error (f'Response - Error Function = { response_function_error } ' )
157
- self .logger .error (f'Response - Error Details:' )
166
+ self .logger .error (
167
+ f'Error in response from aws lambda { self .connection_data ["function" ]} '
168
+ )
169
+ self .logger .error (f"Response - Status Code = { response_status_code } " )
170
+ self .logger .error (f"Response - Error Function = { response_function_error } " )
171
+ self .logger .error (f"Response - Error Details:" )
158
172
# the below is risky as it may contain actual data if this line is reached in case of a successful result
159
173
# however, the same Payload field is used to return actual error details in case of real errors
160
174
# i.e. StatusCode is 200 (since AWS could invoke the lambda)
161
175
# BUT the lambda barfed with an error and therefore the FunctionError would not be None
162
176
self .logger .error (response_payload )
163
- raise Exception ('Error received when invoking AWS Lambda. See logs for further details.' )
177
+ raise Exception (
178
+ "Error received when invoking AWS Lambda. See logs for further details."
179
+ )
164
180
165
181
return response_payload
0 commit comments