diff --git a/CHANGELOG.md b/CHANGELOG.md index 1bdae53..8fe3879 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,14 @@ # Changelog +## [Version 1.1.4](https://github.com/dataiku/dss-plugin-api-connect/releases/tag/v1.1.4) - Feature and bugfix release - 2023-02-28 + +- Add Brotli compression +- Faster recurring calls + +## [Version 1.1.3](https://github.com/dataiku/dss-plugin-api-connect/releases/tag/v1.1.3) - Bugfix release - 2023-04-18 + +- Updated code-env descriptor for DSS 12 + ## [Version 1.1.2](https://github.com/dataiku/dss-plugin-api-connect/releases/tag/v1.1.2) - Bugfix release - 2022-10-19 - Fix for last page of RFC5988 pagination triggering loop condtion diff --git a/code-env/python/spec/requirements.txt b/code-env/python/spec/requirements.txt index b443370..5f763dd 100644 --- a/code-env/python/spec/requirements.txt +++ b/code-env/python/spec/requirements.txt @@ -1,2 +1,4 @@ jsonpath-ng==1.5.3 requests_ntlm==1.1.0 +requests==2.26.0 +Brotli==1.0.9 diff --git a/custom-recipes/api-connect/recipe.py b/custom-recipes/api-connect/recipe.py index 97d6a29..9d6df39 100644 --- a/custom-recipes/api-connect/recipe.py +++ b/custom-recipes/api-connect/recipe.py @@ -5,9 +5,10 @@ from safe_logger import SafeLogger from dku_utils import get_dku_key_values, get_endpoint_parameters from rest_api_recipe_session import RestApiRecipeSession +from dku_constants import DKUConstants -logger = SafeLogger("api-connect plugin", forbiden_keys=["token", "password"]) +logger = SafeLogger("api-connect plugin", forbidden_keys=DKUConstants.FORBIDDEN_KEYS) def get_partitioning_keys(id_list, dku_flow_variables): @@ -24,7 +25,8 @@ def get_partitioning_keys(id_list, dku_flow_variables): partitioning_keys[dimension] = dku_flow_variables.get(dimension_src) return partitioning_keys -logger.info('API-Connect plugin recipe v1.1.2') + +logger.info('API-Connect plugin recipe v{}'.format(DKUConstants.PLUGIN_VERSION)) input_A_names = get_input_names_for_role('input_A_role') config = get_recipe_config() diff --git a/plugin.json b/plugin.json index 52543a3..28b78a4 100644 --- a/plugin.json +++ b/plugin.json @@ -1,6 +1,6 @@ { "id": "api-connect", - "version": "1.1.2", + "version": "1.1.4", "meta": { "label": "API Connect", "description": "Retrieve data from any REST API", diff --git a/python-connectors/api-connect_dataset/connector.py b/python-connectors/api-connect_dataset/connector.py index a1212d3..5e418ec 100644 --- a/python-connectors/api-connect_dataset/connector.py +++ b/python-connectors/api-connect_dataset/connector.py @@ -7,14 +7,14 @@ import json -logger = SafeLogger("api-connect plugin", forbiden_keys=["token", "password"]) +logger = SafeLogger("api-connect plugin", forbidden_keys=DKUConstants.FORBIDDEN_KEYS) class RestAPIConnector(Connector): def __init__(self, config, plugin_config): Connector.__init__(self, config, plugin_config) # pass the parameters to the base class - logger.info('API-Connect plugin connector v1.1.2') + logger.info('API-Connect plugin connector v{}'.format(DKUConstants.PLUGIN_VERSION)) logger.info("config={}".format(logger.filter_secrets(config))) endpoint_parameters = get_endpoint_parameters(config) credential = config.get("credential", {}) diff --git a/python-lib/dku_constants.py b/python-lib/dku_constants.py index 7d62cf5..e0dc24e 100644 --- a/python-lib/dku_constants.py +++ b/python-lib/dku_constants.py @@ -1,4 +1,6 @@ class DKUConstants(object): API_RESPONSE_KEY = "api_response" - RAW_BODY_FORMAT = "RAW" + FORBIDDEN_KEYS = ["token", "password", "api_key_value"] FORM_DATA_BODY_FORMAT = "FORM_DATA" + PLUGIN_VERSION = "1.1.4" + RAW_BODY_FORMAT = "RAW" diff --git a/python-lib/rest_api_client.py b/python-lib/rest_api_client.py index bd173d6..c805105 100644 --- a/python-lib/rest_api_client.py +++ b/python-lib/rest_api_client.py @@ -8,7 +8,7 @@ from dku_constants import DKUConstants -logger = SafeLogger("api-connect plugin", forbiden_keys=["token", "password"]) +logger = SafeLogger("api-connect plugin", forbidden_keys=DKUConstants.FORBIDDEN_KEYS) class RestAPIClientError(ValueError): @@ -17,7 +17,7 @@ class RestAPIClientError(ValueError): class RestAPIClient(object): - def __init__(self, credential, endpoint, custom_key_values={}): + def __init__(self, credential, endpoint, custom_key_values={}, session=None): logger.info("Initialising RestAPIClient, credential={}, endpoint={}".format(logger.filter_secrets(credential), endpoint)) # presets_variables contains all variables available in templates using the {{variable_name}} notation @@ -91,6 +91,7 @@ def __init__(self, credential, endpoint, custom_key_values={}): self.requests_kwargs.update({"json": get_dku_key_values(key_value_body)}) self.metadata = {} self.call_number = 0 + self.session = session or requests.Session() def set_login(self, credential): login_type = credential.get("login_type", "no_auth") @@ -131,19 +132,23 @@ def request(self, method, url, can_raise_exeption=True, **kwargs): raise RestAPIClientError("The api-connect plugin is stuck in a loop. Please check the pagination parameters.") request_start_time = time.time() self.time_last_request = request_start_time + error_message = None try: response = self.request_with_redirect_retry(method, url, **kwargs) - request_finish_time = time.time() except Exception as err: self.pagination.is_last_batch_empty = True error_message = "Error: {}".format(err) if can_raise_exeption: raise RestAPIClientError(error_message) - else: - return {"error": error_message} + + request_finish_time = time.time() self.set_metadata("request_duration", request_finish_time - request_start_time) self.set_metadata("status_code", response.status_code) self.set_metadata("response_headers", "{}".format(response.headers)) + + if error_message: + return {"error": error_message} + if response.status_code >= 400: error_message = "Error {}: {}".format(response.status_code, response.content) self.pagination.is_last_batch_empty = True @@ -171,12 +176,12 @@ def request(self, method, url, can_raise_exeption=True, **kwargs): def request_with_redirect_retry(self, method, url, **kwargs): # In case of redirection to another domain, the authorization header is not kept # If redirect_auth_header is true, another attempt is made with initial headers to the redirected url - response = requests.request(method, url, **kwargs) + response = self.session.request(method, url, **kwargs) if self.redirect_auth_header and not response.url.startswith(url): redirection_kwargs = copy.deepcopy(kwargs) redirection_kwargs.pop("params", None) # params are contained in the redirected url logger.warning("Redirection ! Accessing endpoint {} with initial authorization headers".format(response.url)) - response = requests.request(method, response.url, **redirection_kwargs) + response = self.session.request(method, response.url, **redirection_kwargs) return response def paginated_api_call(self, can_raise_exeption=True): diff --git a/python-lib/rest_api_recipe_session.py b/python-lib/rest_api_recipe_session.py index c3340ec..45acfbb 100644 --- a/python-lib/rest_api_recipe_session.py +++ b/python-lib/rest_api_recipe_session.py @@ -5,8 +5,10 @@ from dku_constants import DKUConstants import copy import json +import requests -logger = SafeLogger("api-connect plugin", forbiden_keys=["token", "password"]) + +logger = SafeLogger("api-connect plugin", forbidden_keys=DKUConstants.FORBIDDEN_KEYS) class RestApiRecipeSession: @@ -38,6 +40,7 @@ def get_column_to_parameter_dict(parameter_columns, parameter_renamings): def process_dataframe(self, input_parameters_dataframe, is_raw_output): results = [] time_last_request = None + session = requests.Session() for index, input_parameters_row in input_parameters_dataframe.iterrows(): rows_count = 0 self.initial_parameter_columns = {} @@ -52,7 +55,7 @@ def process_dataframe(self, input_parameters_dataframe, is_raw_output): updated_endpoint_parameters, self.custom_key_values )) - self.client = RestAPIClient(self.credential_parameters, updated_endpoint_parameters, custom_key_values=self.custom_key_values) + self.client = RestAPIClient(self.credential_parameters, updated_endpoint_parameters, custom_key_values=self.custom_key_values, session=session) self.client.time_last_request = time_last_request while self.client.has_more_data(): page_results = self.retrieve_next_page(is_raw_output) @@ -76,7 +79,7 @@ def retrieve_next_page(self, is_raw_output): if self.can_raise: raise DataikuException(error_message) else: - return [{"error": error_message}] + return self.format_page_rows([{"error": error_message}], is_raw_output, metadata) page_rows.extend(self.format_page_rows(data_rows, is_raw_output, metadata)) else: # Todo: check api_response key is free and add something overwise diff --git a/python-lib/safe_logger.py b/python-lib/safe_logger.py index d43196a..db48711 100644 --- a/python-lib/safe_logger.py +++ b/python-lib/safe_logger.py @@ -3,14 +3,14 @@ class SafeLogger(object): - def __init__(self, name, forbiden_keys=None): + def __init__(self, name, forbidden_keys=None): self.name = name self.logger = logging.getLogger(self.name) logging.basicConfig( level=logging.INFO, format='{} %(levelname)s - %(message)s'.format(self.name) ) - self.forbiden_keys = forbiden_keys + self.forbidden_keys = forbidden_keys def info(self, message): self.logger.info(message) @@ -33,7 +33,7 @@ def dig_secrets(self, dictionary): for key in dictionary: if isinstance(dictionary[key], dict): dictionary[key] = self.filter_secrets(dictionary[key]) - if key in self.forbiden_keys: + if key in self.forbidden_keys: dictionary[key] = hash(dictionary[key]) return dictionary