From 687ef9bb0b24831ed0e1e2bf2bc2c38f4e300207 Mon Sep 17 00:00:00 2001 From: elysenko Date: Fri, 2 May 2025 18:02:55 -0400 Subject: [PATCH 1/4] adding functionality to write to datasets --- pysurveycto/pysurveycto.py | 72 +++++++++++++++++++++++++++++++++++++- 1 file changed, 71 insertions(+), 1 deletion(-) diff --git a/pysurveycto/pysurveycto.py b/pysurveycto/pysurveycto.py index af9d68b..d1e0f79 100644 --- a/pysurveycto/pysurveycto.py +++ b/pysurveycto/pysurveycto.py @@ -13,7 +13,10 @@ import datetime import warnings from urllib.parse import quote - +import pandas as pd +import tempfile +import os +import requests class IllegalArgumentError(ValueError): """ @@ -724,3 +727,70 @@ def list_forms(self): raise e return response.json()['forms'] + + def upload_dataset(self, data, dataset_id, dataset_title=None, append=False, fill=False): + """ + Uploads a pandas df to a dataset + :return: dictionary with previous dataset preview and upload response + + :param data: pandas DataFrame to upload + :param dataset_id: ID of the dataset on SurveyCTO + :param dataset_title: Optional title for the dataset (defaults to dataset_id) + :param append: If True, appends data; otherwise replaces the dataset + :param fill: If True, allows mismatched columns in append mode + + """ + + assert isinstance(data, pd.DataFrame), "data must be a pandas DataFrame" + assert isinstance(dataset_id, str), "dataset_id must be a string" + if dataset_title is None: + dataset_title = dataset_id + + headers = self.__auth() + + try: + check_resp = self.get_server_dataset(dataset_id) + except requests.exceptions.RequestException as e: + raise RuntimeError(f"Failed to retrieve dataset: {e}") + + dataset_upload_mode = 'append' if append else 'clear' + dataset_exists = 1 + dataset_type = 'SERVER' + + with tempfile.NamedTemporaryFile(suffix=".csv", delete=False, mode='w', newline='') as tmp: + data.to_csv(tmp.name, index=False) + tmp_path = tmp.name + + upload_url = ( + 
f"https://{self.server_name}.surveycto.com/" + f"datasets/{dataset_id}/upload?csrf_token={headers["X-csrf-token"]}" + ) + + with open(tmp_path, 'rb') as f: + files = { + 'dataset_file': (os.path.basename(tmp_path), f, 'text/csv') + } + payload = { + 'dataset_exists': dataset_exists, + 'dataset_id': dataset_id, + 'dataset_title': dataset_title, + 'dataset_upload_mode': dataset_upload_mode, + 'dataset_type': dataset_type, + } + + try: + upload_resp = self._sesh.post( + upload_url, + data=payload, + files=files, + cookies=self._sesh.cookies, + headers=headers + ) + upload_resp.raise_for_status() + finally: + os.remove(tmp_path) + + return { + + 'response': upload_resp.json() + } From 8c4e9fb0f430332dd533f6df8c7c0ebfd92305ac Mon Sep 17 00:00:00 2001 From: Eric Lysenko Date: Fri, 30 May 2025 09:56:30 -0600 Subject: [PATCH 2/4] updates to documentation --- README.md | 17 +++++++++++++++++ README.rst | 16 ++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/README.md b/README.md index 5ee2c4a..4ccf740 100644 --- a/README.md +++ b/README.md @@ -156,6 +156,23 @@ SurveyCTOObject(server_name,

+* + ```python + upload_dataset() + ``` +

Upload a pandas df to a dataset + + *Parameters:* + - **df** *(pd.DataFrame)*: pandas DataFrame to upload + - **dataset_id** *(str)*: ID of the dataset on SurveyCTO + - **dataset_title** *(str)*: Optional title for the dataset (defaults to dataset_id) + - **append** *(bool)*: If True, appends data; otherwise replaces the dataset + - **fill** *(bool)*: If True, allows mismatched columns in append mode + + *Returns:* JSON resonse message from server +

+ + # Use Cases diff --git a/README.rst b/README.rst index 7c254fe..7521f9d 100644 --- a/README.rst +++ b/README.rst @@ -172,6 +172,22 @@ Methods: *Returns:* list of dictionaries, each dictionary containing information for each form on server +- + .. code:: python + + upload_dataset() + + Upload a pandas df to a dataset + + *Parameters:* + - **df** *(pd.DataFrame)*: pandas DataFrame to upload + - **dataset_id** *(str)*: ID of the dataset on SurveyCTO + - **dataset_title** *(str)*: Optional title for the dataset (defaults to dataset_id) + - **append** *(bool)*: If True, appends data; otherwise replaces the dataset + - **fill** *(bool)*: If True, allows mismatched columns in append mode + + *Returns:* JSON resonse message from server + Use Cases ========= From 4054b5bb2a7f08bfd6f1ac245efda49e0118de4f Mon Sep 17 00:00:00 2001 From: Eric Lysenko Date: Fri, 30 May 2025 09:58:10 -0600 Subject: [PATCH 3/4] renamed variable df and more robust string concatenation for url creation --- pysurveycto/pysurveycto.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pysurveycto/pysurveycto.py b/pysurveycto/pysurveycto.py index d1e0f79..e3c53ed 100644 --- a/pysurveycto/pysurveycto.py +++ b/pysurveycto/pysurveycto.py @@ -728,12 +728,12 @@ def list_forms(self): return response.json()['forms'] - def upload_dataset(self, data, dataset_id, dataset_title=None, append=False, fill=False): + def upload_dataset(self, df, dataset_id, dataset_title=None, append=False, fill=False): """ Uploads a pandas df to a dataset :return: dictionary with previous dataset preview and upload response - :param data: pandas DataFrame to upload + :param df: pandas DataFrame to upload :param dataset_id: ID of the dataset on SurveyCTO :param dataset_title: Optional title for the dataset (defaults to dataset_id) :param append: If True, appends data; otherwise replaces the dataset @@ -741,7 +741,7 @@ def upload_dataset(self, data, dataset_id, dataset_title=None, append=False, fil """ - 
+ assert isinstance(data, pd.DataFrame), "data must be a pandas DataFrame" + assert isinstance(df, pd.DataFrame), "df must be a pandas DataFrame" assert isinstance(dataset_id, str), "dataset_id must be a string" if dataset_title is None: dataset_title = dataset_id @@ -758,12 +758,12 @@ def upload_dataset(self, data, dataset_id, dataset_title=None, append=False, fil dataset_type = 'SERVER' with tempfile.NamedTemporaryFile(suffix=".csv", delete=False, mode='w', newline='') as tmp: - data.to_csv(tmp.name, index=False) + df.to_csv(tmp.name, index=False) tmp_path = tmp.name upload_url = ( f"https://{self.server_name}.surveycto.com/" - f"datasets/{dataset_id}/upload?csrf_token={headers["X-csrf-token"]}" + f"datasets/{dataset_id}/upload?csrf_token={headers['X-csrf-token']}" ) with open(tmp_path, 'rb') as f: From a4b8b2329571d4b8689818a31da3b02d064a6fbe Mon Sep 17 00:00:00 2001 From: Eric Lysenko Date: Fri, 30 May 2025 10:01:09 -0600 Subject: [PATCH 4/4] typo fixes --- README.md | 2 +- README.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 4ccf740..d070e38 100644 --- a/README.md +++ b/README.md @@ -169,7 +169,7 @@ SurveyCTOObject(server_name, - **append** *(bool)*: If True, appends data; otherwise replaces the dataset - **fill** *(bool)*: If True, allows mismatched columns in append mode - *Returns:* JSON resonse message from server + *Returns:* JSON response message from server

diff --git a/README.rst b/README.rst index 7521f9d..4060b2f 100644 --- a/README.rst +++ b/README.rst @@ -186,7 +186,7 @@ Methods: - **append** *(bool)*: If True, appends data; otherwise replaces the dataset - **fill** *(bool)*: If True, allows mismatched columns in append mode - *Returns:* JSON resonse message from server + *Returns:* JSON response message from server Use Cases