From 687ef9bb0b24831ed0e1e2bf2bc2c38f4e300207 Mon Sep 17 00:00:00 2001
From: elysenko
Date: Fri, 2 May 2025 18:02:55 -0400
Subject: [PATCH 1/4] adding functionality to write to datasets
---
pysurveycto/pysurveycto.py | 72 +++++++++++++++++++++++++++++++++++++-
1 file changed, 71 insertions(+), 1 deletion(-)
diff --git a/pysurveycto/pysurveycto.py b/pysurveycto/pysurveycto.py
index af9d68b..d1e0f79 100644
--- a/pysurveycto/pysurveycto.py
+++ b/pysurveycto/pysurveycto.py
@@ -13,7 +13,10 @@
import datetime
import warnings
from urllib.parse import quote
-
+import pandas as pd
+import tempfile
+import os
+import requests
class IllegalArgumentError(ValueError):
"""
@@ -724,3 +727,70 @@ def list_forms(self):
raise e
return response.json()['forms']
+
+ def upload_dataset(self, data, dataset_id, dataset_title=None, append=False, fill=False):
+ """
+ Uploads a pandas df to a dataset
+ :return: dictionary with previous dataset preview and upload response
+
+ :param data: pandas DataFrame to upload
+ :param dataset_id: ID of the dataset on SurveyCTO
+ :param dataset_title: Optional title for the dataset (defaults to dataset_id)
+ :param append: If True, appends data; otherwise replaces the dataset
+ :param fill: If True, allows mismatched columns in append mode
+ :raises RuntimeError: if fetching the existing dataset raises a requests exception
+
+ """
+
+ assert isinstance(data, pd.DataFrame), "data must be a pandas DataFrame"
+ assert isinstance(dataset_id, str), "dataset_id must be a string"
+ if dataset_title is None:
+ dataset_title = dataset_id
+
+ # NOTE(review): the assert checks above are skipped when Python runs with -O.
+ # Auth headers; their "X-csrf-token" entry is placed in the upload URL below.
+ headers = self.__auth()
+
+ # check_resp is not read again in this method; only a request failure here matters.
+ try:
+ check_resp = self.get_server_dataset(dataset_id)
+ except requests.exceptions.RequestException as e:
+ raise RuntimeError(f"Failed to retrieve dataset: {e}")
+
+ # NOTE(review): `fill` is not read anywhere in this method.
+ dataset_upload_mode = 'append' if append else 'clear'
+ dataset_exists = 1
+ dataset_type = 'SERVER'
+
+ with tempfile.NamedTemporaryFile(suffix=".csv", delete=False, mode='w', newline='') as tmp:
+ data.to_csv(tmp.name, index=False)
+ tmp_path = tmp.name
+
+ upload_url = (
+ f"https://{self.server_name}.surveycto.com/"
+ f"datasets/{dataset_id}/upload?csrf_token={headers["X-csrf-token"]}"
+ )
+
+ with open(tmp_path, 'rb') as f:
+ # The CSV still exists here because the temp file was created with delete=False.
+ files = {
+ 'dataset_file': (os.path.basename(tmp_path), f, 'text/csv')
+ }
+ payload = {
+ 'dataset_exists': dataset_exists,
+ 'dataset_id': dataset_id,
+ 'dataset_title': dataset_title,
+ 'dataset_upload_mode': dataset_upload_mode,
+ 'dataset_type': dataset_type,
+ }
+
+ try:
+ upload_resp = self._sesh.post(
+ upload_url,
+ data=payload,
+ files=files,
+ cookies=self._sesh.cookies,
+ headers=headers
+ )
+ upload_resp.raise_for_status()
+ finally:
+ # delete=False means nothing else removes the temp file, so it is removed here.
+ os.remove(tmp_path)
+
+ # NOTE(review): the returned dict has a single 'response' key holding the parsed
+ # JSON body of the upload response; check_resp is not part of it.
+ return {
+
+ 'response': upload_resp.json()
+ }
From 8c4e9fb0f430332dd533f6df8c7c0ebfd92305ac Mon Sep 17 00:00:00 2001
From: Eric Lysenko
Date: Fri, 30 May 2025 09:56:30 -0600
Subject: [PATCH 2/4] updates to documentation
---
README.md | 17 +++++++++++++++++
README.rst | 16 ++++++++++++++++
2 files changed, 33 insertions(+)
diff --git a/README.md b/README.md
index 5ee2c4a..4ccf740 100644
--- a/README.md
+++ b/README.md
@@ -156,6 +156,23 @@ SurveyCTOObject(server_name,
+*
+ ```python
+ upload_dataset()
+ ```
+ Upload a pandas df to a dataset
+
+ *Parameters:*
+ - **df** *(pd.DataFrame)*: pandas DataFrame to upload
+ - **dataset_id** *(str)*: ID of the dataset on SurveyCTO
+ - **dataset_title** *(str)*: Optional title for the dataset (defaults to dataset_id)
+ - **append** *(bool)*: If True, appends data; otherwise replaces the dataset
+ - **fill** *(bool)*: If True, allows mismatched columns in append mode
+
+ *Returns:* JSON resonse message from server
+
+
+
# Use Cases
diff --git a/README.rst b/README.rst
index 7c254fe..7521f9d 100644
--- a/README.rst
+++ b/README.rst
@@ -172,6 +172,22 @@ Methods:
*Returns:* list of dictionaries, each dictionary containing information for each form on server
+-
+ .. code:: python
+
+ upload_dataset()
+
+ Upload a pandas df to a dataset
+
+ *Parameters:*
+ - **df** *(pd.DataFrame)*: pandas DataFrame to upload
+ - **dataset_id** *(str)*: ID of the dataset on SurveyCTO
+ - **dataset_title** *(str)*: Optional title for the dataset (defaults to dataset_id)
+ - **append** *(bool)*: If True, appends data; otherwise replaces the dataset
+ - **fill** *(bool)*: If True, allows mismatched columns in append mode
+
+ *Returns:* JSON resonse message from server
+
Use Cases
=========
From 4054b5bb2a7f08bfd6f1ac245efda49e0118de4f Mon Sep 17 00:00:00 2001
From: Eric Lysenko
Date: Fri, 30 May 2025 09:58:10 -0600
Subject: [PATCH 3/4] renamed variable df and more robust string concatenation
for url creation
---
pysurveycto/pysurveycto.py | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/pysurveycto/pysurveycto.py b/pysurveycto/pysurveycto.py
index d1e0f79..e3c53ed 100644
--- a/pysurveycto/pysurveycto.py
+++ b/pysurveycto/pysurveycto.py
@@ -728,12 +728,12 @@ def list_forms(self):
return response.json()['forms']
- def upload_dataset(self, data, dataset_id, dataset_title=None, append=False, fill=False):
+ def upload_dataset(self, df, dataset_id, dataset_title=None, append=False, fill=False):
"""
Uploads a pandas df to a dataset
:return: dictionary with previous dataset preview and upload response
- :param data: pandas DataFrame to upload
+ :param df: pandas DataFrame to upload
:param dataset_id: ID of the dataset on SurveyCTO
:param dataset_title: Optional title for the dataset (defaults to dataset_id)
:param append: If True, appends data; otherwise replaces the dataset
@@ -741,7 +741,7 @@ def upload_dataset(self, data, dataset_id, dataset_title=None, append=False, fil
"""
- assert isinstance(data, pd.DataFrame), "data must be a pandas DataFrame"
+ assert isinstance(df, pd.DataFrame), "df must be a pandas DataFrame"
assert isinstance(dataset_id, str), "dataset_id must be a string"
if dataset_title is None:
dataset_title = dataset_id
@@ -758,12 +758,12 @@ def upload_dataset(self, data, dataset_id, dataset_title=None, append=False, fil
dataset_type = 'SERVER'
with tempfile.NamedTemporaryFile(suffix=".csv", delete=False, mode='w', newline='') as tmp:
- data.to_csv(tmp.name, index=False)
+ df.to_csv(tmp.name, index=False)
tmp_path = tmp.name
upload_url = (
f"https://{self.server_name}.surveycto.com/"
- f"datasets/{dataset_id}/upload?csrf_token={headers["X-csrf-token"]}"
+ # Quote the header key with single quotes so the token is still interpolated
+ # and the f-string parses on Python versions before 3.12.
+ f"datasets/{dataset_id}/upload?csrf_token={headers['X-csrf-token']}"
)
with open(tmp_path, 'rb') as f:
From a4b8b2329571d4b8689818a31da3b02d064a6fbe Mon Sep 17 00:00:00 2001
From: Eric Lysenko
Date: Fri, 30 May 2025 10:01:09 -0600
Subject: [PATCH 4/4] typo fixes
---
README.md | 2 +-
README.rst | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index 4ccf740..d070e38 100644
--- a/README.md
+++ b/README.md
@@ -169,7 +169,7 @@ SurveyCTOObject(server_name,
- **append** *(bool)*: If True, appends data; otherwise replaces the dataset
- **fill** *(bool)*: If True, allows mismatched columns in append mode
- *Returns:* JSON resonse message from server
+ *Returns:* dictionary whose `response` key holds the JSON response from the server
diff --git a/README.rst b/README.rst
index 7521f9d..4060b2f 100644
--- a/README.rst
+++ b/README.rst
@@ -186,7 +186,7 @@ Methods:
- **append** *(bool)*: If True, appends data; otherwise replaces the dataset
- **fill** *(bool)*: If True, allows mismatched columns in append mode
- *Returns:* JSON resonse message from server
+ *Returns:* dictionary whose ``response`` key holds the JSON response from the server
Use Cases