Skip to content

[Prototyping] Play around with google drive and AWS testing with github secrets #528

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 39 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
83ac1f4
google drive setup via python api first draft
Mar 25, 2025
e37861e
enable google drive config setup via tui
Mar 25, 2025
01503f7
minor compatibility and ui changes
Mar 26, 2025
7380bac
protected client secret input box
Mar 26, 2025
965e016
google drive connection setup via TUI
Mar 26, 2025
6698cb0
add aws as remote storage via python api first draft
Mar 28, 2025
60b77aa
add: logging and connection check for aws s3
Mar 29, 2025
d9755da
update: type checking for aws regions
Mar 29, 2025
f3947a7
add: save aws configs via TUI
Mar 29, 2025
b328af9
add: setup aws connection via TUI
Mar 30, 2025
d2a4d83
feat: setup google drive on machines with no browser
Mar 31, 2025
d9ae864
fix: minor bug
Mar 31, 2025
965bb17
fix: logical error
Mar 31, 2025
377cea7
add: logging for google drive connections
Apr 1, 2025
409d448
refactor: move google drive client secret to be entered at runtime wh…
May 30, 2025
0b33b86
refactor: aws_regions.py; provide aws secret access key at runtime
May 31, 2025
0733f51
add: docstrings to gdrive.py
May 31, 2025
023ada3
add: root_folder_id config to google drive; some refactor
Jun 1, 2025
e869986
refactor: radiobuttons switch in configs.py
Jun 1, 2025
9edbc8f
edit: minor changes to SetupAwsScreen for setting up aws connection
Jun 2, 2025
150f2ea
refactor: SetupGdriveScreen and handle errors
Jun 2, 2025
70659cd
add: some tooltips for google drive configs
Jun 3, 2025
985e921
fix: vanishing central path, radio button order, minor refactor
Jun 4, 2025
d7f13d4
fix: minor bug
Jun 4, 2025
f7807d1
refactor: single button for setup connection
Jun 4, 2025
be8f6b1
add: backwards compatibility to configs while load from config file
Jun 5, 2025
0b7483b
edit: raise error on bucket not present
Jun 5, 2025
2579827
rename: aws region config key
Jun 9, 2025
0a1ca87
rename: connection method from aws_s3 to aws
Jun 9, 2025
8bb7c28
add: utility function to remove duplicate code
Jun 9, 2025
8beaa42
add: docstrings to setup gdrive dialog
Jun 9, 2025
e53984b
update: config dict inplace change for backward compatibility, use ex…
Jun 19, 2025
772c3c1
add: docstrings to setup connection functions; remove: aws region class
Jun 20, 2025
91f2454
add: docstrings to setup widgets function; use backwards compatibility
Jun 20, 2025
ee88875
add: docstrings to rclone function, change argument order
Jun 20, 2025
c0a7eca
minor changes
Jun 20, 2025
eb3f098
refactor: centralize the get secret function
Jun 20, 2025
77d9a71
Edit GDrive headless method and add test.
JoeZiminski Jun 20, 2025
34d158e
Add prototype AWS test.
JoeZiminski Jun 20, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 19 additions & 2 deletions .github/workflows/code_test_and_deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,25 @@ jobs:
conda uninstall datashuttle --force
python -m pip install --upgrade pip
pip install .[dev]
- name: Test
run: pytest
# - name: Test
# run: pytest
# - name: Set up Google Drive secrets
# run: |
# printf '%s' '${{ secrets.GDRIVE_SERVICE_ACCOUNT_JSON }}' > "$HOME/gdrive.json"
# echo "GDRIVE_SERVICE_ACCOUNT_FILE=$HOME/gdrive.json" >> $GITHUB_ENV
# echo "GDRIVE_ROOT_FOLDER_ID=${{ secrets.GDRIVE_ROOT_FOLDER_ID }}" >> $GITHUB_ENV

# - name: Run Google Drive tests
# run: pytest -q -k test_gdrive_connection

- name: Set up AWS secrets
run: |
echo "AWS_ACCESS_KEY_ID=${{ secrets.AWS_ACCESS_KEY_ID }}" >> $GITHUB_ENV
echo "AWS_ACCESS_KEY_ID_SECRET=${{ secrets.AWS_ACCESS_KEY_ID_SECRET }}" >> $GITHUB_ENV
echo "AWS_REGION=${{ secrets.AWS_REGION }}" >> $GITHUB_ENV

- name: Run AWS tests
run: pytest -q -k test_aws_connection

build_sdist_wheels:
name: Build source distribution
Expand Down
43 changes: 43 additions & 0 deletions datashuttle/configs/aws_regions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from typing import Dict, List

# -----------------------------------------------------------------------------
# AWS regions
# -----------------------------------------------------------------------------

# These functions are used for type checking and to provide IntelliSense to the developer


def get_aws_regions() -> Dict[str, str]:
    """
    Return a mapping of canonical AWS region identifiers to their
    region codes, e.g. ``"US_EAST_1" -> "us-east-1"``.

    The keys are the region codes upper-cased with dashes replaced
    by underscores, suitable for use as enum-style constant names.
    """
    region_codes = (
        "us-east-1",
        "us-east-2",
        "us-west-1",
        "us-west-2",
        "ca-central-1",
        "eu-west-1",
        "eu-west-2",
        "eu-west-3",
        "eu-north-1",
        "eu-south-1",
        "eu-central-1",
        "ap-southeast-1",
        "ap-southeast-2",
        "ap-northeast-1",
        "ap-northeast-2",
        "ap-northeast-3",
        "ap-south-1",
        "ap-east-1",
        "sa-east-1",
        "il-central-1",
        "me-south-1",
        "af-south-1",
        "cn-north-1",
        "cn-northwest-1",
        "us-gov-east-1",
        "us-gov-west-1",
    )
    # Derive the constant-style key from each region code so the two
    # representations can never drift out of sync.
    return {code.upper().replace("-", "_"): code for code in region_codes}


def get_aws_regions_list() -> List[str]:
    """Return all supported AWS region codes (e.g. ``"us-east-1"``) as a list."""
    return [*get_aws_regions().values()]
31 changes: 30 additions & 1 deletion datashuttle/configs/canonical_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,15 @@ def get_canonical_configs() -> dict:
canonical_configs = {
"local_path": Union[str, Path],
"central_path": Optional[Union[str, Path]],
"connection_method": Optional[Literal["ssh", "local_filesystem"]],
"connection_method": Optional[
Literal["ssh", "local_filesystem", "gdrive", "aws"]
],
"central_host_id": Optional[str],
"central_host_username": Optional[str],
"gdrive_client_id": Optional[str],
"gdrive_root_folder_id": Optional[str],
"aws_access_key_id": Optional[str],
"aws_region": Optional[str],
}

return canonical_configs
Expand Down Expand Up @@ -128,6 +134,29 @@ def check_dict_values_raise_on_fail(config_dict: Configs) -> None:
ConfigError,
)

# Check gdrive settings
elif config_dict["connection_method"] == "gdrive":
if not config_dict["gdrive_root_folder_id"]:
utils.log_and_raise_error(
"'gdrive_root_folder_id' is required if 'connection_method' "
"is 'gdrive'.",
ConfigError,
)

if not config_dict["gdrive_client_id"]:
utils.log_and_message(
"`gdrive_client_id` not found in config. default rlcone client will be used (slower)."
)

# Check AWS settings
elif config_dict["connection_method"] == "aws" and (
not config_dict["aws_access_key_id"] or not config_dict["aws_region"]
):
utils.log_and_raise_error(
"Both aws_access_key_id and aws_region must be present for AWS connection.",
ConfigError,
)

# Initialise the local project folder
utils.print_message_to_user(
f"Making project folder at: {config_dict['local_path']}"
Expand Down
32 changes: 31 additions & 1 deletion datashuttle/configs/config_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,16 +116,46 @@ def dump_to_file(self) -> None:
def load_from_file(self) -> None:
    """
    Load the config dict saved in the .yaml file at ``self.file_path``.

    A minimal backwards-compatibility step adds any config keys
    introduced by newer connection methods before the configs are
    stored on this object.

    Note this will not automatically check the configs are valid;
    that requires calling ``self.check_dict_values_raise_on_fail()``.
    """
    with open(self.file_path, "r") as file_:
        loaded_config_dict = yaml.full_load(file_)

    load_configs.convert_str_and_pathlib_paths(
        loaded_config_dict, "str_to_path"
    )
    self.update_config_for_backward_compatability_if_required(
        loaded_config_dict
    )

    self.data = loaded_config_dict

def update_config_for_backward_compatability_if_required(
    self, config_dict: Dict
) -> None:
    """
    Add config keys introduced by newer connection methods
    (Google Drive, AWS) to a config dict loaded from an older
    config file, defaulting them to ``None``.

    Parameters
    ----------
    config_dict : Dict
        The config dict loaded from the .yaml config file.
        Mutated in place; existing values are never overwritten.
    """
    canonical_config_keys_to_add = [
        "gdrive_client_id",
        "gdrive_root_folder_id",
        "aws_access_key_id",
        "aws_region",
    ]

    # Fill in only the keys that are missing. The previous
    # implementation asserted the keys were either all present or
    # all absent, which crashed on a partially-updated config file
    # (and `assert` is stripped under `python -O`). `setdefault`
    # handles every case safely and leaves existing values intact.
    for key in canonical_config_keys_to_add:
        config_dict.setdefault(key, None)

# -------------------------------------------------------------------------
# Utils
# -------------------------------------------------------------------------
Expand Down
119 changes: 119 additions & 0 deletions datashuttle/datashuttle_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,11 @@
from datashuttle.configs.config_class import Configs
from datashuttle.datashuttle_functions import _format_top_level_folder
from datashuttle.utils import (
aws,
ds_logger,
folders,
formatting,
gdrive,
getters,
rclone,
ssh,
Expand All @@ -53,6 +55,7 @@
from datashuttle.utils.decorators import ( # noqa
check_configs_set,
check_is_not_local_project,
requires_aws_configs,
requires_ssh_configs,
)

Expand Down Expand Up @@ -892,6 +895,90 @@ def write_public_key(self, filepath: str) -> None:
public.write(key.get_base64())
public.close()

# -------------------------------------------------------------------------
# Google Drive
# -------------------------------------------------------------------------

@check_configs_set
def setup_google_drive_connection(self) -> None:
    """
    Set up a connection to Google Drive using the credentials held
    in the configs. Assumes `gdrive_root_folder_id` is set.

    If `gdrive_client_id` is set in the configs, the user is first
    prompted for their Google Drive client secret.

    The user is then asked whether their machine has access to a
    browser. If it does not, they are prompted for the path to a
    service-account credentials json file instead. Finally, the
    rclone config is written (opening a browser when one was
    confirmed available) and the connection is verified.
    """
    self._start_log(
        "setup-google-drive-connection-to-central-server",
        local_vars=locals(),
    )

    gdrive_client_secret = (
        gdrive.get_client_secret()
        if self.cfg["gdrive_client_id"]
        else None
    )

    service_account_filepath = None
    if not gdrive.ask_user_for_browser(log=True):
        # Headless machine: fall back to service-account credentials.
        service_account_filepath = utils.get_user_input(
            "Please input the path to your credentials json"
        )  # TODO: add more explanation

    self._setup_rclone_gdrive_config(
        gdrive_client_secret, service_account_filepath, log=True
    )

    rclone.check_successful_connection_and_raise_error_on_fail(self.cfg)

    ds_logger.close_log_filehandler()

# -------------------------------------------------------------------------
# AWS S3
# -------------------------------------------------------------------------

@requires_aws_configs
@check_configs_set
def setup_aws_connection(self) -> None:
    """
    Set up a connection to AWS S3 buckets using the credentials held
    in the configs. Assumes `aws_access_key_id` and `aws_region`
    are set.

    The user is prompted for their AWS secret access key at runtime,
    the rclone config is written, and then the connection and the
    presence of the bucket are both verified.
    """
    self._start_log(
        "setup-aws-connection-to-central-server",
        local_vars=locals(),
    )

    secret_access_key = aws.get_aws_secret_access_key()
    self._setup_rclone_aws_config(secret_access_key, log=True)

    rclone.check_successful_connection_and_raise_error_on_fail(self.cfg)
    aws.raise_if_bucket_absent(self.cfg)

    utils.log_and_message("AWS Connection Successful.")

    ds_logger.close_log_filehandler()

# -------------------------------------------------------------------------
# Configs
# -------------------------------------------------------------------------
Expand All @@ -903,6 +990,10 @@ def make_config_file(
connection_method: str | None = None,
central_host_id: Optional[str] = None,
central_host_username: Optional[str] = None,
gdrive_client_id: Optional[str] = None,
gdrive_root_folder_id: Optional[str] = None,
aws_access_key_id: Optional[str] = None,
aws_region: Optional[str] = None,
) -> None:
"""
Initialise the configurations for datashuttle to use on the
Expand Down Expand Up @@ -967,6 +1058,10 @@ def make_config_file(
"connection_method": connection_method,
"central_host_id": central_host_id,
"central_host_username": central_host_username,
"gdrive_client_id": gdrive_client_id,
"gdrive_root_folder_id": gdrive_root_folder_id,
"aws_access_key_id": aws_access_key_id,
"aws_region": aws_region,
},
)

Expand Down Expand Up @@ -1470,6 +1565,30 @@ def _setup_rclone_central_local_filesystem_config(self) -> None:
self.cfg.get_rclone_config_name("local_filesystem"),
)

def _setup_rclone_gdrive_config(
    self,
    gdrive_client_secret: str | None,
    service_account_filepath: str | None,
    log: bool,
) -> None:
    """Write the rclone config entry for the Google Drive remote."""
    rclone_config_name = self.cfg.get_rclone_config_name("gdrive")
    rclone.setup_rclone_config_for_gdrive(
        self.cfg,
        rclone_config_name,
        gdrive_client_secret,
        service_account_filepath,
        log=log,
    )

def _setup_rclone_aws_config(
    self, aws_secret_access_key: str, log: bool
) -> None:
    """Write the rclone config entry for the AWS S3 remote."""
    rclone_config_name = self.cfg.get_rclone_config_name("aws")
    rclone.setup_rclone_config_for_aws(
        self.cfg,
        rclone_config_name,
        aws_secret_access_key,
        log=log,
    )

# Persistent settings
# -------------------------------------------------------------------------

Expand Down
Loading
Loading